xref: /illumos-gate/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision f3af49816e370d667d566ab703e94b81305a536e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/dirent.h>
38 #include <sys/vnode.h>
39 #include <sys/proc.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/uio.h>
43 #include <sys/fs/pc_label.h>
44 #include <sys/fs/pc_fs.h>
45 #include <sys/fs/pc_dir.h>
46 #include <sys/fs/pc_node.h>
47 #include <sys/mman.h>
48 #include <sys/pathname.h>
49 #include <sys/vmsystm.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/statvfs.h>
53 #include <sys/unistd.h>
54 #include <sys/kmem.h>
55 #include <sys/conf.h>
56 #include <sys/flock.h>
57 #include <sys/policy.h>
58 #include <sys/sdt.h>
59 
60 #include <vm/seg.h>
61 #include <vm/page.h>
62 #include <vm/pvn.h>
63 #include <vm/seg_map.h>
64 #include <vm/seg_vn.h>
65 #include <vm/hat.h>
66 #include <vm/as.h>
67 #include <vm/seg_kmem.h>
68 
69 #include <fs/fs_subr.h>
70 
71 static int pcfs_open(struct vnode **, int, struct cred *);
72 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *);
73 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
74 	struct caller_context *);
75 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
76 	struct caller_context *);
77 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *);
78 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
79 	caller_context_t *);
80 static int pcfs_access(struct vnode *, int, int, struct cred *);
81 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
82 	struct pathname *, int, struct vnode *, struct cred *);
83 static int pcfs_create(struct vnode *, char *, struct vattr *,
84 	enum vcexcl, int mode, struct vnode **, struct cred *, int);
85 static int pcfs_remove(struct vnode *, char *, struct cred *);
86 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
87 	struct cred *);
88 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
89 	struct cred *);
90 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *);
91 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *);
92 static int pcfs_fsync(struct vnode *, int, struct cred *);
93 static void pcfs_inactive(struct vnode *, struct cred *);
94 static int pcfs_fid(struct vnode *vp, struct fid *fidp);
95 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
96 	offset_t, cred_t *, caller_context_t *);
97 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
98 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
99 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
100 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
101 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *);
102 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
103 	uchar_t, uchar_t, uint_t, struct cred *);
104 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
105 	size_t, uchar_t, uchar_t, uint_t, struct cred *);
106 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
107 	size_t, uint_t, uint_t, uint_t, struct cred *);
108 static int pcfs_seek(struct vnode *, offset_t, offset_t *);
109 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *);
110 
111 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
112 	struct cred *);
113 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
114 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);
115 
116 extern krwlock_t pcnodes_lock;
117 
/*
 * Round r up to the next multiple of sizeof (long long).
 */
#define	lround(r)	\
	(((r) + (sizeof (long long) - 1)) & ~(sizeof (long long) - 1))
119 
120 /*
121  * vnode op vectors for files and directories.
122  */
123 struct vnodeops *pcfs_fvnodeops;
124 struct vnodeops *pcfs_dvnodeops;
125 
126 const fs_operation_def_t pcfs_fvnodeops_template[] = {
127 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
128 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
129 	VOPNAME_READ,		{ .vop_read = pcfs_read },
130 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
131 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
132 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
133 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
134 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
135 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
136 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
137 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
138 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
139 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
140 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
141 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
142 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
143 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
144 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
145 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
146 	NULL,			NULL
147 };
148 
149 const fs_operation_def_t pcfs_dvnodeops_template[] = {
150 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
151 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
152 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
153 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
154 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
155 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
156 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
157 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
158 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
159 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
160 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
161 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
162 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
163 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
164 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
165 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
166 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
167 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
168 	NULL,			NULL
169 };
170 
171 
/*
 * Open handler.  Nothing is done on open: every argument is ignored
 * and success (0) is always returned.
 */
/*ARGSUSED*/
static int
pcfs_open(struct vnode **vpp, int flag, struct cred *cr)
{
	return (0);
}
181 
182 /*
183  * files are sync'ed on close to keep floppy up to date
184  */
185 
186 /*ARGSUSED*/
187 static int
188 pcfs_close(
189 	struct vnode *vp,
190 	int flag,
191 	int count,
192 	offset_t offset,
193 	struct cred *cr)
194 {
195 	return (0);
196 }
197 
198 /*ARGSUSED*/
199 static int
200 pcfs_read(
201 	struct vnode *vp,
202 	struct uio *uiop,
203 	int ioflag,
204 	struct cred *cr,
205 	struct caller_context *ct)
206 {
207 	struct pcfs *fsp;
208 	struct pcnode *pcp;
209 	int error;
210 
211 	fsp = VFSTOPCFS(vp->v_vfsp);
212 	if (error = pc_verify(fsp))
213 		return (error);
214 	error = pc_lockfs(fsp, 0, 0);
215 	if (error)
216 		return (error);
217 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
218 		pc_unlockfs(fsp);
219 		return (EIO);
220 	}
221 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
222 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
223 		pcp->pc_flags |= PC_ACC;
224 		pc_mark_acc(pcp);
225 	}
226 	pc_unlockfs(fsp);
227 	if (error) {
228 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
229 	}
230 	return (error);
231 }
232 
233 /*ARGSUSED*/
234 static int
235 pcfs_write(
236 	struct vnode *vp,
237 	struct uio *uiop,
238 	int ioflag,
239 	struct cred *cr,
240 	struct caller_context *ct)
241 {
242 	struct pcfs *fsp;
243 	struct pcnode *pcp;
244 	int error;
245 
246 	fsp = VFSTOPCFS(vp->v_vfsp);
247 	if (error = pc_verify(fsp))
248 		return (error);
249 	error = pc_lockfs(fsp, 0, 0);
250 	if (error)
251 		return (error);
252 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
253 		pc_unlockfs(fsp);
254 		return (EIO);
255 	}
256 	if (ioflag & FAPPEND) {
257 		/*
258 		 * in append mode start at end of file.
259 		 */
260 		uiop->uio_loffset = pcp->pc_size;
261 	}
262 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
263 	pcp->pc_flags |= PC_MOD;
264 	pc_mark_mod(pcp);
265 	if (ioflag & (FSYNC|FDSYNC))
266 		(void) pc_nodeupdate(pcp);
267 
268 	pc_unlockfs(fsp);
269 	if (error) {
270 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
271 	}
272 	return (error);
273 }
274 
275 /*
276  * read or write a vnode
277  */
278 static int
279 rwpcp(
280 	struct pcnode *pcp,
281 	struct uio *uio,
282 	enum uio_rw rw,
283 	int ioflag)
284 {
285 	struct vnode *vp = PCTOV(pcp);
286 	struct pcfs *fsp;
287 	daddr_t bn;			/* phys block number */
288 	int n;
289 	offset_t off;
290 	caddr_t base;
291 	int mapon, pagecreate;
292 	int newpage;
293 	int error = 0;
294 	rlim64_t limit = uio->uio_llimit;
295 	int oresid = uio->uio_resid;
296 
297 	/*
298 	 * If the filesystem was umounted by force, return immediately.
299 	 */
300 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
301 		return (EIO);
302 
303 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
304 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
305 
306 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
307 	ASSERT(vp->v_type == VREG);
308 
309 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
310 		return (0);
311 	}
312 
313 	if (uio->uio_loffset < 0)
314 		return (EINVAL);
315 
316 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
317 		limit = MAXOFFSET_T;
318 
319 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
320 		proc_t *p = ttoproc(curthread);
321 
322 		mutex_enter(&p->p_lock);
323 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
324 		    p, RCA_UNSAFE_SIGINFO);
325 		mutex_exit(&p->p_lock);
326 		return (EFBIG);
327 	}
328 
329 	/* the following condition will occur only for write */
330 
331 	if (uio->uio_loffset >= UINT32_MAX)
332 		return (EFBIG);
333 
334 	if (uio->uio_resid == 0)
335 		return (0);
336 
337 	if (limit > UINT32_MAX)
338 		limit = UINT32_MAX;
339 
340 	fsp = VFSTOPCFS(vp->v_vfsp);
341 	if (fsp->pcfs_flags & PCFS_IRRECOV)
342 		return (EIO);
343 
344 	do {
345 		/*
346 		 * Assignments to "n" in this block may appear
347 		 * to overflow in some cases.  However, after careful
348 		 * analysis it was determined that all assignments to
349 		 * "n" serve only to make "n" smaller.  Since "n"
350 		 * starts out as no larger than MAXBSIZE, "int" is
351 		 * safe.
352 		 */
353 		off = uio->uio_loffset & MAXBMASK;
354 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
355 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
356 		if (rw == UIO_READ) {
357 			offset_t diff;
358 
359 			diff = pcp->pc_size - uio->uio_loffset;
360 			if (diff <= 0)
361 				return (0);
362 			if (diff < n)
363 				n = (int)diff;
364 		}
365 		/*
366 		 * Compare limit with the actual offset + n, not the
367 		 * rounded down offset "off" or we will overflow
368 		 * the maximum file size after all.
369 		 */
370 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
371 			if (uio->uio_loffset >= limit) {
372 				error = EFBIG;
373 				break;
374 			}
375 			n = (int)(limit - uio->uio_loffset);
376 		}
377 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
378 		pagecreate = 0;
379 		newpage = 0;
380 		if (rw == UIO_WRITE) {
381 			/*
382 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
383 			 * with one page at a time, instead of one MAXBSIZE
384 			 * at a time, so we can fully explore pagecreate
385 			 * optimization??
386 			 */
387 			if (uio->uio_loffset + n > pcp->pc_size) {
388 				uint_t ncl, lcn;
389 
390 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
391 					fsp->pcfs_clsize);
392 				if (uio->uio_loffset > pcp->pc_size &&
393 				    ncl < (uint_t)howmany(uio->uio_loffset,
394 							fsp->pcfs_clsize)) {
395 					/*
396 					 * Allocate and zerofill skipped
397 					 * clusters. This may not be worth the
398 					 * effort since a small lseek beyond
399 					 * eof but still within the cluster
400 					 * will not be zeroed out.
401 					 */
402 					lcn = pc_lblkno(fsp, uio->uio_loffset);
403 					error = pc_balloc(pcp, (daddr_t)lcn,
404 					    1, &bn);
405 					ncl = lcn + 1;
406 				}
407 				if (!error &&
408 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
409 							fsp->pcfs_clsize))
410 					/*
411 					 * allocate clusters w/o zerofill
412 					 */
413 					error = pc_balloc(pcp,
414 					    (daddr_t)pc_lblkno(fsp,
415 					    uio->uio_loffset + n - 1),
416 					    0, &bn);
417 
418 				pcp->pc_flags |= PC_CHG;
419 
420 				if (error) {
421 					pc_cluster32_t ncl;
422 					int nerror;
423 
424 					/*
425 					 * figure out new file size from
426 					 * cluster chain length. If this
427 					 * is detected to loop, the chain
428 					 * is corrupted and we'd better
429 					 * keep our fingers off that file.
430 					 */
431 					nerror = pc_fileclsize(fsp,
432 					    pcp->pc_scluster, &ncl);
433 					if (nerror) {
434 						PC_DPRINTF1(2,
435 						    "cluster chain "
436 						    "corruption, "
437 						    "scluster=%d\n",
438 						    pcp->pc_scluster);
439 						pcp->pc_size = 0;
440 						pcp->pc_flags |= PC_INVAL;
441 						error = nerror;
442 						(void) segmap_release(segkmap,
443 						    base, 0);
444 						break;
445 					}
446 					pcp->pc_size = fsp->pcfs_clsize * ncl;
447 
448 					if (error == ENOSPC &&
449 					    (pcp->pc_size - uio->uio_loffset)
450 						> 0) {
451 						PC_DPRINTF3(2, "rwpcp ENOSPC "
452 						    "off=%lld n=%d size=%d\n",
453 						    uio->uio_loffset,
454 						    n, pcp->pc_size);
455 						n = (int)(pcp->pc_size -
456 							uio->uio_loffset);
457 					} else {
458 						PC_DPRINTF1(1,
459 						    "rwpcp error1=%d\n", error);
460 						(void) segmap_release(segkmap,
461 						    base, 0);
462 						break;
463 					}
464 				} else {
465 					pcp->pc_size =
466 					    (uint_t)(uio->uio_loffset + n);
467 				}
468 				if (mapon == 0) {
469 					newpage = segmap_pagecreate(segkmap,
470 						base, (size_t)n, 0);
471 					pagecreate = 1;
472 				}
473 			} else if (n == MAXBSIZE) {
474 				newpage = segmap_pagecreate(segkmap, base,
475 						(size_t)n, 0);
476 				pagecreate = 1;
477 			}
478 		}
479 		error = uiomove(base + mapon, (size_t)n, rw, uio);
480 
481 		if (pagecreate && uio->uio_loffset <
482 			roundup(off + mapon + n, PAGESIZE)) {
483 			offset_t nzero, nmoved;
484 
485 			nmoved = uio->uio_loffset - (off + mapon);
486 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
487 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
488 		}
489 
490 		/*
491 		 * Unlock the pages which have been allocated by
492 		 * page_create_va() in segmap_pagecreate().
493 		 */
494 		if (newpage)
495 			segmap_pageunlock(segkmap, base, (size_t)n,
496 				rw == UIO_WRITE ? S_WRITE : S_READ);
497 
498 		if (error) {
499 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
500 			/*
501 			 * If we failed on a write, we may have already
502 			 * allocated file blocks as well as pages.  It's hard
503 			 * to undo the block allocation, but we must be sure
504 			 * to invalidate any pages that may have been
505 			 * allocated.
506 			 */
507 			if (rw == UIO_WRITE)
508 				(void) segmap_release(segkmap, base, SM_INVAL);
509 			else
510 				(void) segmap_release(segkmap, base, 0);
511 		} else {
512 			uint_t flags = 0;
513 
514 			if (rw == UIO_READ) {
515 				if (n + mapon == MAXBSIZE ||
516 				    uio->uio_loffset == pcp->pc_size)
517 					flags = SM_DONTNEED;
518 			} else if (ioflag & (FSYNC|FDSYNC)) {
519 				flags = SM_WRITE;
520 			} else if (n + mapon == MAXBSIZE) {
521 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
522 			}
523 			error = segmap_release(segkmap, base, flags);
524 		}
525 
526 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
527 
528 	if (oresid != uio->uio_resid)
529 		error = 0;
530 	return (error);
531 }
532 
533 /*ARGSUSED*/
534 static int
535 pcfs_getattr(
536 	struct vnode *vp,
537 	struct vattr *vap,
538 	int flags,
539 	struct cred *cr)
540 {
541 	struct pcnode *pcp;
542 	struct pcfs *fsp;
543 	int error;
544 	char attr;
545 	struct pctime atime;
546 	int64_t unixtime;
547 
548 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
549 
550 	fsp = VFSTOPCFS(vp->v_vfsp);
551 	error = pc_lockfs(fsp, 0, 0);
552 	if (error)
553 		return (error);
554 
555 	/*
556 	 * Note that we don't check for "invalid node" (PC_INVAL) here
557 	 * only in order to make stat() succeed. We allow no I/O on such
558 	 * a node, but do allow to check for its existance.
559 	 */
560 	if ((pcp = VTOPC(vp)) == NULL) {
561 		pc_unlockfs(fsp);
562 		return (EIO);
563 	}
564 	/*
565 	 * Copy from pcnode.
566 	 */
567 	vap->va_type = vp->v_type;
568 	attr = pcp->pc_entry.pcd_attr;
569 	if (PCA_IS_HIDDEN(fsp, attr))
570 		vap->va_mode = 0;
571 	else if (attr & PCA_LABEL)
572 		vap->va_mode = 0444;
573 	else if (attr & PCA_RDONLY)
574 		vap->va_mode = 0555;
575 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
576 		vap->va_mode = 0755;
577 	} else {
578 		vap->va_mode = 0777;
579 	}
580 
581 	if (attr & PCA_DIR)
582 		vap->va_mode |= S_IFDIR;
583 	else
584 		vap->va_mode |= S_IFREG;
585 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
586 		vap->va_uid = 0;
587 		vap->va_gid = 0;
588 	} else {
589 		vap->va_uid = crgetuid(cr);
590 		vap->va_gid = crgetgid(cr);
591 	}
592 	vap->va_fsid = vp->v_vfsp->vfs_dev;
593 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
594 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
595 	    pc_getstartcluster(fsp, &pcp->pc_entry), fsp->pcfs_entps);
596 	vap->va_nlink = 1;
597 	vap->va_size = (u_offset_t)pcp->pc_size;
598 
599 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
600 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
601 		if (unixtime > INT32_MAX)
602 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
603 		unixtime = MIN(unixtime, INT32_MAX);
604 	} else if (unixtime > INT32_MAX &&
605 	    get_udatamodel() == DATAMODEL_ILP32) {
606 		pc_unlockfs(fsp);
607 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
608 		return (EOVERFLOW);
609 	}
610 
611 	vap->va_mtime.tv_sec = (time_t)unixtime;
612 	vap->va_mtime.tv_nsec = 0;
613 
614 	/*
615 	 * FAT doesn't know about POSIX ctime.
616 	 * Best approximation is to always set it to mtime.
617 	 */
618 	vap->va_ctime = vap->va_mtime;
619 
620 	/*
621 	 * FAT only stores "last access date". If that's the
622 	 * same as the date of last modification then the time
623 	 * of last access is known. Otherwise, use midnight.
624 	 */
625 	atime.pct_date = pcp->pc_entry.pcd_ladate;
626 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
627 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
628 	else
629 		atime.pct_time = 0;
630 	pc_pcttotv(&atime, &unixtime);
631 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
632 		if (unixtime > INT32_MAX)
633 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
634 		unixtime = MIN(unixtime, INT32_MAX);
635 	} else if (unixtime > INT32_MAX &&
636 	    get_udatamodel() == DATAMODEL_ILP32) {
637 		pc_unlockfs(fsp);
638 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
639 		return (EOVERFLOW);
640 	}
641 
642 	vap->va_atime.tv_sec = (time_t)unixtime;
643 	vap->va_atime.tv_nsec = 0;
644 
645 	vap->va_rdev = 0;
646 	vap->va_nblocks = (fsblkcnt64_t)howmany((offset_t)pcp->pc_size,
647 				DEV_BSIZE);
648 	vap->va_blksize = fsp->pcfs_clsize;
649 	pc_unlockfs(fsp);
650 	return (0);
651 }
652 
653 
654 /*ARGSUSED*/
655 static int
656 pcfs_setattr(
657 	struct vnode *vp,
658 	struct vattr *vap,
659 	int flags,
660 	struct cred *cr,
661 	caller_context_t *ct)
662 {
663 	struct pcnode *pcp;
664 	mode_t mask = vap->va_mask;
665 	int error;
666 	struct pcfs *fsp;
667 	timestruc_t now, *timep;
668 
669 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
670 	/*
671 	 * cannot set these attributes
672 	 */
673 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
674 		return (EINVAL);
675 	}
676 	/*
677 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
678 	 * from 'tar' when it tries to set times on a directory, and console
679 	 * printf's on the NFS server when it gets EINVAL back on such a
680 	 * request. One possible problem with that since a directory entry
681 	 * identifies a file, '.' and all the '..' entries in subdirectories
682 	 * may get out of sync when the directory is updated since they're
683 	 * treated like separate files. We could fix that by looking for
684 	 * '.' and giving it the same attributes, and then looking for
685 	 * all the subdirectories and updating '..', but that's pretty
686 	 * expensive for something that doesn't seem likely to matter.
687 	 */
688 	/* can't do some ops on directories anyway */
689 	if ((vp->v_type == VDIR) &&
690 	    (mask & AT_SIZE)) {
691 		return (EINVAL);
692 	}
693 
694 	fsp = VFSTOPCFS(vp->v_vfsp);
695 	error = pc_lockfs(fsp, 0, 0);
696 	if (error)
697 		return (error);
698 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
699 		pc_unlockfs(fsp);
700 		return (EIO);
701 	}
702 
703 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
704 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
705 			pc_unlockfs(fsp);
706 			return (EACCES);
707 		}
708 	}
709 
710 	/*
711 	 * Change file access modes.
712 	 * If nobody has write permission, file is marked readonly.
713 	 * Otherwise file is writable by anyone.
714 	 */
715 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
716 		if ((vap->va_mode & 0222) == 0)
717 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
718 		else
719 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
720 		pcp->pc_flags |= PC_CHG;
721 	}
722 	/*
723 	 * Truncate file. Must have write permission.
724 	 */
725 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
726 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
727 			error = EACCES;
728 			goto out;
729 		}
730 		if (vap->va_size > UINT32_MAX) {
731 			error = EFBIG;
732 			goto out;
733 		}
734 		error = pc_truncate(pcp, (uint_t)vap->va_size);
735 		if (error)
736 			goto out;
737 	}
738 	/*
739 	 * Change file modified times.
740 	 */
741 	if (mask & (AT_MTIME | AT_CTIME)) {
742 		/*
743 		 * If SysV-compatible option to set access and
744 		 * modified times if privileged, owner, or write access,
745 		 * use current time rather than va_mtime.
746 		 *
747 		 * XXX - va_mtime.tv_sec == -1 flags this.
748 		 */
749 		timep = &vap->va_mtime;
750 		if (vap->va_mtime.tv_sec == -1) {
751 			gethrestime(&now);
752 			timep = &now;
753 		}
754 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
755 		    timep->tv_sec > INT32_MAX) {
756 			error = EOVERFLOW;
757 			goto out;
758 		}
759 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
760 		if (error)
761 			goto out;
762 		pcp->pc_flags |= PC_CHG;
763 	}
764 	/*
765 	 * Change file access times.
766 	 */
767 	if (mask & AT_ATIME) {
768 		/*
769 		 * If SysV-compatible option to set access and
770 		 * modified times if privileged, owner, or write access,
771 		 * use current time rather than va_mtime.
772 		 *
773 		 * XXX - va_atime.tv_sec == -1 flags this.
774 		 */
775 		struct pctime	atime;
776 
777 		timep = &vap->va_atime;
778 		if (vap->va_atime.tv_sec == -1) {
779 			gethrestime(&now);
780 			timep = &now;
781 		}
782 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
783 		    timep->tv_sec > INT32_MAX) {
784 			error = EOVERFLOW;
785 			goto out;
786 		}
787 		error = pc_tvtopct(timep, &atime);
788 		if (error)
789 			goto out;
790 		pcp->pc_entry.pcd_ladate = atime.pct_date;
791 		pcp->pc_flags |= PC_CHG;
792 	}
793 out:
794 	pc_unlockfs(fsp);
795 	return (error);
796 }
797 
798 
799 /*ARGSUSED*/
800 static int
801 pcfs_access(
802 	struct vnode *vp,
803 	int mode,
804 	int flags,
805 	struct cred *cr)
806 {
807 	struct pcnode *pcp;
808 	struct pcfs *fsp;
809 
810 
811 	fsp = VFSTOPCFS(vp->v_vfsp);
812 
813 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
814 		return (EIO);
815 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
816 		return (EACCES);
817 
818 	/*
819 	 * If this is a boot partition, privileged users have full access while
820 	 * others have read-only access.
821 	 */
822 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
823 		if ((mode & VWRITE) &&
824 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
825 			return (EACCES);
826 	}
827 	return (0);
828 }
829 
830 
831 /*ARGSUSED*/
832 static int
833 pcfs_fsync(
834 	struct vnode *vp,
835 	int syncflag,
836 	struct cred *cr)
837 {
838 	struct pcfs *fsp;
839 	struct pcnode *pcp;
840 	int error;
841 
842 	fsp = VFSTOPCFS(vp->v_vfsp);
843 	if (error = pc_verify(fsp))
844 		return (error);
845 	error = pc_lockfs(fsp, 0, 0);
846 	if (error)
847 		return (error);
848 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
849 		pc_unlockfs(fsp);
850 		return (EIO);
851 	}
852 	rw_enter(&pcnodes_lock, RW_WRITER);
853 	error = pc_nodesync(pcp);
854 	rw_exit(&pcnodes_lock);
855 	pc_unlockfs(fsp);
856 	return (error);
857 }
858 
859 
860 /*ARGSUSED*/
861 static void
862 pcfs_inactive(
863 	struct vnode *vp,
864 	struct cred *cr)
865 {
866 	struct pcnode *pcp;
867 	struct pcfs *fsp;
868 	int error;
869 
870 	fsp = VFSTOPCFS(vp->v_vfsp);
871 	error = pc_lockfs(fsp, 0, 1);
872 
873 	/*
874 	 * If the filesystem was umounted by force, all dirty
875 	 * pages associated with this vnode are invalidated
876 	 * and then the vnode will be freed.
877 	 */
878 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
879 		pcp = VTOPC(vp);
880 		if (vn_has_cached_data(vp)) {
881 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
882 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
883 		}
884 		remque(pcp);
885 		if (error == 0)
886 			pc_unlockfs(fsp);
887 		vn_free(vp);
888 		kmem_free(pcp, sizeof (struct pcnode));
889 		VFS_RELE(PCFSTOVFS(fsp));
890 		return;
891 	}
892 
893 	mutex_enter(&vp->v_lock);
894 	ASSERT(vp->v_count >= 1);
895 	if (vp->v_count > 1) {
896 		vp->v_count--;  /* release our hold from vn_rele */
897 		mutex_exit(&vp->v_lock);
898 		pc_unlockfs(fsp);
899 		return;
900 	}
901 	mutex_exit(&vp->v_lock);
902 
903 	/*
904 	 * Check again to confirm that no intervening I/O error
905 	 * with a subsequent pc_diskchanged() call has released
906 	 * the pcnode. If it has then release the vnode as above.
907 	 */
908 	pcp = VTOPC(vp);
909 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
910 		if (vn_has_cached_data(vp))
911 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
912 			    pcfs_putapage, B_INVAL | B_TRUNC,
913 			    (struct cred *)NULL);
914 	}
915 
916 	if (pcp == NULL) {
917 		vn_free(vp);
918 	} else {
919 		pc_rele(pcp);
920 	}
921 
922 	if (!error)
923 		pc_unlockfs(fsp);
924 }
925 
926 /*ARGSUSED*/
927 static int
928 pcfs_lookup(
929 	struct vnode *dvp,
930 	char *nm,
931 	struct vnode **vpp,
932 	struct pathname *pnp,
933 	int flags,
934 	struct vnode *rdir,
935 	struct cred *cr)
936 {
937 	struct pcfs *fsp;
938 	struct pcnode *pcp;
939 	int error;
940 
941 	/*
942 	 * If the filesystem was umounted by force, return immediately.
943 	 */
944 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
945 		return (EIO);
946 
947 	/*
948 	 * verify that the dvp is still valid on the disk
949 	 */
950 	fsp = VFSTOPCFS(dvp->v_vfsp);
951 	if (error = pc_verify(fsp))
952 		return (error);
953 	error = pc_lockfs(fsp, 0, 0);
954 	if (error)
955 		return (error);
956 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
957 		pc_unlockfs(fsp);
958 		return (EIO);
959 	}
960 	/*
961 	 * Null component name is a synonym for directory being searched.
962 	 */
963 	if (*nm == '\0') {
964 		VN_HOLD(dvp);
965 		*vpp = dvp;
966 		pc_unlockfs(fsp);
967 		return (0);
968 	}
969 
970 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
971 	if (!error) {
972 		*vpp = PCTOV(pcp);
973 		pcp->pc_flags |= PC_EXTERNAL;
974 	}
975 	pc_unlockfs(fsp);
976 	return (error);
977 }
978 
979 
980 /*ARGSUSED*/
981 static int
982 pcfs_create(
983 	struct vnode *dvp,
984 	char *nm,
985 	struct vattr *vap,
986 	enum vcexcl exclusive,
987 	int mode,
988 	struct vnode **vpp,
989 	struct cred *cr,
990 	int flag)
991 {
992 	int error;
993 	struct pcnode *pcp;
994 	struct vnode *vp;
995 	struct pcfs *fsp;
996 
997 	/*
998 	 * can't create directories. use pcfs_mkdir.
999 	 * can't create anything other than files.
1000 	 */
1001 	if (vap->va_type == VDIR)
1002 		return (EISDIR);
1003 	else if (vap->va_type != VREG)
1004 		return (EINVAL);
1005 
1006 	pcp = NULL;
1007 	fsp = VFSTOPCFS(dvp->v_vfsp);
1008 	error = pc_lockfs(fsp, 0, 0);
1009 	if (error)
1010 		return (error);
1011 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1012 		pc_unlockfs(fsp);
1013 		return (EIO);
1014 	}
1015 
1016 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1017 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1018 			pc_unlockfs(fsp);
1019 			return (EACCES);
1020 		}
1021 	}
1022 
1023 	if (*nm == '\0') {
1024 		/*
1025 		 * Null component name refers to the directory itself.
1026 		 */
1027 		VN_HOLD(dvp);
1028 		pcp = VTOPC(dvp);
1029 		error = EEXIST;
1030 	} else {
1031 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1032 	}
1033 	/*
1034 	 * if file exists and this is a nonexclusive create,
1035 	 * check for access permissions
1036 	 */
1037 	if (error == EEXIST) {
1038 		vp = PCTOV(pcp);
1039 		if (exclusive == NONEXCL) {
1040 			if (vp->v_type == VDIR) {
1041 				error = EISDIR;
1042 			} else if (mode) {
1043 				error = pcfs_access(PCTOV(pcp), mode, 0,
1044 					cr);
1045 			} else {
1046 				error = 0;
1047 			}
1048 		}
1049 		if (error) {
1050 			VN_RELE(PCTOV(pcp));
1051 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1052 			(vap->va_size == 0)) {
1053 			error = pc_truncate(pcp, 0L);
1054 			if (error)
1055 				VN_RELE(PCTOV(pcp));
1056 		}
1057 	}
1058 	if (error) {
1059 		pc_unlockfs(fsp);
1060 		return (error);
1061 	}
1062 	*vpp = PCTOV(pcp);
1063 	pcp->pc_flags |= PC_EXTERNAL;
1064 	pc_unlockfs(fsp);
1065 	return (error);
1066 }
1067 
1068 /*ARGSUSED*/
1069 static int
1070 pcfs_remove(
1071 	struct vnode *vp,
1072 	char *nm,
1073 	struct cred *cr)
1074 {
1075 	struct pcfs *fsp;
1076 	struct pcnode *pcp;
1077 	int error;
1078 
1079 	fsp = VFSTOPCFS(vp->v_vfsp);
1080 	if (error = pc_verify(fsp))
1081 		return (error);
1082 	error = pc_lockfs(fsp, 0, 0);
1083 	if (error)
1084 		return (error);
1085 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1086 		pc_unlockfs(fsp);
1087 		return (EIO);
1088 	}
1089 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1090 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1091 			pc_unlockfs(fsp);
1092 			return (EACCES);
1093 		}
1094 	}
1095 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG);
1096 	pc_unlockfs(fsp);
1097 	return (error);
1098 }
1099 
1100 /*
1101  * Rename a file or directory
1102  * This rename is restricted to only rename files within a directory.
1103  * XX should make rename more general
1104  */
1105 /*ARGSUSED*/
1106 static int
1107 pcfs_rename(
1108 	struct vnode *sdvp,		/* old (source) parent vnode */
1109 	char *snm,			/* old (source) entry name */
1110 	struct vnode *tdvp,		/* new (target) parent vnode */
1111 	char *tnm,			/* new (target) entry name */
1112 	struct cred *cr)
1113 {
1114 	struct pcfs *fsp;
1115 	struct pcnode *dp;	/* parent pcnode */
1116 	struct pcnode *tdp;
1117 	int error;
1118 
1119 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1120 	if (error = pc_verify(fsp))
1121 		return (error);
1122 
1123 	/*
1124 	 * make sure we can muck with this directory.
1125 	 */
1126 	error = pcfs_access(sdvp, VWRITE, 0, cr);
1127 	if (error) {
1128 		return (error);
1129 	}
1130 	error = pc_lockfs(fsp, 0, 0);
1131 	if (error)
1132 		return (error);
1133 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1134 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1135 		pc_unlockfs(fsp);
1136 		return (EIO);
1137 	}
1138 	error = pc_rename(dp, tdp, snm, tnm);
1139 	pc_unlockfs(fsp);
1140 	return (error);
1141 }
1142 
1143 /*ARGSUSED*/
1144 static int
1145 pcfs_mkdir(
1146 	struct vnode *dvp,
1147 	char *nm,
1148 	struct vattr *vap,
1149 	struct vnode **vpp,
1150 	struct cred *cr)
1151 {
1152 	struct pcfs *fsp;
1153 	struct pcnode *pcp;
1154 	int error;
1155 
1156 	fsp = VFSTOPCFS(dvp->v_vfsp);
1157 	if (error = pc_verify(fsp))
1158 		return (error);
1159 	error = pc_lockfs(fsp, 0, 0);
1160 	if (error)
1161 		return (error);
1162 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1163 		pc_unlockfs(fsp);
1164 		return (EIO);
1165 	}
1166 
1167 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1168 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1169 			pc_unlockfs(fsp);
1170 			return (EACCES);
1171 		}
1172 	}
1173 
1174 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1175 
1176 	if (!error) {
1177 		pcp -> pc_flags |= PC_EXTERNAL;
1178 		*vpp = PCTOV(pcp);
1179 	} else if (error == EEXIST) {
1180 		VN_RELE(PCTOV(pcp));
1181 	}
1182 	pc_unlockfs(fsp);
1183 	return (error);
1184 }
1185 
1186 /*ARGSUSED*/
1187 static int
1188 pcfs_rmdir(
1189 	struct vnode *dvp,
1190 	char *nm,
1191 	struct vnode *cdir,
1192 	struct cred *cr)
1193 {
1194 	struct pcfs *fsp;
1195 	struct pcnode *pcp;
1196 	int error;
1197 
1198 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1199 	if (error = pc_verify(fsp))
1200 		return (error);
1201 	if (error = pc_lockfs(fsp, 0, 0))
1202 		return (error);
1203 
1204 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1205 		pc_unlockfs(fsp);
1206 		return (EIO);
1207 	}
1208 
1209 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1210 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1211 			pc_unlockfs(fsp);
1212 			return (EACCES);
1213 		}
1214 	}
1215 
1216 	error = pc_dirremove(pcp, nm, cdir, VDIR);
1217 	pc_unlockfs(fsp);
1218 	return (error);
1219 }
1220 
/*
 * read entries in a directory.
 * we must convert pc format to unix format
 *
 * The uio must consist of a single iovec and its offset must be a
 * multiple of sizeof (struct pcdir), otherwise EINVAL is returned.
 * For the root directory, "." and ".." entries are synthesized at
 * offsets 0 and sizeof (struct pcdir); the on-disk offset is therefore
 * two entries behind the uio offset.
 *
 * '*eofp' (if non-NULL) is set to 1 when the end of the directory is
 * reached (pc_blkatoff() returns ENOENT or a PCD_UNUSED entry is seen).
 *
 * Note that a non-zero return from pc_read_long_fn()/pc_read_short_fn()
 * only terminates the loop; it is not stored in 'error', so the function
 * returns 0 in that case.
 */

/*ARGSUSED*/
static int
pcfs_readdir(
	struct vnode *dvp,
	struct uio *uiop,
	struct cred *cr,
	int *eofp)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct pcdir *ep;
	struct buf *bp = NULL;
	offset_t offset;
	int boff;
	struct pc_dirent lbp;
	struct pc_dirent *ld = &lbp;
	int error;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if ((uiop->uio_iovcnt != 1) ||
	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
		return (EINVAL);
	}
	fsp = VFSTOPCFS(dvp->v_vfsp);
	/*
	 * verify that the dp is still valid on the disk
	 */
	if (error = pc_verify(fsp)) {
		return (error);
	}
	error = pc_lockfs(fsp, 0, 0);
	if (error)
		return (error);
	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
		pc_unlockfs(fsp);
		return (EIO);
	}

	bzero(ld, sizeof (*ld));

	if (eofp != NULL)
		*eofp = 0;
	offset = uiop->uio_loffset;

	if (dvp->v_flag & VROOT) {
		/*
		 * kludge up entries for "." and ".." in the root.
		 */
		if (offset == 0) {
			(void) strcpy(ld->d_name, ".");
			ld->d_reclen = DIRENT64_RECLEN(1);
			ld->d_off = (off64_t)sizeof (struct pcdir);
			ld->d_ino = (ino64_t)UINT_MAX;
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		if (offset == sizeof (struct pcdir)) {
			(void) strcpy(ld->d_name, "..");
			ld->d_reclen = DIRENT64_RECLEN(2);
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			ld->d_off = (off64_t)(uiop->uio_loffset +
			    sizeof (struct pcdir));
			ld->d_ino = (ino64_t)UINT_MAX;
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		offset -= 2 * sizeof (struct pcdir);
		/* offset now has the real offset value into directory file */
	}

	for (;;) {
		/*
		 * (Re)read the directory block whenever we start a new
		 * block, have no buffer yet, or ran off the current one.
		 */
		boff = pc_blkoff(fsp, offset);
		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
			if (bp != NULL) {
				brelse(bp);
				bp = NULL;
			}
			error = pc_blkatoff(pcp, offset, &bp, &ep);
			if (error) {
				/* ENOENT here means end of directory */
				if (error == ENOENT) {
					error = 0;
					if (eofp)
						*eofp = 1;
				}
				break;
			}
		}
		/* A PCD_UNUSED entry terminates the directory. */
		if (ep->pcd_filename[0] == PCD_UNUSED) {
			if (eofp)
				*eofp = 1;
			break;
		}
		/*
		 * Skip over entries marked PCD_ERASED, advancing both the
		 * uio offset and our directory offset by one entry.
		 */
		if (ep->pcd_filename[0] == PCD_ERASED) {
			uiop->uio_loffset += sizeof (struct pcdir);
			offset += sizeof (struct pcdir);
			ep++;
			continue;
		}
		/*
		 * Long filename entries: the helper advances ep/offset
		 * itself; a non-zero return ends the scan.
		 */
		if (PCDL_IS_LFN(ep)) {
			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
			    0)
				break;
			continue;
		}

		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
			break;
	}
	if (bp)
		brelse(bp);
	pc_unlockfs(fsp);
	return (error);
}
1356 
1357 
/*
 * Called from pvn_getpages or pcfs_getpage to get a particular page.
 * When we are called the pcfs is already locked.
 *
 * If the page at 'off' is not already present (page_exists()), it is
 * read from the device in one or more transfers, each sized by pc_bmap(),
 * and any part of the page past the end of the file is zeroed.  If the
 * page is present it is looked up with a shared lock and returned in
 * pl[0].  A NULL 'pl' (async/readahead request) is a no-op returning 0.
 *
 * Returns 0 on success, EIO for a force-unmounted filesystem or an
 * invalid pcnode, or the error from pc_bmap()/biowait().
 */
/*ARGSUSED*/
static int
pcfs_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],		/* NULL if async IO is requested */
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
	struct vnode *devvp;
	page_t *pp;
	page_t *pagefound;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
	    (void *)vp, off, len);

	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
		return (EIO);
	devvp = fsp->pcfs_devvp;

	/* pcfs doesn't do readaheads */
	if (pl == NULL)
		return (0);

	pl[0] = NULL;
	err = 0;
	/*
	 * If the accessed time on the pcnode has not already been
	 * set elsewhere (e.g. for read/setattr) we set the time now.
	 * This gives us approximate access times for mmap'ed files
	 * which are accessed via loads in the user address space.
	 * Nothing is marked on a read-only mount.
	 */
	if ((pcp->pc_flags & PC_ACC) == 0 &&
	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
		pcp->pc_flags |= PC_ACC;
		pc_mark_acc(pcp);
	}
reread:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page(s).
		 */
		struct buf *bp;
		daddr_t lbn, bn;
		u_offset_t io_off;
		size_t io_len;
		u_offset_t lbnoff, xferoffset;
		u_offset_t pgoff;
		uint_t	xfersize;
		int err1;

		/*
		 * lbn:        logical block (cluster) containing 'off'
		 * lbnoff:     file offset of the start of that cluster
		 * xferoffset: file offset of the sector containing 'off'
		 */
		lbn = pc_lblkno(fsp, off);
		lbnoff = off & ~(fsp->pcfs_clsize - 1);
		xferoffset = off & ~(fsp->pcfs_secsize - 1);

		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
		if (pp == NULL)
			/*
			 * XXX - If pcfs is made MT-hot, this should go
			 * back to reread.
			 */
			panic("pcfs_getapage pvn_read_kluster");

		/*
		 * Fill the page piecewise; each pass transfers 'xfersize'
		 * bytes (as reported back by pc_bmap) and stops at the
		 * end of the page or the end of the file.
		 */
		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
		    pgoff += xfersize,
		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
		    lbnoff += xfersize, xferoffset += xfersize) {
			/*
			 * read as many contiguous blocks as possible to
			 * fill this page
			 */
			xfersize = PAGESIZE - pgoff;
			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
			if (err1) {
				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
				err = err1;
				goto out;
			}
			bp = pageio_setup(pp, xfersize, devvp, B_READ);
			bp->b_edev = devvp->v_rdev;
			bp->b_dev = cmpdev(devvp->v_rdev);
			bp->b_blkno = bn +
			    /* add a sector offset within the cluster */
			    /* when the clustersize > PAGESIZE */
			    (xferoffset - lbnoff) / fsp->pcfs_secsize;
			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
			bp->b_file = vp;
			bp->b_offset = (offset_t)(off + pgoff);

			(void) bdev_strategy(bp);

			lwp_stat_update(LWP_STAT_INBLK, 1);

			/* always wait for the I/O; keep the first error */
			if (err == 0)
				err = biowait(bp);
			else
				(void) biowait(bp);
			pageio_done(bp);
			if (err)
				goto out;
		}
		/* zero whatever part of the page was not read from disk */
		if (pgoff < PAGESIZE) {
			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
		}
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	}
out:
	/*
	 * 'err' can only be non-zero here after pvn_read_kluster() has
	 * assigned 'pp', so 'pp' is initialized whenever it is tested.
	 */
	if (err) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (err);
	}

	if (pagefound) {
		/*
		 * Page exists in the cache, acquire the "shared"
		 * lock.  If this fails, go back to reread.
		 */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}
	return (err);
}
1503 
1504 /*
1505  * Return all the pages from [off..off+len] in given file
1506  */
1507 static int
1508 pcfs_getpage(
1509 	struct vnode *vp,
1510 	offset_t off,
1511 	size_t len,
1512 	uint_t *protp,
1513 	page_t *pl[],
1514 	size_t plsz,
1515 	struct seg *seg,
1516 	caddr_t addr,
1517 	enum seg_rw rw,
1518 	struct cred *cr)
1519 {
1520 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1521 	int err;
1522 
1523 	PC_DPRINTF0(6, "pcfs_getpage\n");
1524 	if (err = pc_verify(fsp))
1525 		return (err);
1526 	if (vp->v_flag & VNOMAP)
1527 		return (ENOSYS);
1528 	ASSERT(off <= UINT32_MAX);
1529 	err = pc_lockfs(fsp, 0, 0);
1530 	if (err)
1531 		return (err);
1532 	if (protp != NULL)
1533 		*protp = PROT_ALL;
1534 
1535 	ASSERT((off & PAGEOFFSET) == 0);
1536 	if (len <= PAGESIZE) {
1537 		err = pcfs_getapage(vp, off, len, protp, pl,
1538 		    plsz, seg, addr, rw, cr);
1539 	} else {
1540 		err = pvn_getpages(pcfs_getapage, vp, off,
1541 		    len, protp, pl, plsz, seg, addr, rw, cr);
1542 	}
1543 	pc_unlockfs(fsp);
1544 	return (err);
1545 }
1546 
1547 
/*
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 *
 * Returns EIO for a force-unmounted filesystem or a missing/invalid
 * pcnode, ENOSYS for VNOMAP vnodes, ENOMEM when called from the pageout
 * process, and otherwise the result of writing the dirty pages.
 * B_ASYNC is always stripped from 'flags', so all writes are synchronous.
 */
/*ARGSUSED*/
static int
pcfs_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	struct pcnode *pcp;
	page_t *pp;
	struct pcfs *fsp;
	u_offset_t io_off;
	size_t io_len;
	offset_t eoff;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	fsp = VFSTOPCFS(vp->v_vfsp);

	if (err = pc_verify(fsp))
		return (err);
	if ((pcp = VTOPC(vp)) == NULL) {
		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
		return (EIO);
	}
	if (pcp->pc_flags & PC_INVAL)
		return (EIO);

	if (curproc == proc_pageout) {
		/*
		 * XXX - This is a quick hack to avoid blocking
		 * pageout. Also to avoid pcfs_getapage deadlocking
		 * with putpage when memory is running out,
		 * since we only have one global lock and we don't
		 * support async putpage.
		 * It should be fixed someday.
		 *
		 * Interestingly, this used to be a test of NOMEMWAIT().
		 * We only ever got here once pcfs started supporting
		 * NFS sharing, and then only because the NFS server
		 * threads seem to do writes in sched's process context.
		 * Since everyone else seems to just care about pageout,
		 * the test was changed to look for pageout directly.
		 */
		return (ENOMEM);
	}

	ASSERT(off <= UINT32_MAX);

	flags &= ~B_ASYNC;	/* XXX should fix this later */

	err = pc_lockfs(fsp, 0, 0);
	if (err)
		return (err);
	/* nothing cached, or the range starts beyond EOF: nothing to do */
	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
		pc_unlockfs(fsp);
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		err = pvn_vplist_dirty(vp, off,
		    pcfs_putapage, flags, cr);
	} else {
		eoff = off + len;

		for (io_off = off; io_off < eoff &&
		    io_off < pcp->pc_size; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 *
			 * Since B_ASYNC was cleared above, the first
			 * branch is the one taken here.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
					(flags & (B_INVAL | B_FREE)) ?
					    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* no page, or page not dirty: step one page ahead */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = pcfs_putapage(vp, pp, &io_off, &io_len,
					flags, cr);
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && (flags & B_INVAL) &&
	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
		/*
		 * If doing "invalidation", make sure that
		 * all pages on the vnode list are actually
		 * gone.
		 */
		cmn_err(CE_PANIC,
			"pcfs_putpage: B_INVAL, pages not gone");
	} else if (err) {
		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
	}
	pc_unlockfs(fsp);
	return (err);
}
1684 
1685 /*
1686  * Write out a single page, possibly klustering adjacent dirty pages.
1687  */
1688 /*ARGSUSED*/
1689 int
1690 pcfs_putapage(
1691 	struct vnode *vp,
1692 	page_t *pp,
1693 	u_offset_t *offp,
1694 	size_t *lenp,
1695 	int flags,
1696 	struct cred *cr)
1697 {
1698 	struct pcnode *pcp;
1699 	struct pcfs *fsp;
1700 	struct vnode *devvp;
1701 	size_t io_len;
1702 	daddr_t bn;
1703 	u_offset_t lbn, lbnoff, xferoffset;
1704 	uint_t pgoff, xfersize;
1705 	int err = 0;
1706 	u_offset_t io_off;
1707 
1708 	pcp = VTOPC(vp);
1709 	fsp = VFSTOPCFS(vp->v_vfsp);
1710 	devvp = fsp->pcfs_devvp;
1711 
1712 	/*
1713 	 * If the modified time on the inode has not already been
1714 	 * set elsewhere (e.g. for write/setattr) and this is not
1715 	 * a call from msync (B_FORCE) we set the time now.
1716 	 * This gives us approximate modified times for mmap'ed files
1717 	 * which are modified via stores in the user address space.
1718 	 */
1719 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1720 		pcp->pc_flags |= PC_MOD;
1721 		pc_mark_mod(pcp);
1722 	}
1723 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1724 	    PAGESIZE, flags);
1725 
1726 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1727 		goto out;
1728 	}
1729 
1730 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1731 
1732 	lbn = pc_lblkno(fsp, io_off);
1733 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1734 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1735 
1736 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1737 	    pgoff += xfersize,
1738 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1739 	    lbnoff += xfersize, xferoffset += xfersize) {
1740 
1741 		struct buf *bp;
1742 		int err1;
1743 
1744 		/*
1745 		 * write as many contiguous blocks as possible from this page
1746 		 */
1747 		xfersize = io_len - pgoff;
1748 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1749 		if (err1) {
1750 			err = err1;
1751 			goto out;
1752 		}
1753 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1754 		bp->b_edev = devvp->v_rdev;
1755 		bp->b_dev = cmpdev(devvp->v_rdev);
1756 		bp->b_blkno = bn +
1757 		    /* add a sector offset within the cluster */
1758 		    /* when the clustersize > PAGESIZE */
1759 		    (xferoffset - lbnoff) / fsp->pcfs_secsize;
1760 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1761 		bp->b_file = vp;
1762 		bp->b_offset = (offset_t)(io_off + pgoff);
1763 
1764 		(void) bdev_strategy(bp);
1765 
1766 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1767 
1768 		if (err == 0)
1769 			err = biowait(bp);
1770 		else
1771 			(void) biowait(bp);
1772 		pageio_done(bp);
1773 	}
1774 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1775 	pp = NULL;
1776 
1777 out:
1778 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1779 		pvn_write_done(pp, B_WRITE | flags);
1780 	} else if (err != 0 && pp != NULL) {
1781 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1782 	}
1783 
1784 	if (offp)
1785 		*offp = io_off;
1786 	if (lenp)
1787 		*lenp = io_len;
1788 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1789 		    (void *)vp, (void *)pp, io_off, io_len);
1790 	if (err) {
1791 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1792 	}
1793 	return (err);
1794 }
1795 
1796 /*ARGSUSED*/
1797 static int
1798 pcfs_map(
1799 	struct vnode *vp,
1800 	offset_t off,
1801 	struct as *as,
1802 	caddr_t *addrp,
1803 	size_t len,
1804 	uchar_t prot,
1805 	uchar_t maxprot,
1806 	uint_t flags,
1807 	struct cred *cr)
1808 {
1809 	struct segvn_crargs vn_a;
1810 	int error;
1811 
1812 	PC_DPRINTF0(6, "pcfs_map\n");
1813 	if (vp->v_flag & VNOMAP)
1814 		return (ENOSYS);
1815 
1816 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1817 		return (ENXIO);
1818 
1819 	as_rangelock(as);
1820 	if ((flags & MAP_FIXED) == 0) {
1821 		map_addr(addrp, len, off, 1, flags);
1822 		if (*addrp == NULL) {
1823 			as_rangeunlock(as);
1824 			return (ENOMEM);
1825 		}
1826 	} else {
1827 		/*
1828 		 * User specified address - blow away any previous mappings
1829 		 */
1830 		(void) as_unmap(as, *addrp, len);
1831 	}
1832 
1833 	vn_a.vp = vp;
1834 	vn_a.offset = off;
1835 	vn_a.type = flags & MAP_TYPE;
1836 	vn_a.prot = prot;
1837 	vn_a.maxprot = maxprot;
1838 	vn_a.flags = flags & ~MAP_TYPE;
1839 	vn_a.cred = cr;
1840 	vn_a.amp = NULL;
1841 	vn_a.szc = 0;
1842 	vn_a.lgrp_mem_policy_flags = 0;
1843 
1844 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1845 	as_rangeunlock(as);
1846 	return (error);
1847 }
1848 
1849 /* ARGSUSED */
1850 static int
1851 pcfs_seek(
1852 	struct vnode *vp,
1853 	offset_t ooff,
1854 	offset_t *noffp)
1855 {
1856 	if (*noffp < 0)
1857 		return (EINVAL);
1858 	else if (*noffp > MAXOFFSET_T)
1859 		return (EINVAL);
1860 	else
1861 		return (0);
1862 }
1863 
1864 /* ARGSUSED */
1865 static int
1866 pcfs_addmap(
1867 	struct vnode *vp,
1868 	offset_t off,
1869 	struct as *as,
1870 	caddr_t addr,
1871 	size_t len,
1872 	uchar_t prot,
1873 	uchar_t maxprot,
1874 	uint_t flags,
1875 	struct cred *cr)
1876 {
1877 	if (vp->v_flag & VNOMAP)
1878 		return (ENOSYS);
1879 	return (0);
1880 }
1881 
1882 /*ARGSUSED*/
1883 static int
1884 pcfs_delmap(
1885 	struct vnode *vp,
1886 	offset_t off,
1887 	struct as *as,
1888 	caddr_t addr,
1889 	size_t len,
1890 	uint_t prot,
1891 	uint_t maxprot,
1892 	uint_t flags,
1893 	struct cred *cr)
1894 {
1895 	if (vp->v_flag & VNOMAP)
1896 		return (ENOSYS);
1897 	return (0);
1898 }
1899 
1900 /*
1901  * POSIX pathconf() support.
1902  */
1903 /* ARGSUSED */
1904 static int
1905 pcfs_pathconf(
1906 	struct vnode *vp,
1907 	int cmd,
1908 	ulong_t *valp,
1909 	struct cred *cr)
1910 {
1911 	ulong_t val;
1912 	int error = 0;
1913 	struct statvfs64 vfsbuf;
1914 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1915 
1916 	switch (cmd) {
1917 
1918 	case _PC_LINK_MAX:
1919 		val = 1;
1920 		break;
1921 
1922 	case _PC_MAX_CANON:
1923 		val = MAX_CANON;
1924 		break;
1925 
1926 	case _PC_MAX_INPUT:
1927 		val = MAX_INPUT;
1928 		break;
1929 
1930 	case _PC_NAME_MAX:
1931 		bzero(&vfsbuf, sizeof (vfsbuf));
1932 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1933 			break;
1934 		val = vfsbuf.f_namemax;
1935 		break;
1936 
1937 	case _PC_PATH_MAX:
1938 	case _PC_SYMLINK_MAX:
1939 		val = PCMAXPATHLEN;
1940 		break;
1941 
1942 	case _PC_PIPE_BUF:
1943 		val = PIPE_BUF;
1944 		break;
1945 
1946 	case _PC_NO_TRUNC:
1947 		val = (ulong_t)-1; 	/* Will truncate long file name */
1948 		break;
1949 
1950 	case _PC_VDISABLE:
1951 		val = _POSIX_VDISABLE;
1952 		break;
1953 
1954 	case _PC_CHOWN_RESTRICTED:
1955 		if (rstchown)
1956 			val = rstchown;		/* chown restricted enabled */
1957 		else
1958 			val = (ulong_t)-1;
1959 		break;
1960 
1961 	case _PC_ACL_ENABLED:
1962 		val = 0;
1963 		break;
1964 
1965 	case _PC_FILESIZEBITS:
1966 		/*
1967 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1968 		 * FAT12 can only go up to the maximum filesystem capacity
1969 		 * which is ~509MB.
1970 		 */
1971 		val = IS_FAT12(fsp) ? 30 : 33;
1972 		break;
1973 	default:
1974 		error = EINVAL;
1975 		break;
1976 	}
1977 
1978 	if (error == 0)
1979 		*valp = val;
1980 	return (error);
1981 }
1982 
1983 /* ARGSUSED */
1984 static int
1985 pcfs_space(
1986 	struct vnode *vp,
1987 	int cmd,
1988 	struct flock64 *bfp,
1989 	int flag,
1990 	offset_t offset,
1991 	cred_t *cr,
1992 	caller_context_t *ct)
1993 {
1994 	struct vattr vattr;
1995 	int error;
1996 
1997 	if (cmd != F_FREESP)
1998 		return (EINVAL);
1999 
2000 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2001 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2002 			return (EFBIG);
2003 		/*
2004 		 * we only support the special case of l_len == 0,
2005 		 * meaning free to end of file at this moment.
2006 		 */
2007 		if (bfp->l_len != 0)
2008 			return (EINVAL);
2009 		vattr.va_mask = AT_SIZE;
2010 		vattr.va_size = bfp->l_start;
2011 		error = VOP_SETATTR(vp, &vattr, 0, cr, ct);
2012 	}
2013 	return (error);
2014 }
2015 
2016 /*
2017  * Break up 'len' chars from 'buf' into a long file name chunk.
2018  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2019  */
2020 void
2021 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2022 {
2023 	char 	*tmp = buf;
2024 	int	i;
2025 
2026 
2027 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2028 		if (len > 0) {
2029 			ep->pcdl_firstfilename[i] = *tmp;
2030 			ep->pcdl_firstfilename[i+1] = 0;
2031 			len--;
2032 			tmp++;
2033 		} else {
2034 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2035 			ep->pcdl_firstfilename[i+1] = (uchar_t)0xff;
2036 		}
2037 	}
2038 
2039 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2040 		if (len > 0) {
2041 			ep->pcdl_secondfilename[i] = *tmp;
2042 			ep->pcdl_secondfilename[i+1] = 0;
2043 			len--;
2044 			tmp++;
2045 		} else {
2046 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2047 			ep->pcdl_secondfilename[i+1] = (uchar_t)0xff;
2048 		}
2049 	}
2050 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2051 		if (len > 0) {
2052 			ep->pcdl_thirdfilename[i] = *tmp;
2053 			ep->pcdl_thirdfilename[i+1] = 0;
2054 			len--;
2055 			tmp++;
2056 		} else {
2057 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2058 			ep->pcdl_thirdfilename[i+1] = (uchar_t)0xff;
2059 		}
2060 	}
2061 }
2062 
2063 /*
2064  * Extract the characters from the long filename chunk into 'buf'.
2065  * Return the number of characters extracted.
2066  */
2067 static int
2068 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2069 {
2070 	char 	*tmp = buf;
2071 	int	i;
2072 
2073 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp++) {
2074 		if (ep->pcdl_firstfilename[i+1] != '\0')
2075 			return (-1);
2076 		if (foldcase)
2077 			*tmp = tolower(ep->pcdl_firstfilename[i]);
2078 		else
2079 			*tmp = ep->pcdl_firstfilename[i];
2080 		if (*tmp == '\0')
2081 			return (tmp - buf);
2082 	}
2083 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp++) {
2084 		if (ep->pcdl_secondfilename[i+1] != '\0')
2085 			return (-1);
2086 		if (foldcase)
2087 			*tmp = tolower(ep->pcdl_secondfilename[i]);
2088 		else
2089 			*tmp = ep->pcdl_secondfilename[i];
2090 		if (*tmp == '\0')
2091 			return (tmp - buf);
2092 	}
2093 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp++) {
2094 		if (ep->pcdl_thirdfilename[i+1] != '\0')
2095 			return (-1);
2096 		if (foldcase)
2097 			*tmp = tolower(ep->pcdl_thirdfilename[i]);
2098 		else
2099 			*tmp = ep->pcdl_thirdfilename[i];
2100 		if (*tmp == '\0')
2101 			return (tmp - buf);
2102 	}
2103 	*tmp = '\0';
2104 	return (tmp - buf);
2105 }
2106 
2107 
/*
 * Checksum the passed in short filename.
 * This is used to validate each component of the long name to make
 * sure the long name is valid (it hasn't been "detached" from the
 * short filename). This algorithm was found in FreeBSD.
 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
 *
 * The checksum covers the 8 bytes at 'name' followed by the 3 bytes at
 * 'ext': for every byte the running 8-bit sum is rotated right by one
 * bit and the byte is added to it.
 */

uchar_t
pc_checksum_long_fn(char *name, char *ext)
{
	uchar_t	sum = 0;
	int	i;

	/* rotating the initial 0 is a no-op, so the first pass yields name[0] */
	for (i = 0; i < 8; i++)
		sum = (uchar_t)(((sum << 7) | (sum >> 1)) + name[i]);
	for (i = 0; i < 3; i++)
		sum = (uchar_t)(((sum << 7) | (sum >> 1)) + ext[i]);

	return (sum);
}
2139 
2140 /*
2141  * Read a chunk of long filename entries into 'namep'.
2142  * Return with offset pointing to short entry (on success), or next
2143  * entry to read (if this wasn't a valid lfn really).
2144  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2145  * a long filename.
2146  *
2147  * Can also be called with a NULL namep, in which case it just returns
2148  * whether this was really a valid long filename and consumes it
2149  * (used by pc_dirempty()).
2150  */
2151 int
2152 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2153     struct pcdir **epp, offset_t *offset, struct buf **bp)
2154 {
2155 	struct pcdir *ep = *epp;
2156 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2157 	struct vnode *dvp = PCTOV(pcp);
2158 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2159 	char	*lfn;
2160 	char	*lfn_base;
2161 	int	boff;
2162 	int	i, cs;
2163 	char	buf[20];
2164 	uchar_t	cksum;
2165 	int	detached = 0;
2166 	int	error = 0;
2167 	int	foldcase;
2168 
2169 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2170 	/* use callers buffer unless we didn't get one */
2171 	if (namep)
2172 		lfn_base = namep;
2173 	else
2174 		lfn_base = kmem_alloc(PCMAXNAMLEN+1, KM_SLEEP);
2175 	lfn = lfn_base + PCMAXNAMLEN - 1;
2176 	*lfn = '\0';
2177 	cksum = lep->pcdl_checksum;
2178 
2179 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2180 		/* read next block if necessary */
2181 		boff = pc_blkoff(fsp, *offset);
2182 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2183 			if (*bp != NULL) {
2184 				brelse(*bp);
2185 				*bp = NULL;
2186 			}
2187 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2188 			if (error) {
2189 				if (namep == NULL)
2190 					kmem_free(lfn_base, PCMAXNAMLEN+1);
2191 				return (error);
2192 			}
2193 			lep = (struct pcdir_lfn *)ep;
2194 		}
2195 		/* can this happen? Bad fs? */
2196 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2197 			detached = 1;
2198 			break;
2199 		}
2200 		if (cksum != lep->pcdl_checksum)
2201 			detached = 1;
2202 		/* process current entry */
2203 		cs = get_long_fn_chunk(lep, buf, foldcase);
2204 		if (cs == -1) {
2205 			detached = 1;
2206 		} else {
2207 			for (; cs > 0; cs--) {
2208 				/* see if we underflow */
2209 				if (lfn >= lfn_base)
2210 					*--lfn = buf[cs - 1];
2211 				else
2212 					detached = 1;
2213 			}
2214 		}
2215 		lep++;
2216 		*offset += sizeof (struct pcdir);
2217 	}
2218 	/* read next block if necessary */
2219 	boff = pc_blkoff(fsp, *offset);
2220 	ep = (struct pcdir *)lep;
2221 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2222 		if (*bp != NULL) {
2223 			brelse(*bp);
2224 			*bp = NULL;
2225 		}
2226 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2227 		if (error) {
2228 			if (namep == NULL)
2229 				kmem_free(lfn_base, PCMAXNAMLEN+1);
2230 			return (error);
2231 		}
2232 	}
2233 	/* should be on the short one */
2234 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2235 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2236 		detached = 1;
2237 	}
2238 	if (detached ||
2239 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2240 	    !pc_valid_long_fn(lfn)) {
2241 		/*
2242 		 * process current entry again. This may end up another lfn
2243 		 * or a short name.
2244 		 */
2245 		*epp = ep;
2246 		if (namep == NULL)
2247 			kmem_free(lfn_base, PCMAXNAMLEN+1);
2248 		return (EINVAL);
2249 	}
2250 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2251 		/*
2252 		 * Don't display label because it may contain
2253 		 * funny characters.
2254 		 */
2255 		*offset += sizeof (struct pcdir);
2256 		ep++;
2257 		*epp = ep;
2258 		if (namep == NULL)
2259 			kmem_free(lfn_base, PCMAXNAMLEN+1);
2260 		return (EINVAL);
2261 	}
2262 	if (namep) {
2263 		/* lfn is part of namep, but shifted. shift it back */
2264 		cs = strlen(lfn);
2265 		for (i = 0; i < cs; i++)
2266 			namep[i] = lfn[i];
2267 		namep[i] = '\0';
2268 	} else {
2269 		kmem_free(lfn_base, PCMAXNAMLEN+1);
2270 	}
2271 	*epp = ep;
2272 	return (0);
2273 }
2274 /*
2275  * Read a long filename into the pc_dirent structure and copy it out.
2276  */
2277 int
2278 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2279     struct pcdir **epp, offset_t *offset, struct buf **bp)
2280 {
2281 	struct pcdir *ep;
2282 	struct pcnode *pcp = VTOPC(dvp);
2283 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2284 	offset_t uiooffset = uiop->uio_loffset;
2285 	int	error = 0;
2286 	offset_t oldoffset;
2287 
2288 	oldoffset = *offset;
2289 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2290 	if (error) {
2291 		if (error == EINVAL) {
2292 			uiop->uio_loffset += *offset - oldoffset;
2293 			return (0);
2294 		} else
2295 			return (error);
2296 	}
2297 
2298 	ep = *epp;
2299 	uiop->uio_loffset += *offset - oldoffset;
2300 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2301 	if (ld->d_reclen > uiop->uio_resid) {
2302 		uiop->uio_loffset = uiooffset;
2303 		return (ENOSPC);
2304 	}
2305 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2306 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2307 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2308 	    pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2309 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2310 	uiop->uio_loffset = ld->d_off;
2311 	*offset += sizeof (struct pcdir);
2312 	ep++;
2313 	*epp = ep;
2314 	return (0);
2315 }
2316 
2317 /*
2318  * Read a short filename into the pc_dirent structure and copy it out.
2319  */
2320 int
2321 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2322     struct pcdir **epp, offset_t *offset, struct buf **bp)
2323 {
2324 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2325 	int	boff = pc_blkoff(fsp, *offset);
2326 	struct pcdir *ep = *epp;
2327 	offset_t	oldoffset = uiop->uio_loffset;
2328 	int	error;
2329 	int	foldcase;
2330 
2331 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2332 		uiop->uio_loffset += sizeof (struct pcdir);
2333 		*offset += sizeof (struct pcdir);
2334 		ep++;
2335 		*epp = ep;
2336 		return (0);
2337 	}
2338 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2339 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2340 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2341 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2342 	    &ep->pcd_ext[0], foldcase);
2343 	if (error == 0) {
2344 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2345 		if (ld->d_reclen > uiop->uio_resid) {
2346 			uiop->uio_loffset = oldoffset;
2347 			return (ENOSPC);
2348 		}
2349 		ld->d_off = (off64_t)(uiop->uio_loffset +
2350 		    sizeof (struct pcdir));
2351 		(void) uiomove((caddr_t)ld,
2352 		    ld->d_reclen, UIO_READ, uiop);
2353 		uiop->uio_loffset = ld->d_off;
2354 	} else {
2355 		uiop->uio_loffset += sizeof (struct pcdir);
2356 	}
2357 	*offset += sizeof (struct pcdir);
2358 	ep++;
2359 	*epp = ep;
2360 	return (0);
2361 }
2362 
2363 static int
2364 pcfs_fid(struct vnode *vp, struct fid *fidp)
2365 {
2366 	struct pc_fid *pcfid;
2367 	struct pcnode *pcp;
2368 	struct pcfs	*fsp;
2369 	int	error;
2370 
2371 	fsp = VFSTOPCFS(vp->v_vfsp);
2372 	if (fsp == NULL)
2373 		return (EIO);
2374 	error = pc_lockfs(fsp, 0, 0);
2375 	if (error)
2376 		return (error);
2377 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2378 		pc_unlockfs(fsp);
2379 		return (EIO);
2380 	}
2381 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2382 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2383 		pc_unlockfs(fsp);
2384 		return (ENOSPC);
2385 	}
2386 
2387 	pcfid = (struct pc_fid *)fidp;
2388 	bzero(pcfid, sizeof (struct pc_fid));
2389 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2390 	if (vp->v_flag & VROOT) {
2391 		pcfid->pcfid_block = 0;
2392 		pcfid->pcfid_offset = 0;
2393 		pcfid->pcfid_ctime = 0;
2394 	} else {
2395 		pcfid->pcfid_block = pcp->pc_eblkno;
2396 		pcfid->pcfid_offset = pcp->pc_eoffset;
2397 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2398 	}
2399 	pc_unlockfs(fsp);
2400 	return (0);
2401 }
2402