xref: /titanic_41/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 91c7793e73e8fb0edb9c55f2828d2dfd8ff09994)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/dirent.h>
37 #include <sys/vnode.h>
38 #include <sys/proc.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/uio.h>
42 #include <sys/fs/pc_label.h>
43 #include <sys/fs/pc_fs.h>
44 #include <sys/fs/pc_dir.h>
45 #include <sys/fs/pc_node.h>
46 #include <sys/mman.h>
47 #include <sys/pathname.h>
48 #include <sys/vmsystm.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/statvfs.h>
52 #include <sys/unistd.h>
53 #include <sys/kmem.h>
54 #include <sys/conf.h>
55 #include <sys/flock.h>
56 #include <sys/policy.h>
57 #include <sys/sdt.h>
58 
59 #include <vm/seg.h>
60 #include <vm/page.h>
61 #include <vm/pvn.h>
62 #include <vm/seg_map.h>
63 #include <vm/seg_vn.h>
64 #include <vm/hat.h>
65 #include <vm/as.h>
66 #include <vm/seg_kmem.h>
67 
68 #include <fs/fs_subr.h>
69 
/*
 * Forward declarations for the vnode operations implemented in this file.
 * All of them are file-local; they are referenced through the operation
 * templates defined further down.
 */
static int pcfs_open(struct vnode **, int, struct cred *);
static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *);
static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
	struct caller_context *);
static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
	struct caller_context *);
static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *);
static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
	caller_context_t *);
static int pcfs_access(struct vnode *, int, int, struct cred *);
static int pcfs_lookup(struct vnode *, char *, struct vnode **,
	struct pathname *, int, struct vnode *, struct cred *);
static int pcfs_create(struct vnode *, char *, struct vattr *,
	enum vcexcl, int mode, struct vnode **, struct cred *, int);
static int pcfs_remove(struct vnode *, char *, struct cred *);
static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
	struct cred *);
static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
	struct cred *);
static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *);
static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *);
static int pcfs_fsync(struct vnode *, int, struct cred *);
static void pcfs_inactive(struct vnode *, struct cred *);
static int pcfs_fid(struct vnode *vp, struct fid *fidp);
static int pcfs_space(struct vnode *, int, struct flock64 *, int,
	offset_t, cred_t *, caller_context_t *);
static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *);
static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
	uchar_t, uchar_t, uint_t, struct cred *);
static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
	size_t, uchar_t, uchar_t, uint_t, struct cred *);
static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
	size_t, uint_t, uint_t, uint_t, struct cred *);
static int pcfs_seek(struct vnode *, offset_t, offset_t *);
static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *);

/*
 * pcfs_putapage() is the only routine declared here without "static";
 * within this file it is handed to pvn_vplist_dirty() as the per-page
 * callback (see pcfs_inactive()).
 */
int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
	struct cred *);
/* Common worker behind pcfs_read() and pcfs_write(). */
static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);

/*
 * Defined elsewhere; in this file it is taken as writer around
 * pc_nodesync() in pcfs_fsync().
 */
extern krwlock_t pcnodes_lock;

/*
 * Add sizeof (long long) - 1 to "r" and clear the low-order bits, i.e.
 * round "r" up to the next multiple of sizeof (long long).
 * NOTE(review): the name is the same as the C99 <math.h> lround(); that
 * only matters if such a header were ever included here.
 */
#define	lround(r)	(((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
118 
/*
 * vnode op vectors for files and directories.
 *
 * Both pointers are only declared here; nothing in this part of the file
 * assigns them.  Presumably they are built from pcfs_fvnodeops_template
 * and pcfs_dvnodeops_template during filesystem initialization -- confirm
 * against the pcfs init code.
 */
struct vnodeops *pcfs_fvnodeops;
struct vnodeops *pcfs_dvnodeops;
124 
/*
 * Operation template for regular files: pairs each VOPNAME_* operation
 * name with the pcfs routine that implements it.  Compared with the
 * directory template below, it supplies the data-path operations
 * (read, write, space, getpage, putpage, map, addmap, delmap) and omits
 * the name-space operations (lookup, create, remove, rename, mkdir,
 * rmdir, readdir).  The list ends with a NULL, NULL pair.
 */
const fs_operation_def_t pcfs_fvnodeops_template[] = {
	VOPNAME_OPEN, pcfs_open,
	VOPNAME_CLOSE, pcfs_close,
	VOPNAME_READ, pcfs_read,
	VOPNAME_WRITE, pcfs_write,
	VOPNAME_GETATTR, pcfs_getattr,
	VOPNAME_SETATTR, pcfs_setattr,
	VOPNAME_ACCESS, pcfs_access,
	VOPNAME_FSYNC, pcfs_fsync,
	/* pcfs_inactive returns void, hence the explicit cast */
	VOPNAME_INACTIVE, (fs_generic_func_p) pcfs_inactive,
	VOPNAME_FID, pcfs_fid,
	VOPNAME_SEEK, pcfs_seek,
	VOPNAME_SPACE, pcfs_space,
	VOPNAME_GETPAGE, pcfs_getpage,
	VOPNAME_PUTPAGE, pcfs_putpage,
	VOPNAME_MAP, (fs_generic_func_p) pcfs_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) pcfs_addmap,
	VOPNAME_DELMAP, pcfs_delmap,
	VOPNAME_PATHCONF, pcfs_pathconf,
	VOPNAME_VNEVENT, fs_vnevent_support,
	NULL, NULL	/* end-of-table marker */
};
147 
/*
 * Operation template for directories: pairs each VOPNAME_* operation
 * name with the pcfs routine that implements it.  It carries the
 * name-space operations (lookup, create, remove, rename, mkdir, rmdir,
 * readdir) and has no read/write or paging entries.  The list ends with
 * a NULL, NULL pair.
 */
const fs_operation_def_t pcfs_dvnodeops_template[] = {
	VOPNAME_OPEN, pcfs_open,
	VOPNAME_CLOSE, pcfs_close,
	VOPNAME_GETATTR, pcfs_getattr,
	VOPNAME_SETATTR, pcfs_setattr,
	VOPNAME_ACCESS, pcfs_access,
	VOPNAME_LOOKUP, pcfs_lookup,
	VOPNAME_CREATE, pcfs_create,
	VOPNAME_REMOVE, pcfs_remove,
	VOPNAME_RENAME, pcfs_rename,
	VOPNAME_MKDIR, pcfs_mkdir,
	VOPNAME_RMDIR, pcfs_rmdir,
	VOPNAME_READDIR, pcfs_readdir,
	VOPNAME_FSYNC, pcfs_fsync,
	/* pcfs_inactive returns void, hence the explicit cast */
	VOPNAME_INACTIVE, (fs_generic_func_p) pcfs_inactive,
	VOPNAME_FID, pcfs_fid,
	VOPNAME_SEEK, pcfs_seek,
	VOPNAME_PATHCONF, pcfs_pathconf,
	VOPNAME_VNEVENT, fs_vnevent_support,
	NULL, NULL	/* end-of-table marker */
};
169 
170 
/*
 * Open entry point, installed in both the file and the directory
 * operation templates.  It does no work: every argument is ignored and
 * the call always succeeds with 0.
 */
/*ARGSUSED*/
static int
pcfs_open(
	struct vnode **vpp,
	int flag,
	struct cred *cr)
{
	return (0);
}
180 
/*
 * Close entry point, installed in both the file and the directory
 * operation templates.
 *
 * An earlier comment here said that files are sync'ed on close to keep
 * the floppy up to date.  The routine as written does nothing of the
 * sort: it ignores all of its arguments and simply returns 0.
 */

/*ARGSUSED*/
static int
pcfs_close(
	struct vnode *vp,
	int flag,
	int count,
	offset_t offset,
	struct cred *cr)
{
	return (0);
}
196 
197 /*ARGSUSED*/
198 static int
199 pcfs_read(
200 	struct vnode *vp,
201 	struct uio *uiop,
202 	int ioflag,
203 	struct cred *cr,
204 	struct caller_context *ct)
205 {
206 	struct pcfs *fsp;
207 	struct pcnode *pcp;
208 	int error;
209 
210 	fsp = VFSTOPCFS(vp->v_vfsp);
211 	if (error = pc_verify(fsp))
212 		return (error);
213 	error = pc_lockfs(fsp, 0, 0);
214 	if (error)
215 		return (error);
216 	if ((pcp = VTOPC(vp)) == NULL) {
217 		pc_unlockfs(fsp);
218 		return (EIO);
219 	}
220 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
221 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
222 		pcp->pc_flags |= PC_ACC;
223 		pc_mark_acc(pcp);
224 	}
225 	pc_unlockfs(fsp);
226 	if (error) {
227 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
228 	}
229 	return (error);
230 }
231 
232 /*ARGSUSED*/
233 static int
234 pcfs_write(
235 	struct vnode *vp,
236 	struct uio *uiop,
237 	int ioflag,
238 	struct cred *cr,
239 	struct caller_context *ct)
240 {
241 	struct pcfs *fsp;
242 	struct pcnode *pcp;
243 	int error;
244 
245 	fsp = VFSTOPCFS(vp->v_vfsp);
246 	if (error = pc_verify(fsp))
247 		return (error);
248 	error = pc_lockfs(fsp, 0, 0);
249 	if (error)
250 		return (error);
251 	if ((pcp = VTOPC(vp)) == NULL) {
252 		pc_unlockfs(fsp);
253 		return (EIO);
254 	}
255 	if (ioflag & FAPPEND) {
256 		/*
257 		 * in append mode start at end of file.
258 		 */
259 		uiop->uio_loffset = pcp->pc_size;
260 	}
261 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
262 	pcp->pc_flags |= PC_MOD;
263 	pc_mark_mod(pcp);
264 	if (ioflag & (FSYNC|FDSYNC))
265 		(void) pc_nodeupdate(pcp);
266 
267 	pc_unlockfs(fsp);
268 	if (error) {
269 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
270 	}
271 	return (error);
272 }
273 
274 /*
275  * read or write a vnode
276  */
277 static int
278 rwpcp(
279 	struct pcnode *pcp,
280 	struct uio *uio,
281 	enum uio_rw rw,
282 	int ioflag)
283 {
284 	struct vnode *vp = PCTOV(pcp);
285 	struct pcfs *fsp;
286 	daddr_t bn;			/* phys block number */
287 	int n;
288 	offset_t off;
289 	caddr_t base;
290 	int mapon, pagecreate;
291 	int newpage;
292 	int error = 0;
293 	rlim64_t limit = uio->uio_llimit;
294 	int oresid = uio->uio_resid;
295 
296 	/*
297 	 * If the filesystem was umounted by force, return immediately.
298 	 */
299 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
300 		return (EIO);
301 
302 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
303 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
304 
305 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
306 	ASSERT(vp->v_type == VREG);
307 
308 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
309 		return (0);
310 	}
311 
312 	if (uio->uio_loffset < 0)
313 		return (EINVAL);
314 
315 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
316 		limit = MAXOFFSET_T;
317 
318 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
319 		proc_t *p = ttoproc(curthread);
320 
321 		mutex_enter(&p->p_lock);
322 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
323 		    p, RCA_UNSAFE_SIGINFO);
324 		mutex_exit(&p->p_lock);
325 		return (EFBIG);
326 	}
327 
328 	/* the following condition will occur only for write */
329 
330 	if (uio->uio_loffset >= UINT32_MAX)
331 		return (EFBIG);
332 
333 	if (uio->uio_resid == 0)
334 		return (0);
335 
336 	if (limit > UINT32_MAX)
337 		limit = UINT32_MAX;
338 
339 	fsp = VFSTOPCFS(vp->v_vfsp);
340 	if (fsp->pcfs_flags & PCFS_IRRECOV)
341 		return (EIO);
342 
343 	do {
344 		/*
345 		 * Assignments to "n" in this block may appear
346 		 * to overflow in some cases.  However, after careful
347 		 * analysis it was determined that all assignments to
348 		 * "n" serve only to make "n" smaller.  Since "n"
349 		 * starts out as no larger than MAXBSIZE, "int" is
350 		 * safe.
351 		 */
352 		off = uio->uio_loffset & MAXBMASK;
353 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
354 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
355 		if (rw == UIO_READ) {
356 			offset_t diff;
357 
358 			diff = pcp->pc_size - uio->uio_loffset;
359 			if (diff <= 0)
360 				return (0);
361 			if (diff < n)
362 				n = (int)diff;
363 		}
364 		/*
365 		 * Compare limit with the actual offset + n, not the
366 		 * rounded down offset "off" or we will overflow
367 		 * the maximum file size after all.
368 		 */
369 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
370 			if (uio->uio_loffset >= limit) {
371 				error = EFBIG;
372 				break;
373 			}
374 			n = (int)(limit - uio->uio_loffset);
375 		}
376 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
377 		pagecreate = 0;
378 		newpage = 0;
379 		if (rw == UIO_WRITE) {
380 			/*
381 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
382 			 * with one page at a time, instead of one MAXBSIZE
383 			 * at a time, so we can fully explore pagecreate
384 			 * optimization??
385 			 */
386 			if (uio->uio_loffset + n > pcp->pc_size) {
387 				uint_t ncl, lcn;
388 
389 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
390 					fsp->pcfs_clsize);
391 				if (uio->uio_loffset > pcp->pc_size &&
392 				    ncl < (uint_t)howmany(uio->uio_loffset,
393 							fsp->pcfs_clsize)) {
394 					/*
395 					 * Allocate and zerofill skipped
396 					 * clusters. This may not be worth the
397 					 * effort since a small lseek beyond
398 					 * eof but still within the cluster
399 					 * will not be zeroed out.
400 					 */
401 					lcn = pc_lblkno(fsp, uio->uio_loffset);
402 					error = pc_balloc(pcp, (daddr_t)lcn,
403 					    1, &bn);
404 					ncl = lcn + 1;
405 				}
406 				if (!error &&
407 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
408 							fsp->pcfs_clsize))
409 					/*
410 					 * allocate clusters w/o zerofill
411 					 */
412 					error = pc_balloc(pcp,
413 					    (daddr_t)pc_lblkno(fsp,
414 					    uio->uio_loffset + n - 1),
415 					    0, &bn);
416 
417 				pcp->pc_flags |= PC_CHG;
418 
419 				if (error) {
420 					/* figure out new file size */
421 					pcp->pc_size = fsp->pcfs_clsize *
422 					    pc_fileclsize(fsp,
423 						pcp->pc_scluster);
424 
425 					if (error == ENOSPC &&
426 					    (pcp->pc_size - uio->uio_loffset)
427 						> 0) {
428 						PC_DPRINTF3(2, "rwpcp ENOSPC "
429 						    "off=%lld n=%d size=%d\n",
430 						    uio->uio_loffset,
431 						    n, pcp->pc_size);
432 						n = (int)(pcp->pc_size -
433 							uio->uio_loffset);
434 					} else {
435 						PC_DPRINTF1(1,
436 						    "rwpcp error1=%d\n", error);
437 						(void) segmap_release(segkmap,
438 						    base, 0);
439 						break;
440 					}
441 				} else {
442 					pcp->pc_size =
443 					    (uint_t)(uio->uio_loffset + n);
444 				}
445 				if (mapon == 0) {
446 					newpage = segmap_pagecreate(segkmap,
447 						base, (size_t)n, 0);
448 					pagecreate = 1;
449 				}
450 			} else if (n == MAXBSIZE) {
451 				newpage = segmap_pagecreate(segkmap, base,
452 						(size_t)n, 0);
453 				pagecreate = 1;
454 			}
455 		}
456 		error = uiomove(base + mapon, (size_t)n, rw, uio);
457 
458 		if (pagecreate && uio->uio_loffset <
459 			roundup(off + mapon + n, PAGESIZE)) {
460 			offset_t nzero, nmoved;
461 
462 			nmoved = uio->uio_loffset - (off + mapon);
463 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
464 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
465 		}
466 
467 		/*
468 		 * Unlock the pages which have been allocated by
469 		 * page_create_va() in segmap_pagecreate().
470 		 */
471 		if (newpage)
472 			segmap_pageunlock(segkmap, base, (size_t)n,
473 				rw == UIO_WRITE ? S_WRITE : S_READ);
474 
475 		if (error) {
476 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
477 			/*
478 			 * If we failed on a write, we may have already
479 			 * allocated file blocks as well as pages.  It's hard
480 			 * to undo the block allocation, but we must be sure
481 			 * to invalidate any pages that may have been
482 			 * allocated.
483 			 */
484 			if (rw == UIO_WRITE)
485 				(void) segmap_release(segkmap, base, SM_INVAL);
486 			else
487 				(void) segmap_release(segkmap, base, 0);
488 		} else {
489 			uint_t flags = 0;
490 
491 			if (rw == UIO_READ) {
492 				if (n + mapon == MAXBSIZE ||
493 				    uio->uio_loffset == pcp->pc_size)
494 					flags = SM_DONTNEED;
495 			} else if (ioflag & (FSYNC|FDSYNC)) {
496 				flags = SM_WRITE;
497 			} else if (n + mapon == MAXBSIZE) {
498 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
499 			}
500 			error = segmap_release(segkmap, base, flags);
501 		}
502 
503 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
504 
505 	if (oresid != uio->uio_resid)
506 		error = 0;
507 	return (error);
508 }
509 
510 /*ARGSUSED*/
511 static int
512 pcfs_getattr(
513 	struct vnode *vp,
514 	struct vattr *vap,
515 	int flags,
516 	struct cred *cr)
517 {
518 	struct pcnode *pcp;
519 	struct pcfs *fsp;
520 	int error;
521 	char attr;
522 	struct pctime atime;
523 	int64_t unixtime;
524 
525 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
526 
527 	fsp = VFSTOPCFS(vp->v_vfsp);
528 	error = pc_lockfs(fsp, 0, 0);
529 	if (error)
530 		return (error);
531 	if ((pcp = VTOPC(vp)) == NULL) {
532 		pc_unlockfs(fsp);
533 		return (EIO);
534 	}
535 	/*
536 	 * Copy from pcnode.
537 	 */
538 	vap->va_type = vp->v_type;
539 	attr = pcp->pc_entry.pcd_attr;
540 	if (PCA_IS_HIDDEN(fsp, attr))
541 		vap->va_mode = 0;
542 	else if (attr & PCA_LABEL)
543 		vap->va_mode = 0444;
544 	else if (attr & PCA_RDONLY)
545 		vap->va_mode = 0555;
546 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
547 		vap->va_mode = 0755;
548 	} else {
549 		vap->va_mode = 0777;
550 	}
551 
552 	if (attr & PCA_DIR)
553 		vap->va_mode |= S_IFDIR;
554 	else
555 		vap->va_mode |= S_IFREG;
556 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
557 		vap->va_uid = 0;
558 		vap->va_gid = 0;
559 	} else {
560 		vap->va_uid = crgetuid(cr);
561 		vap->va_gid = crgetgid(cr);
562 	}
563 	vap->va_fsid = vp->v_vfsp->vfs_dev;
564 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
565 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
566 	    pc_getstartcluster(fsp, &pcp->pc_entry), fsp->pcfs_entps);
567 	vap->va_nlink = 1;
568 	vap->va_size = (u_offset_t)pcp->pc_size;
569 
570 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
571 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
572 		if (unixtime > INT32_MAX)
573 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
574 		unixtime = MIN(unixtime, INT32_MAX);
575 	} else if (unixtime > INT32_MAX &&
576 	    get_udatamodel() == DATAMODEL_ILP32) {
577 		pc_unlockfs(fsp);
578 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
579 		return (EOVERFLOW);
580 	}
581 
582 	vap->va_mtime.tv_sec = (time_t)unixtime;
583 	vap->va_mtime.tv_nsec = 0;
584 
585 	/*
586 	 * FAT doesn't know about POSIX ctime.
587 	 * Best approximation is to always set it to mtime.
588 	 */
589 	vap->va_ctime = vap->va_mtime;
590 
591 	/*
592 	 * FAT only stores "last access date". If that's the
593 	 * same as the date of last modification then the time
594 	 * of last access is known. Otherwise, use midnight.
595 	 */
596 	atime.pct_date = pcp->pc_entry.pcd_ladate;
597 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
598 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
599 	else
600 		atime.pct_time = 0;
601 	pc_pcttotv(&atime, &unixtime);
602 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
603 		if (unixtime > INT32_MAX)
604 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
605 		unixtime = MIN(unixtime, INT32_MAX);
606 	} else if (unixtime > INT32_MAX &&
607 	    get_udatamodel() == DATAMODEL_ILP32) {
608 		pc_unlockfs(fsp);
609 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
610 		return (EOVERFLOW);
611 	}
612 
613 	vap->va_atime.tv_sec = (time_t)unixtime;
614 	vap->va_atime.tv_nsec = 0;
615 
616 	vap->va_rdev = 0;
617 	vap->va_nblocks = (fsblkcnt64_t)howmany((offset_t)pcp->pc_size,
618 				DEV_BSIZE);
619 	vap->va_blksize = fsp->pcfs_clsize;
620 	pc_unlockfs(fsp);
621 	return (0);
622 }
623 
624 
625 /*ARGSUSED*/
626 static int
627 pcfs_setattr(
628 	struct vnode *vp,
629 	struct vattr *vap,
630 	int flags,
631 	struct cred *cr,
632 	caller_context_t *ct)
633 {
634 	struct pcnode *pcp;
635 	mode_t mask = vap->va_mask;
636 	int error;
637 	struct pcfs *fsp;
638 	timestruc_t now, *timep;
639 
640 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
641 	/*
642 	 * cannot set these attributes
643 	 */
644 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
645 		return (EINVAL);
646 	}
647 	/*
648 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
649 	 * from 'tar' when it tries to set times on a directory, and console
650 	 * printf's on the NFS server when it gets EINVAL back on such a
651 	 * request. One possible problem with that since a directory entry
652 	 * identifies a file, '.' and all the '..' entries in subdirectories
653 	 * may get out of sync when the directory is updated since they're
654 	 * treated like separate files. We could fix that by looking for
655 	 * '.' and giving it the same attributes, and then looking for
656 	 * all the subdirectories and updating '..', but that's pretty
657 	 * expensive for something that doesn't seem likely to matter.
658 	 */
659 	/* can't do some ops on directories anyway */
660 	if ((vp->v_type == VDIR) &&
661 	    (mask & AT_SIZE)) {
662 		return (EINVAL);
663 	}
664 
665 	fsp = VFSTOPCFS(vp->v_vfsp);
666 	error = pc_lockfs(fsp, 0, 0);
667 	if (error)
668 		return (error);
669 	if ((pcp = VTOPC(vp)) == NULL) {
670 		pc_unlockfs(fsp);
671 		return (EIO);
672 	}
673 
674 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
675 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
676 			pc_unlockfs(fsp);
677 			return (EACCES);
678 		}
679 	}
680 
681 	/*
682 	 * Change file access modes.
683 	 * If nobody has write permission, file is marked readonly.
684 	 * Otherwise file is writable by anyone.
685 	 */
686 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
687 		if ((vap->va_mode & 0222) == 0)
688 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
689 		else
690 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
691 		pcp->pc_flags |= PC_CHG;
692 	}
693 	/*
694 	 * Truncate file. Must have write permission.
695 	 */
696 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
697 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
698 			error = EACCES;
699 			goto out;
700 		}
701 		if (vap->va_size > UINT32_MAX) {
702 			error = EFBIG;
703 			goto out;
704 		}
705 		error = pc_truncate(pcp, (uint_t)vap->va_size);
706 		if (error)
707 			goto out;
708 	}
709 	/*
710 	 * Change file modified times.
711 	 */
712 	if (mask & (AT_MTIME | AT_CTIME)) {
713 		/*
714 		 * If SysV-compatible option to set access and
715 		 * modified times if privileged, owner, or write access,
716 		 * use current time rather than va_mtime.
717 		 *
718 		 * XXX - va_mtime.tv_sec == -1 flags this.
719 		 */
720 		timep = &vap->va_mtime;
721 		if (vap->va_mtime.tv_sec == -1) {
722 			gethrestime(&now);
723 			timep = &now;
724 		}
725 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
726 		    timep->tv_sec > INT32_MAX) {
727 			error = EOVERFLOW;
728 			goto out;
729 		}
730 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
731 		if (error)
732 			goto out;
733 		pcp->pc_flags |= PC_CHG;
734 	}
735 	/*
736 	 * Change file access times.
737 	 */
738 	if (mask & AT_ATIME) {
739 		/*
740 		 * If SysV-compatible option to set access and
741 		 * modified times if privileged, owner, or write access,
742 		 * use current time rather than va_mtime.
743 		 *
744 		 * XXX - va_atime.tv_sec == -1 flags this.
745 		 */
746 		struct pctime	atime;
747 
748 		timep = &vap->va_atime;
749 		if (vap->va_atime.tv_sec == -1) {
750 			gethrestime(&now);
751 			timep = &now;
752 		}
753 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
754 		    timep->tv_sec > INT32_MAX) {
755 			error = EOVERFLOW;
756 			goto out;
757 		}
758 		error = pc_tvtopct(timep, &atime);
759 		if (error)
760 			goto out;
761 		pcp->pc_entry.pcd_ladate = atime.pct_date;
762 		pcp->pc_flags |= PC_CHG;
763 	}
764 out:
765 	pc_unlockfs(fsp);
766 	return (error);
767 }
768 
769 
770 /*ARGSUSED*/
771 static int
772 pcfs_access(
773 	struct vnode *vp,
774 	int mode,
775 	int flags,
776 	struct cred *cr)
777 {
778 	struct pcnode *pcp;
779 	struct pcfs *fsp;
780 
781 
782 	fsp = VFSTOPCFS(vp->v_vfsp);
783 
784 	if ((pcp = VTOPC(vp)) == NULL)
785 		return (EIO);
786 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
787 		return (EACCES);
788 
789 	/*
790 	 * If this is a boot partition, privileged users have full access while
791 	 * others have read-only access.
792 	 */
793 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
794 		if ((mode & VWRITE) &&
795 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
796 			return (EACCES);
797 	}
798 	return (0);
799 }
800 
801 
802 /*ARGSUSED*/
803 static int
804 pcfs_fsync(
805 	struct vnode *vp,
806 	int syncflag,
807 	struct cred *cr)
808 {
809 	struct pcfs *fsp;
810 	struct pcnode *pcp;
811 	int error;
812 
813 	fsp = VFSTOPCFS(vp->v_vfsp);
814 	if (error = pc_verify(fsp))
815 		return (error);
816 	error = pc_lockfs(fsp, 0, 0);
817 	if (error)
818 		return (error);
819 	if ((pcp = VTOPC(vp)) == NULL) {
820 		pc_unlockfs(fsp);
821 		return (EIO);
822 	}
823 	rw_enter(&pcnodes_lock, RW_WRITER);
824 	error = pc_nodesync(pcp);
825 	rw_exit(&pcnodes_lock);
826 	pc_unlockfs(fsp);
827 	return (error);
828 }
829 
830 
831 /*ARGSUSED*/
832 static void
833 pcfs_inactive(
834 	struct vnode *vp,
835 	struct cred *cr)
836 {
837 	struct pcnode *pcp;
838 	struct pcfs *fsp;
839 	int error;
840 
841 	fsp = VFSTOPCFS(vp->v_vfsp);
842 	error = pc_lockfs(fsp, 0, 1);
843 
844 	/*
845 	 * If the filesystem was umounted by force, all dirty
846 	 * pages associated with this vnode are invalidated
847 	 * and then the vnode will be freed.
848 	 */
849 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
850 		pcp = VTOPC(vp);
851 		if (vn_has_cached_data(vp)) {
852 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
853 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
854 		}
855 		remque(pcp);
856 		if (error == 0)
857 			pc_unlockfs(fsp);
858 		vn_free(vp);
859 		kmem_free(pcp, sizeof (struct pcnode));
860 		VFS_RELE(PCFSTOVFS(fsp));
861 		return;
862 	}
863 
864 	mutex_enter(&vp->v_lock);
865 	ASSERT(vp->v_count >= 1);
866 	if (vp->v_count > 1) {
867 		vp->v_count--;  /* release our hold from vn_rele */
868 		mutex_exit(&vp->v_lock);
869 		pc_unlockfs(fsp);
870 		return;
871 	}
872 	mutex_exit(&vp->v_lock);
873 
874 	/*
875 	 * Check again to confirm that no intervening I/O error
876 	 * with a subsequent pc_diskchanged() call has released
877 	 * the pcnode.  If it has then release the vnode as above.
878 	 */
879 	if ((pcp = VTOPC(vp)) == NULL) {
880 		if (vn_has_cached_data(vp))
881 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
882 			    pcfs_putapage, B_INVAL | B_TRUNC,
883 			    (struct cred *)NULL);
884 		vn_free(vp);
885 	} else {
886 		pc_rele(pcp);
887 	}
888 
889 	if (!error)
890 		pc_unlockfs(fsp);
891 }
892 
893 /*ARGSUSED*/
894 static int
895 pcfs_lookup(
896 	struct vnode *dvp,
897 	char *nm,
898 	struct vnode **vpp,
899 	struct pathname *pnp,
900 	int flags,
901 	struct vnode *rdir,
902 	struct cred *cr)
903 {
904 	struct pcfs *fsp;
905 	struct pcnode *pcp;
906 	int error;
907 
908 	/*
909 	 * If the filesystem was umounted by force, return immediately.
910 	 */
911 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
912 		return (EIO);
913 
914 	/*
915 	 * verify that the dvp is still valid on the disk
916 	 */
917 	fsp = VFSTOPCFS(dvp->v_vfsp);
918 	if (error = pc_verify(fsp))
919 		return (error);
920 	error = pc_lockfs(fsp, 0, 0);
921 	if (error)
922 		return (error);
923 	if (VTOPC(dvp) == NULL) {
924 		pc_unlockfs(fsp);
925 		return (EIO);
926 	}
927 	/*
928 	 * Null component name is a synonym for directory being searched.
929 	 */
930 	if (*nm == '\0') {
931 		VN_HOLD(dvp);
932 		*vpp = dvp;
933 		pc_unlockfs(fsp);
934 		return (0);
935 	}
936 
937 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
938 	if (!error) {
939 		*vpp = PCTOV(pcp);
940 		pcp->pc_flags |= PC_EXTERNAL;
941 	}
942 	pc_unlockfs(fsp);
943 	return (error);
944 }
945 
946 
947 /*ARGSUSED*/
948 static int
949 pcfs_create(
950 	struct vnode *dvp,
951 	char *nm,
952 	struct vattr *vap,
953 	enum vcexcl exclusive,
954 	int mode,
955 	struct vnode **vpp,
956 	struct cred *cr,
957 	int flag)
958 {
959 	int error;
960 	struct pcnode *pcp;
961 	struct vnode *vp;
962 	struct pcfs *fsp;
963 
964 	/*
965 	 * can't create directories. use pcfs_mkdir.
966 	 * can't create anything other than files.
967 	 */
968 	if (vap->va_type == VDIR)
969 		return (EISDIR);
970 	else if (vap->va_type != VREG)
971 		return (EINVAL);
972 
973 	pcp = NULL;
974 	fsp = VFSTOPCFS(dvp->v_vfsp);
975 	error = pc_lockfs(fsp, 0, 0);
976 	if (error)
977 		return (error);
978 	if (VTOPC(dvp) == NULL) {
979 		pc_unlockfs(fsp);
980 		return (EIO);
981 	}
982 
983 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
984 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
985 			pc_unlockfs(fsp);
986 			return (EACCES);
987 		}
988 	}
989 
990 	if (*nm == '\0') {
991 		/*
992 		 * Null component name refers to the directory itself.
993 		 */
994 		VN_HOLD(dvp);
995 		pcp = VTOPC(dvp);
996 		error = EEXIST;
997 	} else {
998 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
999 	}
1000 	/*
1001 	 * if file exists and this is a nonexclusive create,
1002 	 * check for access permissions
1003 	 */
1004 	if (error == EEXIST) {
1005 		vp = PCTOV(pcp);
1006 		if (exclusive == NONEXCL) {
1007 			if (vp->v_type == VDIR) {
1008 				error = EISDIR;
1009 			} else if (mode) {
1010 				error = pcfs_access(PCTOV(pcp), mode, 0,
1011 					cr);
1012 			} else {
1013 				error = 0;
1014 			}
1015 		}
1016 		if (error) {
1017 			VN_RELE(PCTOV(pcp));
1018 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1019 			(vap->va_size == 0)) {
1020 			error = pc_truncate(pcp, 0L);
1021 			if (error)
1022 				VN_RELE(PCTOV(pcp));
1023 		}
1024 	}
1025 	if (error) {
1026 		pc_unlockfs(fsp);
1027 		return (error);
1028 	}
1029 	*vpp = PCTOV(pcp);
1030 	pcp->pc_flags |= PC_EXTERNAL;
1031 	pc_unlockfs(fsp);
1032 	return (error);
1033 }
1034 
1035 /*ARGSUSED*/
1036 static int
1037 pcfs_remove(
1038 	struct vnode *vp,
1039 	char *nm,
1040 	struct cred *cr)
1041 {
1042 	struct pcfs *fsp;
1043 	struct pcnode *pcp;
1044 	int error;
1045 
1046 	fsp = VFSTOPCFS(vp->v_vfsp);
1047 	if (error = pc_verify(fsp))
1048 		return (error);
1049 	error = pc_lockfs(fsp, 0, 0);
1050 	if (error)
1051 		return (error);
1052 	if ((pcp = VTOPC(vp)) == NULL) {
1053 		pc_unlockfs(fsp);
1054 		return (EIO);
1055 	}
1056 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1057 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1058 			pc_unlockfs(fsp);
1059 			return (EACCES);
1060 		}
1061 	}
1062 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG);
1063 	pc_unlockfs(fsp);
1064 	return (error);
1065 }
1066 
1067 /*
1068  * Rename a file or directory
1069  * This rename is restricted to only rename files within a directory.
1070  * XX should make rename more general
1071  */
1072 /*ARGSUSED*/
1073 static int
1074 pcfs_rename(
1075 	struct vnode *sdvp,		/* old (source) parent vnode */
1076 	char *snm,			/* old (source) entry name */
1077 	struct vnode *tdvp,		/* new (target) parent vnode */
1078 	char *tnm,			/* new (target) entry name */
1079 	struct cred *cr)
1080 {
1081 	struct pcfs *fsp;
1082 	struct pcnode *dp;	/* parent pcnode */
1083 	struct pcnode *tdp;
1084 	int error;
1085 
1086 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1087 	if (error = pc_verify(fsp))
1088 		return (error);
1089 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL)) {
1090 		return (EIO);
1091 	}
1092 
1093 	/*
1094 	 * make sure we can muck with this directory.
1095 	 */
1096 	error = pcfs_access(sdvp, VWRITE, 0, cr);
1097 	if (error) {
1098 		return (error);
1099 	}
1100 	error = pc_lockfs(fsp, 0, 0);
1101 	if (error)
1102 		return (error);
1103 	if ((VTOPC(sdvp) == NULL) || (VTOPC(tdvp) == NULL)) {
1104 		pc_unlockfs(fsp);
1105 		return (EIO);
1106 	}
1107 	error = pc_rename(dp, tdp, snm, tnm);
1108 	pc_unlockfs(fsp);
1109 	return (error);
1110 }
1111 
1112 /*ARGSUSED*/
1113 static int
1114 pcfs_mkdir(
1115 	struct vnode *dvp,
1116 	char *nm,
1117 	struct vattr *vap,
1118 	struct vnode **vpp,
1119 	struct cred *cr)
1120 {
1121 	struct pcfs *fsp;
1122 	struct pcnode *pcp;
1123 	int error;
1124 
1125 	fsp = VFSTOPCFS(dvp->v_vfsp);
1126 	if (error = pc_verify(fsp))
1127 		return (error);
1128 	error = pc_lockfs(fsp, 0, 0);
1129 	if (error)
1130 		return (error);
1131 	if (VTOPC(dvp) == NULL) {
1132 		pc_unlockfs(fsp);
1133 		return (EIO);
1134 	}
1135 
1136 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1137 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1138 			pc_unlockfs(fsp);
1139 			return (EACCES);
1140 		}
1141 	}
1142 
1143 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1144 
1145 	if (!error) {
1146 		pcp -> pc_flags |= PC_EXTERNAL;
1147 		*vpp = PCTOV(pcp);
1148 	} else if (error == EEXIST) {
1149 		VN_RELE(PCTOV(pcp));
1150 	}
1151 	pc_unlockfs(fsp);
1152 	return (error);
1153 }
1154 
1155 /*ARGSUSED*/
1156 static int
1157 pcfs_rmdir(
1158 	struct vnode *dvp,
1159 	char *nm,
1160 	struct vnode *cdir,
1161 	struct cred *cr)
1162 {
1163 	struct pcfs *fsp;
1164 	struct pcnode *pcp;
1165 	int error;
1166 
1167 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1168 	if (error = pc_verify(fsp))
1169 		return (error);
1170 	if (error = pc_lockfs(fsp, 0, 0))
1171 		return (error);
1172 
1173 	if ((pcp = VTOPC(dvp)) == NULL) {
1174 		pc_unlockfs(fsp);
1175 		return (EIO);
1176 	}
1177 
1178 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1179 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1180 			pc_unlockfs(fsp);
1181 			return (EACCES);
1182 		}
1183 	}
1184 
1185 	error = pc_dirremove(pcp, nm, cdir, VDIR);
1186 	pc_unlockfs(fsp);
1187 	return (error);
1188 }
1189 
/*
 * read entries in a directory.
 * we must convert pc format to unix format
 *
 * The uio offset is interpreted in units of on-disk directory slots
 * (sizeof (struct pcdir)); a request with more than one iovec or with an
 * offset that is not a multiple of the slot size is rejected with EINVAL.
 * For the root directory the first two slot offsets are used for
 * synthesized "." and ".." entries, and the on-disk offset is the uio
 * offset minus those two slots.
 *
 * If 'eofp' is non-NULL it is cleared on entry and set to 1 when the end
 * of the directory is reached (pc_blkatoff() returns ENOENT, or an entry
 * marked PCD_UNUSED is found).
 *
 * Returns 0 on success, EIO if the filesystem was forcibly unmounted or
 * the vnode has no pcnode, ENOSPC if the caller's buffer cannot hold the
 * synthesized root entries, or an error from pc_verify(), pc_lockfs() or
 * pc_blkatoff().
 */

/*ARGSUSED*/
static int
pcfs_readdir(
	struct vnode *dvp,
	struct uio *uiop,
	struct cred *cr,
	int *eofp)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct pcdir *ep;
	struct buf *bp = NULL;
	offset_t offset;
	int boff;
	struct pc_dirent lbp;
	struct pc_dirent *ld = &lbp;
	int error;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if ((uiop->uio_iovcnt != 1) ||
	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
		return (EINVAL);
	}
	fsp = VFSTOPCFS(dvp->v_vfsp);
	/*
	 * verify that the dp is still valid on the disk
	 */
	if (error = pc_verify(fsp)) {
		return (error);
	}
	error = pc_lockfs(fsp, 0, 0);
	if (error)
		return (error);
	if ((pcp = VTOPC(dvp)) == NULL) {
		pc_unlockfs(fsp);
		return (EIO);
	}

	bzero(ld, sizeof (*ld));

	if (eofp != NULL)
		*eofp = 0;
	offset = uiop->uio_loffset;

	if (dvp->v_flag & VROOT) {
		/*
		 * kludge up entries for "." and ".." in the root.
		 */
		if (offset == 0) {
			(void) strcpy(ld->d_name, ".");
			ld->d_reclen = DIRENT64_RECLEN(1);
			ld->d_off = (off64_t)sizeof (struct pcdir);
			ld->d_ino = (ino64_t)UINT_MAX;
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		if (offset == sizeof (struct pcdir)) {
			(void) strcpy(ld->d_name, "..");
			ld->d_reclen = DIRENT64_RECLEN(2);
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			ld->d_off = (off64_t)(uiop->uio_loffset +
			    sizeof (struct pcdir));
			ld->d_ino = (ino64_t)UINT_MAX;
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		offset -= 2 * sizeof (struct pcdir);
		/* offset now has the real offset value into directory file */
	}

	for (;;) {
		/*
		 * (Re)load the directory block when no buffer is held yet or
		 * the offset has moved onto a block boundary or past the
		 * data covered by the current buffer.
		 */
		boff = pc_blkoff(fsp, offset);
		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
			if (bp != NULL) {
				brelse(bp);
				bp = NULL;
			}
			error = pc_blkatoff(pcp, offset, &bp, &ep);
			if (error) {
				/* ENOENT here means end of directory */
				if (error == ENOENT) {
					error = 0;
					if (eofp)
						*eofp = 1;
				}
				break;
			}
		}
		/* An entry marked PCD_UNUSED ends the directory. */
		if (ep->pcd_filename[0] == PCD_UNUSED) {
			if (eofp)
				*eofp = 1;
			break;
		}
		/*
		 * Skip over entries marked PCD_ERASED, advancing both the
		 * caller-visible and the on-disk offset by one slot.
		 */
		if (ep->pcd_filename[0] == PCD_ERASED) {
			uiop->uio_loffset += sizeof (struct pcdir);
			offset += sizeof (struct pcdir);
			ep++;
			continue;
		}
		/*
		 * A non-zero return from either helper (e.g. ENOSPC when
		 * the caller's buffer is full) just ends the scan; 'error'
		 * is deliberately left untouched so what was already copied
		 * out is returned successfully.
		 */
		if (PCDL_IS_LFN(ep)) {
			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
			    0)
				break;
			continue;
		}

		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
			break;
	}
	if (bp)
		brelse(bp);
	pc_unlockfs(fsp);
	return (error);
}
1325 
1326 
/*
 * Called from pvn_getpages or pcfs_getpage to get a particular page.
 * When we are called the pcfs is already locked.
 *
 * If the page at 'off' is not already in the page cache it is created
 * with pvn_read_kluster() and filled from the device with one or more
 * synchronous reads (one per contiguous extent reported by pc_bmap());
 * any part of the page beyond the file size is zeroed.  If the page is
 * already cached it is looked up with a shared lock and returned in
 * pl[0].
 *
 * Returns 0 on success (also when 'pl' is NULL, since no read-ahead is
 * done), EIO if the filesystem was forcibly unmounted or the vnode has
 * no pcnode, or the error from pc_bmap() / biowait().
 */
/*ARGSUSED*/
static int
pcfs_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],		/* NULL if async IO is requested */
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
	struct vnode *devvp;
	page_t *pp;
	page_t *pagefound;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
	    (void *)vp, off, len);

	if ((pcp = VTOPC(vp)) == NULL)
		return (EIO);
	devvp = fsp->pcfs_devvp;

	/* pcfs doesn't do readaheads */
	if (pl == NULL)
		return (0);

	pl[0] = NULL;
	err = 0;
	/*
	 * If the accessed time on the pcnode has not already been
	 * set elsewhere (e.g. for read/setattr) we set the time now.
	 * This gives us approximate access times for mmap'ed files
	 * which are accessed via loads in the user address space.
	 * Nothing is marked on a read-only mount.
	 */
	if ((pcp->pc_flags & PC_ACC) == 0 &&
	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
		pcp->pc_flags |= PC_ACC;
		pc_mark_acc(pcp);
	}
reread:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page(s).
		 */
		struct buf *bp;
		daddr_t lbn, bn;
		u_offset_t io_off;
		size_t io_len;
		u_offset_t lbnoff, xferoffset;
		u_offset_t pgoff;
		uint_t	xfersize;
		int err1;

		/*
		 * lbn is the logical block holding 'off', lbnoff the file
		 * offset rounded down to a cluster boundary and xferoffset
		 * the file offset rounded down to a sector boundary.
		 */
		lbn = pc_lblkno(fsp, off);
		lbnoff = off & ~(fsp->pcfs_clsize - 1);
		xferoffset = off & ~(fsp->pcfs_secsize - 1);

		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
		if (pp == NULL)
			/*
			 * XXX - If pcfs is made MT-hot, this should go
			 * back to reread.
			 */
			panic("pcfs_getapage pvn_read_kluster");

		/*
		 * Fill the page extent by extent.  pc_bmap() trims xfersize
		 * to what it reports as mappable from 'lbn', and every loop
		 * cursor is then advanced by that amount.
		 */
		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
		    pgoff += xfersize,
		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
		    lbnoff += xfersize, xferoffset += xfersize) {
			/*
			 * read as many contiguous blocks as possible to
			 * fill this page
			 */
			xfersize = PAGESIZE - pgoff;
			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
			if (err1) {
				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
				err = err1;
				goto out;
			}
			bp = pageio_setup(pp, xfersize, devvp, B_READ);
			bp->b_edev = devvp->v_rdev;
			bp->b_dev = cmpdev(devvp->v_rdev);
			bp->b_blkno = bn +
			    /* add a sector offset within the cluster */
			    /* when the clustersize > PAGESIZE */
			    (xferoffset - lbnoff) / fsp->pcfs_secsize;
			/* b_addr carries the byte offset within the page */
			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
			bp->b_file = vp;
			bp->b_offset = (offset_t)(off + pgoff);

			(void) bdev_strategy(bp);

			lwp_stat_update(LWP_STAT_INBLK, 1);

			/* always wait for the I/O; keep the first error */
			if (err == 0)
				err = biowait(bp);
			else
				(void) biowait(bp);
			pageio_done(bp);
			if (err)
				goto out;
		}
		/* zero whatever part of the page was not read from disk */
		if (pgoff < PAGESIZE) {
			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
		}
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	}
out:
	/*
	 * err can only be non-zero after the read path above has set pp,
	 * so pp is never examined uninitialized here.
	 */
	if (err) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (err);
	}

	if (pagefound) {
		/*
		 * Page exists in the cache, acquire the "shared"
		 * lock.  If this fails, go back to reread.
		 */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}
	return (err);
}
1472 
1473 /*
1474  * Return all the pages from [off..off+len] in given file
1475  */
1476 static int
1477 pcfs_getpage(
1478 	struct vnode *vp,
1479 	offset_t off,
1480 	size_t len,
1481 	uint_t *protp,
1482 	page_t *pl[],
1483 	size_t plsz,
1484 	struct seg *seg,
1485 	caddr_t addr,
1486 	enum seg_rw rw,
1487 	struct cred *cr)
1488 {
1489 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1490 	int err;
1491 
1492 	PC_DPRINTF0(6, "pcfs_getpage\n");
1493 	if (err = pc_verify(fsp))
1494 		return (err);
1495 	if (vp->v_flag & VNOMAP)
1496 		return (ENOSYS);
1497 	ASSERT(off <= UINT32_MAX);
1498 	err = pc_lockfs(fsp, 0, 0);
1499 	if (err)
1500 		return (err);
1501 	if (protp != NULL)
1502 		*protp = PROT_ALL;
1503 
1504 	ASSERT((off & PAGEOFFSET) == 0);
1505 	if (len <= PAGESIZE) {
1506 		err = pcfs_getapage(vp, off, len, protp, pl,
1507 		    plsz, seg, addr, rw, cr);
1508 	} else {
1509 		err = pvn_getpages(pcfs_getapage, vp, off,
1510 		    len, protp, pl, plsz, seg, addr, rw, cr);
1511 	}
1512 	pc_unlockfs(fsp);
1513 	return (err);
1514 }
1515 
1516 
1517 /*
1518  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1519  * If len == 0, do from off to EOF.
1520  *
1521  * The normal cases should be len == 0 & off == 0 (entire vp list),
1522  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1523  * (from pageout).
1524  *
1525  */
1526 /*ARGSUSED*/
1527 static int
1528 pcfs_putpage(
1529 	struct vnode *vp,
1530 	offset_t off,
1531 	size_t len,
1532 	int flags,
1533 	struct cred *cr)
1534 {
1535 	struct pcnode *pcp;
1536 	page_t *pp;
1537 	struct pcfs *fsp;
1538 	u_offset_t io_off;
1539 	size_t io_len;
1540 	offset_t eoff;
1541 	int err;
1542 
1543 	/*
1544 	 * If the filesystem was umounted by force, return immediately.
1545 	 */
1546 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1547 		return (EIO);
1548 
1549 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1550 	if (vp->v_flag & VNOMAP)
1551 		return (ENOSYS);
1552 
1553 	fsp = VFSTOPCFS(vp->v_vfsp);
1554 
1555 	if (err = pc_verify(fsp))
1556 		return (err);
1557 	if ((pcp = VTOPC(vp)) == NULL) {
1558 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1559 		return (EIO);
1560 	}
1561 
1562 	if (curproc == proc_pageout) {
1563 		/*
1564 		 * XXX - This is a quick hack to avoid blocking
1565 		 * pageout. Also to avoid pcfs_getapage deadlocking
1566 		 * with putpage when memory is running out,
1567 		 * since we only have one global lock and we don't
1568 		 * support async putpage.
1569 		 * It should be fixed someday.
1570 		 *
1571 		 * Interestingly, this used to be a test of NOMEMWAIT().
1572 		 * We only ever got here once pcfs started supporting
1573 		 * NFS sharing, and then only because the NFS server
1574 		 * threads seem to do writes in sched's process context.
1575 		 * Since everyone else seems to just care about pageout,
1576 		 * the test was changed to look for pageout directly.
1577 		 */
1578 		return (ENOMEM);
1579 	}
1580 
1581 	ASSERT(off <= UINT32_MAX);
1582 
1583 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1584 
1585 	err = pc_lockfs(fsp, 0, 0);
1586 	if (err)
1587 		return (err);
1588 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1589 		pc_unlockfs(fsp);
1590 		return (0);
1591 	}
1592 
1593 	if (len == 0) {
1594 		/*
1595 		 * Search the entire vp list for pages >= off
1596 		 */
1597 		err = pvn_vplist_dirty(vp, off,
1598 		    pcfs_putapage, flags, cr);
1599 	} else {
1600 		eoff = off + len;
1601 
1602 		for (io_off = off; io_off < eoff &&
1603 		    io_off < pcp->pc_size; io_off += io_len) {
1604 			/*
1605 			 * If we are not invalidating, synchronously
1606 			 * freeing or writing pages use the routine
1607 			 * page_lookup_nowait() to prevent reclaiming
1608 			 * them from the free list.
1609 			 */
1610 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1611 				pp = page_lookup(vp, io_off,
1612 					(flags & (B_INVAL | B_FREE)) ?
1613 					    SE_EXCL : SE_SHARED);
1614 			} else {
1615 				pp = page_lookup_nowait(vp, io_off,
1616 					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
1617 			}
1618 
1619 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1620 				io_len = PAGESIZE;
1621 			else {
1622 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1623 					flags, cr);
1624 				if (err != 0)
1625 					break;
1626 				/*
1627 				 * "io_off" and "io_len" are returned as
1628 				 * the range of pages we actually wrote.
1629 				 * This allows us to skip ahead more quickly
1630 				 * since several pages may've been dealt
1631 				 * with by this iteration of the loop.
1632 				 */
1633 			}
1634 		}
1635 	}
1636 	if (err == 0 && (flags & B_INVAL) &&
1637 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1638 		/*
1639 		 * If doing "invalidation", make sure that
1640 		 * all pages on the vnode list are actually
1641 		 * gone.
1642 		 */
1643 		cmn_err(CE_PANIC,
1644 			"pcfs_putpage: B_INVAL, pages not gone");
1645 	} else if (err) {
1646 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1647 	}
1648 	pc_unlockfs(fsp);
1649 	return (err);
1650 }
1651 
/*
 * Write out a single page, possibly klustering adjacent dirty pages.
 *
 * The page list to write is assembled by pvn_write_kluster() and written
 * synchronously to the device, one contiguous extent (as reported by
 * pc_bmap()) at a time.  On return *offp / *lenp (when non-NULL) hold
 * the offset and length of the range chosen by pvn_write_kluster().
 *
 * If the filesystem is flagged PCFS_IRRECOV no I/O is attempted and the
 * pages are completed without B_ERROR.
 *
 * Returns 0 on success, or the error from pc_bmap() / biowait().
 */
/*ARGSUSED*/
int
pcfs_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct vnode *devvp;
	size_t io_len;
	daddr_t bn;
	u_offset_t lbn, lbnoff, xferoffset;
	uint_t pgoff, xfersize;
	int err = 0;
	u_offset_t io_off;

	pcp = VTOPC(vp);
	fsp = VFSTOPCFS(vp->v_vfsp);
	devvp = fsp->pcfs_devvp;

	/*
	 * If the modified time on the inode has not already been
	 * set elsewhere (e.g. for write/setattr) and this is not
	 * a call from msync (B_FORCE) we set the time now.
	 * This gives us approximate modified times for mmap'ed files
	 * which are modified via stores in the user address space.
	 *
	 * NOTE(review): as coded, the node is also marked whenever
	 * B_FORCE is set, which does not match the "not a call from
	 * msync" wording above - confirm which one is intended.
	 */
	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
		pcp->pc_flags |= PC_MOD;
		pc_mark_mod(pcp);
	}
	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
	    PAGESIZE, flags);

	/* Skip the I/O entirely; 'out' completes the pages. */
	if (fsp->pcfs_flags & PCFS_IRRECOV) {
		goto out;
	}

	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);

	/*
	 * lbn is the logical block holding io_off, lbnoff the file offset
	 * rounded down to a cluster boundary and xferoffset the file
	 * offset rounded down to a sector boundary.
	 */
	lbn = pc_lblkno(fsp, io_off);
	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);

	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
	    pgoff += xfersize,
	    lbn += howmany(xfersize, fsp->pcfs_clsize),
	    lbnoff += xfersize, xferoffset += xfersize) {

		struct buf *bp;
		int err1;

		/*
		 * write as many contiguous blocks as possible from this page
		 */
		xfersize = io_len - pgoff;
		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
		if (err1) {
			err = err1;
			goto out;
		}
		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
		bp->b_edev = devvp->v_rdev;
		bp->b_dev = cmpdev(devvp->v_rdev);
		bp->b_blkno = bn +
		    /* add a sector offset within the cluster */
		    /* when the clustersize > PAGESIZE */
		    (xferoffset - lbnoff) / fsp->pcfs_secsize;
		/* b_addr carries the byte offset within the page list */
		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
		bp->b_file = vp;
		bp->b_offset = (offset_t)(io_off + pgoff);

		(void) bdev_strategy(bp);

		lwp_stat_update(LWP_STAT_OUBLK, 1);

		/*
		 * Always wait for the I/O; keep the first error.  Unlike
		 * the read path, a failed write does not end the loop.
		 */
		if (err == 0)
			err = biowait(bp);
		else
			(void) biowait(bp);
		pageio_done(bp);
	}
	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	/* pages have been completed; keep 'out' from doing it again */
	pp = NULL;

out:
	/*
	 * pp is still set only when we jumped here before the normal
	 * completion above: either the filesystem is unrecoverable (the
	 * pages are completed without an error flag) or pc_bmap() failed.
	 */
	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
		pvn_write_done(pp, B_WRITE | flags);
	} else if (err != 0 && pp != NULL) {
		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
	}

	if (offp)
		*offp = io_off;
	if (lenp)
		*lenp = io_len;
		/*
		 * NOTE: despite its indentation the trace below is not
		 * governed by the 'if (lenp)' above.
		 */
		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
		    (void *)vp, (void *)pp, io_off, io_len);
	if (err) {
		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
	}
	return (err);
}
1762 
1763 /*ARGSUSED*/
1764 static int
1765 pcfs_map(
1766 	struct vnode *vp,
1767 	offset_t off,
1768 	struct as *as,
1769 	caddr_t *addrp,
1770 	size_t len,
1771 	uchar_t prot,
1772 	uchar_t maxprot,
1773 	uint_t flags,
1774 	struct cred *cr)
1775 {
1776 	struct segvn_crargs vn_a;
1777 	int error;
1778 
1779 	PC_DPRINTF0(6, "pcfs_map\n");
1780 	if (vp->v_flag & VNOMAP)
1781 		return (ENOSYS);
1782 
1783 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1784 		return (ENXIO);
1785 
1786 	as_rangelock(as);
1787 	if ((flags & MAP_FIXED) == 0) {
1788 		map_addr(addrp, len, off, 1, flags);
1789 		if (*addrp == NULL) {
1790 			as_rangeunlock(as);
1791 			return (ENOMEM);
1792 		}
1793 	} else {
1794 		/*
1795 		 * User specified address - blow away any previous mappings
1796 		 */
1797 		(void) as_unmap(as, *addrp, len);
1798 	}
1799 
1800 	vn_a.vp = vp;
1801 	vn_a.offset = off;
1802 	vn_a.type = flags & MAP_TYPE;
1803 	vn_a.prot = prot;
1804 	vn_a.maxprot = maxprot;
1805 	vn_a.flags = flags & ~MAP_TYPE;
1806 	vn_a.cred = cr;
1807 	vn_a.amp = NULL;
1808 	vn_a.szc = 0;
1809 	vn_a.lgrp_mem_policy_flags = 0;
1810 
1811 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1812 	as_rangeunlock(as);
1813 	return (error);
1814 }
1815 
1816 /* ARGSUSED */
1817 static int
1818 pcfs_seek(
1819 	struct vnode *vp,
1820 	offset_t ooff,
1821 	offset_t *noffp)
1822 {
1823 	if (*noffp < 0)
1824 		return (EINVAL);
1825 	else if (*noffp > MAXOFFSET_T)
1826 		return (EINVAL);
1827 	else
1828 		return (0);
1829 }
1830 
1831 /* ARGSUSED */
1832 static int
1833 pcfs_addmap(
1834 	struct vnode *vp,
1835 	offset_t off,
1836 	struct as *as,
1837 	caddr_t addr,
1838 	size_t len,
1839 	uchar_t prot,
1840 	uchar_t maxprot,
1841 	uint_t flags,
1842 	struct cred *cr)
1843 {
1844 	if (vp->v_flag & VNOMAP)
1845 		return (ENOSYS);
1846 	return (0);
1847 }
1848 
1849 /*ARGSUSED*/
1850 static int
1851 pcfs_delmap(
1852 	struct vnode *vp,
1853 	offset_t off,
1854 	struct as *as,
1855 	caddr_t addr,
1856 	size_t len,
1857 	uint_t prot,
1858 	uint_t maxprot,
1859 	uint_t flags,
1860 	struct cred *cr)
1861 {
1862 	if (vp->v_flag & VNOMAP)
1863 		return (ENOSYS);
1864 	return (0);
1865 }
1866 
1867 /*
1868  * POSIX pathconf() support.
1869  */
1870 /* ARGSUSED */
1871 static int
1872 pcfs_pathconf(
1873 	struct vnode *vp,
1874 	int cmd,
1875 	ulong_t *valp,
1876 	struct cred *cr)
1877 {
1878 	ulong_t val;
1879 	int error = 0;
1880 	struct statvfs64 vfsbuf;
1881 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1882 
1883 	switch (cmd) {
1884 
1885 	case _PC_LINK_MAX:
1886 		val = 1;
1887 		break;
1888 
1889 	case _PC_MAX_CANON:
1890 		val = MAX_CANON;
1891 		break;
1892 
1893 	case _PC_MAX_INPUT:
1894 		val = MAX_INPUT;
1895 		break;
1896 
1897 	case _PC_NAME_MAX:
1898 		bzero(&vfsbuf, sizeof (vfsbuf));
1899 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1900 			break;
1901 		val = vfsbuf.f_namemax;
1902 		break;
1903 
1904 	case _PC_PATH_MAX:
1905 	case _PC_SYMLINK_MAX:
1906 		val = PCMAXPATHLEN;
1907 		break;
1908 
1909 	case _PC_PIPE_BUF:
1910 		val = PIPE_BUF;
1911 		break;
1912 
1913 	case _PC_NO_TRUNC:
1914 		val = (ulong_t)-1; 	/* Will truncate long file name */
1915 		break;
1916 
1917 	case _PC_VDISABLE:
1918 		val = _POSIX_VDISABLE;
1919 		break;
1920 
1921 	case _PC_CHOWN_RESTRICTED:
1922 		if (rstchown)
1923 			val = rstchown;		/* chown restricted enabled */
1924 		else
1925 			val = (ulong_t)-1;
1926 		break;
1927 
1928 	case _PC_ACL_ENABLED:
1929 		val = 0;
1930 		break;
1931 
1932 	case _PC_FILESIZEBITS:
1933 		/*
1934 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1935 		 * FAT12 can only go up to the maximum filesystem capacity
1936 		 * which is ~509MB.
1937 		 */
1938 		val = IS_FAT12(fsp) ? 30 : 33;
1939 		break;
1940 	default:
1941 		error = EINVAL;
1942 		break;
1943 	}
1944 
1945 	if (error == 0)
1946 		*valp = val;
1947 	return (error);
1948 }
1949 
1950 /* ARGSUSED */
1951 static int
1952 pcfs_space(
1953 	struct vnode *vp,
1954 	int cmd,
1955 	struct flock64 *bfp,
1956 	int flag,
1957 	offset_t offset,
1958 	cred_t *cr,
1959 	caller_context_t *ct)
1960 {
1961 	struct vattr vattr;
1962 	int error;
1963 
1964 	if (cmd != F_FREESP)
1965 		return (EINVAL);
1966 
1967 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1968 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
1969 			return (EFBIG);
1970 		/*
1971 		 * we only support the special case of l_len == 0,
1972 		 * meaning free to end of file at this moment.
1973 		 */
1974 		if (bfp->l_len != 0)
1975 			return (EINVAL);
1976 		vattr.va_mask = AT_SIZE;
1977 		vattr.va_size = bfp->l_start;
1978 		error = VOP_SETATTR(vp, &vattr, 0, cr, ct);
1979 	}
1980 	return (error);
1981 }
1982 
1983 /*
1984  * Break up 'len' chars from 'buf' into a long file name chunk.
1985  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
1986  */
1987 void
1988 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
1989 {
1990 	char 	*tmp = buf;
1991 	int	i;
1992 
1993 
1994 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
1995 		if (len > 0) {
1996 			ep->pcdl_firstfilename[i] = *tmp;
1997 			ep->pcdl_firstfilename[i+1] = 0;
1998 			len--;
1999 			tmp++;
2000 		} else {
2001 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2002 			ep->pcdl_firstfilename[i+1] = (uchar_t)0xff;
2003 		}
2004 	}
2005 
2006 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2007 		if (len > 0) {
2008 			ep->pcdl_secondfilename[i] = *tmp;
2009 			ep->pcdl_secondfilename[i+1] = 0;
2010 			len--;
2011 			tmp++;
2012 		} else {
2013 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2014 			ep->pcdl_secondfilename[i+1] = (uchar_t)0xff;
2015 		}
2016 	}
2017 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2018 		if (len > 0) {
2019 			ep->pcdl_thirdfilename[i] = *tmp;
2020 			ep->pcdl_thirdfilename[i+1] = 0;
2021 			len--;
2022 			tmp++;
2023 		} else {
2024 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2025 			ep->pcdl_thirdfilename[i+1] = (uchar_t)0xff;
2026 		}
2027 	}
2028 }
2029 
2030 /*
2031  * Extract the characters from the long filename chunk into 'buf'.
2032  * Return the number of characters extracted.
2033  */
2034 static int
2035 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2036 {
2037 	char 	*tmp = buf;
2038 	int	i;
2039 
2040 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp++) {
2041 		if (ep->pcdl_firstfilename[i+1] != '\0')
2042 			return (-1);
2043 		if (foldcase)
2044 			*tmp = tolower(ep->pcdl_firstfilename[i]);
2045 		else
2046 			*tmp = ep->pcdl_firstfilename[i];
2047 		if (*tmp == '\0')
2048 			return (tmp - buf);
2049 	}
2050 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp++) {
2051 		if (ep->pcdl_secondfilename[i+1] != '\0')
2052 			return (-1);
2053 		if (foldcase)
2054 			*tmp = tolower(ep->pcdl_secondfilename[i]);
2055 		else
2056 			*tmp = ep->pcdl_secondfilename[i];
2057 		if (*tmp == '\0')
2058 			return (tmp - buf);
2059 	}
2060 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp++) {
2061 		if (ep->pcdl_thirdfilename[i+1] != '\0')
2062 			return (-1);
2063 		if (foldcase)
2064 			*tmp = tolower(ep->pcdl_thirdfilename[i]);
2065 		else
2066 			*tmp = ep->pcdl_thirdfilename[i];
2067 		if (*tmp == '\0')
2068 			return (tmp - buf);
2069 	}
2070 	*tmp = '\0';
2071 	return (tmp - buf);
2072 }
2073 
2074 
/*
 * Checksum the passed in short filename.
 * This is used to validate each component of the long name to make
 * sure the long name is valid (it hasn't been "detached" from the
 * short filename). This algorithm was found in FreeBSD.
 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
 *
 * 'name' supplies 8 bytes and 'ext' 3 bytes.  Starting from the first
 * name byte, the running 8-bit sum is rotated right by one bit and the
 * next byte is added, for each of the remaining ten bytes.
 */

uchar_t
pc_checksum_long_fn(char *name, char *ext)
{
	uchar_t sum;
	int i;

	sum = name[0];
	for (i = 1; i < 8; i++)
		sum = ((sum << 7) | (sum >> 1)) + name[i];
	for (i = 0; i < 3; i++)
		sum = ((sum << 7) | (sum >> 1)) + ext[i];

	return (sum);
}
2106 
2107 /*
2108  * Read a chunk of long filename entries into 'namep'.
2109  * Return with offset pointing to short entry (on success), or next
2110  * entry to read (if this wasn't a valid lfn really).
2111  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2112  * a long filename.
2113  *
2114  * Can also be called with a NULL namep, in which case it just returns
2115  * whether this was really a valid long filename and consumes it
2116  * (used by pc_dirempty()).
2117  */
2118 int
2119 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2120     struct pcdir **epp, offset_t *offset, struct buf **bp)
2121 {
2122 	struct pcdir *ep = *epp;
2123 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2124 	struct vnode *dvp = PCTOV(pcp);
2125 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2126 	char	*lfn;
2127 	char	*lfn_base;
2128 	int	boff;
2129 	int	i, cs;
2130 	char	buf[20];
2131 	uchar_t	cksum;
2132 	int	detached = 0;
2133 	int	error = 0;
2134 	int	foldcase;
2135 
2136 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2137 	/* use callers buffer unless we didn't get one */
2138 	if (namep)
2139 		lfn_base = namep;
2140 	else
2141 		lfn_base = kmem_alloc(PCMAXNAMLEN+1, KM_SLEEP);
2142 	lfn = lfn_base + PCMAXNAMLEN - 1;
2143 	*lfn = '\0';
2144 	cksum = lep->pcdl_checksum;
2145 
2146 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2147 		/* read next block if necessary */
2148 		boff = pc_blkoff(fsp, *offset);
2149 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2150 			if (*bp != NULL) {
2151 				brelse(*bp);
2152 				*bp = NULL;
2153 			}
2154 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2155 			if (error) {
2156 				if (namep == NULL)
2157 					kmem_free(lfn_base, PCMAXNAMLEN+1);
2158 				return (error);
2159 			}
2160 			lep = (struct pcdir_lfn *)ep;
2161 		}
2162 		/* can this happen? Bad fs? */
2163 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2164 			detached = 1;
2165 			break;
2166 		}
2167 		if (cksum != lep->pcdl_checksum)
2168 			detached = 1;
2169 		/* process current entry */
2170 		cs = get_long_fn_chunk(lep, buf, foldcase);
2171 		if (cs == -1) {
2172 			detached = 1;
2173 		} else {
2174 			for (; cs > 0; cs--) {
2175 				/* see if we underflow */
2176 				if (lfn >= lfn_base)
2177 					*--lfn = buf[cs - 1];
2178 				else
2179 					detached = 1;
2180 			}
2181 		}
2182 		lep++;
2183 		*offset += sizeof (struct pcdir);
2184 	}
2185 	/* read next block if necessary */
2186 	boff = pc_blkoff(fsp, *offset);
2187 	ep = (struct pcdir *)lep;
2188 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2189 		if (*bp != NULL) {
2190 			brelse(*bp);
2191 			*bp = NULL;
2192 		}
2193 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2194 		if (error) {
2195 			if (namep == NULL)
2196 				kmem_free(lfn_base, PCMAXNAMLEN+1);
2197 			return (error);
2198 		}
2199 	}
2200 	/* should be on the short one */
2201 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2202 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2203 		detached = 1;
2204 	}
2205 	if (detached ||
2206 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2207 	    !pc_valid_long_fn(lfn)) {
2208 		/*
2209 		 * process current entry again. This may end up another lfn
2210 		 * or a short name.
2211 		 */
2212 		*epp = ep;
2213 		if (namep == NULL)
2214 			kmem_free(lfn_base, PCMAXNAMLEN+1);
2215 		return (EINVAL);
2216 	}
2217 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2218 		/*
2219 		 * Don't display label because it may contain
2220 		 * funny characters.
2221 		 */
2222 		*offset += sizeof (struct pcdir);
2223 		ep++;
2224 		*epp = ep;
2225 		if (namep == NULL)
2226 			kmem_free(lfn_base, PCMAXNAMLEN+1);
2227 		return (EINVAL);
2228 	}
2229 	if (namep) {
2230 		/* lfn is part of namep, but shifted. shift it back */
2231 		cs = strlen(lfn);
2232 		for (i = 0; i < cs; i++)
2233 			namep[i] = lfn[i];
2234 		namep[i] = '\0';
2235 	} else {
2236 		kmem_free(lfn_base, PCMAXNAMLEN+1);
2237 	}
2238 	*epp = ep;
2239 	return (0);
2240 }
2241 /*
2242  * Read a long filename into the pc_dirent structure and copy it out.
2243  */
2244 int
2245 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2246     struct pcdir **epp, offset_t *offset, struct buf **bp)
2247 {
2248 	struct pcdir *ep;
2249 	struct pcnode *pcp = VTOPC(dvp);
2250 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2251 	offset_t uiooffset = uiop->uio_loffset;
2252 	int	error = 0;
2253 	offset_t oldoffset;
2254 
2255 	oldoffset = *offset;
2256 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2257 	if (error) {
2258 		if (error == EINVAL) {
2259 			uiop->uio_loffset += *offset - oldoffset;
2260 			return (0);
2261 		} else
2262 			return (error);
2263 	}
2264 
2265 	ep = *epp;
2266 	uiop->uio_loffset += *offset - oldoffset;
2267 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2268 	if (ld->d_reclen > uiop->uio_resid) {
2269 		uiop->uio_loffset = uiooffset;
2270 		return (ENOSPC);
2271 	}
2272 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2273 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2274 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2275 	    pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2276 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2277 	uiop->uio_loffset = ld->d_off;
2278 	*offset += sizeof (struct pcdir);
2279 	ep++;
2280 	*epp = ep;
2281 	return (0);
2282 }
2283 
2284 /*
2285  * Read a short filename into the pc_dirent structure and copy it out.
2286  */
2287 int
2288 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2289     struct pcdir **epp, offset_t *offset, struct buf **bp)
2290 {
2291 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2292 	int	boff = pc_blkoff(fsp, *offset);
2293 	struct pcdir *ep = *epp;
2294 	offset_t	oldoffset = uiop->uio_loffset;
2295 	int	error;
2296 	int	foldcase;
2297 
2298 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2299 		uiop->uio_loffset += sizeof (struct pcdir);
2300 		*offset += sizeof (struct pcdir);
2301 		ep++;
2302 		*epp = ep;
2303 		return (0);
2304 	}
2305 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2306 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2307 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2308 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2309 	    &ep->pcd_ext[0], foldcase);
2310 	if (error == 0) {
2311 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2312 		if (ld->d_reclen > uiop->uio_resid) {
2313 			uiop->uio_loffset = oldoffset;
2314 			return (ENOSPC);
2315 		}
2316 		ld->d_off = (off64_t)(uiop->uio_loffset +
2317 		    sizeof (struct pcdir));
2318 		(void) uiomove((caddr_t)ld,
2319 		    ld->d_reclen, UIO_READ, uiop);
2320 		uiop->uio_loffset = ld->d_off;
2321 	} else {
2322 		uiop->uio_loffset += sizeof (struct pcdir);
2323 	}
2324 	*offset += sizeof (struct pcdir);
2325 	ep++;
2326 	*epp = ep;
2327 	return (0);
2328 }
2329 
2330 static int
2331 pcfs_fid(struct vnode *vp, struct fid *fidp)
2332 {
2333 	struct pc_fid *pcfid;
2334 	struct pcnode *pcp;
2335 	struct pcfs	*fsp;
2336 	int	error;
2337 
2338 	fsp = VFSTOPCFS(vp->v_vfsp);
2339 	if (fsp == NULL)
2340 		return (EIO);
2341 	error = pc_lockfs(fsp, 0, 0);
2342 	if (error)
2343 		return (error);
2344 	if ((pcp = VTOPC(vp)) == NULL) {
2345 		pc_unlockfs(fsp);
2346 		return (EIO);
2347 	}
2348 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2349 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2350 		pc_unlockfs(fsp);
2351 		return (ENOSPC);
2352 	}
2353 
2354 	pcfid = (struct pc_fid *)fidp;
2355 	bzero(pcfid, sizeof (struct pc_fid));
2356 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2357 	if (vp->v_flag & VROOT) {
2358 		pcfid->pcfid_block = 0;
2359 		pcfid->pcfid_offset = 0;
2360 		pcfid->pcfid_ctime = 0;
2361 	} else {
2362 		pcfid->pcfid_block = pcp->pc_eblkno;
2363 		pcfid->pcfid_offset = pcp->pc_eoffset;
2364 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2365 	}
2366 	pc_unlockfs(fsp);
2367 	return (0);
2368 }
2369