1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2017 by Delphix. All rights reserved.
31 */
32
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/buf.h>
39 #include <sys/stat.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/dirent.h>
43 #include <sys/vnode.h>
44 #include <sys/proc.h>
45 #include <sys/file.h>
46 #include <sys/fcntl.h>
47 #include <sys/uio.h>
48 #include <sys/fs/pc_label.h>
49 #include <sys/fs/pc_fs.h>
50 #include <sys/fs/pc_dir.h>
51 #include <sys/fs/pc_node.h>
52 #include <sys/mman.h>
53 #include <sys/pathname.h>
54 #include <sys/vmsystm.h>
55 #include <sys/cmn_err.h>
56 #include <sys/debug.h>
57 #include <sys/statvfs.h>
58 #include <sys/unistd.h>
59 #include <sys/kmem.h>
60 #include <sys/conf.h>
61 #include <sys/flock.h>
62 #include <sys/policy.h>
63 #include <sys/sdt.h>
64 #include <sys/sunddi.h>
65 #include <sys/types.h>
66 #include <sys/errno.h>
67
68 #include <vm/seg.h>
69 #include <vm/page.h>
70 #include <vm/pvn.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_vn.h>
73 #include <vm/hat.h>
74 #include <vm/as.h>
75 #include <vm/seg_kmem.h>
76
77 #include <fs/fs_subr.h>
78
79 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
80 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
81 caller_context_t *ct);
82 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
83 caller_context_t *);
84 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
85 caller_context_t *);
86 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
87 caller_context_t *ct);
88 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
89 caller_context_t *);
90 static int pcfs_access(struct vnode *, int, int, struct cred *,
91 caller_context_t *ct);
92 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
93 struct pathname *, int, struct vnode *, struct cred *,
94 caller_context_t *, int *, pathname_t *);
95 static int pcfs_create(struct vnode *, char *, struct vattr *,
96 enum vcexcl, int mode, struct vnode **, struct cred *, int,
97 caller_context_t *, vsecattr_t *);
98 static int pcfs_remove(struct vnode *, char *, struct cred *,
99 caller_context_t *, int);
100 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
101 struct cred *, caller_context_t *, int);
102 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
103 struct cred *, caller_context_t *, int, vsecattr_t *);
104 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
105 caller_context_t *, int);
106 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
107 caller_context_t *, int);
108 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
109 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
110 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
111 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
112 offset_t, cred_t *, caller_context_t *);
113 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
114 size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
115 caller_context_t *);
116 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
117 page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
118 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
119 caller_context_t *);
120 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
121 uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
122 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
123 size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
124 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
125 size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
126 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
127 caller_context_t *);
128 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
129 caller_context_t *);
130
131 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
132 struct cred *);
133 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
134 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
135
136 extern krwlock_t pcnodes_lock;
137
/*
 * Round r up to the next multiple of sizeof (long long).  The argument is
 * evaluated exactly once.
 */
#define	lround(r)	\
	(((r) + sizeof (long long) - 1) & ~(sizeof (long long) - 1))
139
140 /*
141 * vnode op vectors for files and directories.
142 */
143 struct vnodeops *pcfs_fvnodeops;
144 struct vnodeops *pcfs_dvnodeops;
145
146 const fs_operation_def_t pcfs_fvnodeops_template[] = {
147 VOPNAME_OPEN, { .vop_open = pcfs_open },
148 VOPNAME_CLOSE, { .vop_close = pcfs_close },
149 VOPNAME_READ, { .vop_read = pcfs_read },
150 VOPNAME_WRITE, { .vop_write = pcfs_write },
151 VOPNAME_GETATTR, { .vop_getattr = pcfs_getattr },
152 VOPNAME_SETATTR, { .vop_setattr = pcfs_setattr },
153 VOPNAME_ACCESS, { .vop_access = pcfs_access },
154 VOPNAME_FSYNC, { .vop_fsync = pcfs_fsync },
155 VOPNAME_INACTIVE, { .vop_inactive = pcfs_inactive },
156 VOPNAME_FID, { .vop_fid = pcfs_fid },
157 VOPNAME_SEEK, { .vop_seek = pcfs_seek },
158 VOPNAME_SPACE, { .vop_space = pcfs_space },
159 VOPNAME_GETPAGE, { .vop_getpage = pcfs_getpage },
160 VOPNAME_PUTPAGE, { .vop_putpage = pcfs_putpage },
161 VOPNAME_MAP, { .vop_map = pcfs_map },
162 VOPNAME_ADDMAP, { .vop_addmap = pcfs_addmap },
163 VOPNAME_DELMAP, { .vop_delmap = pcfs_delmap },
164 VOPNAME_PATHCONF, { .vop_pathconf = pcfs_pathconf },
165 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
166 NULL, NULL
167 };
168
169 const fs_operation_def_t pcfs_dvnodeops_template[] = {
170 VOPNAME_OPEN, { .vop_open = pcfs_open },
171 VOPNAME_CLOSE, { .vop_close = pcfs_close },
172 VOPNAME_GETATTR, { .vop_getattr = pcfs_getattr },
173 VOPNAME_SETATTR, { .vop_setattr = pcfs_setattr },
174 VOPNAME_ACCESS, { .vop_access = pcfs_access },
175 VOPNAME_LOOKUP, { .vop_lookup = pcfs_lookup },
176 VOPNAME_CREATE, { .vop_create = pcfs_create },
177 VOPNAME_REMOVE, { .vop_remove = pcfs_remove },
178 VOPNAME_RENAME, { .vop_rename = pcfs_rename },
179 VOPNAME_MKDIR, { .vop_mkdir = pcfs_mkdir },
180 VOPNAME_RMDIR, { .vop_rmdir = pcfs_rmdir },
181 VOPNAME_READDIR, { .vop_readdir = pcfs_readdir },
182 VOPNAME_FSYNC, { .vop_fsync = pcfs_fsync },
183 VOPNAME_INACTIVE, { .vop_inactive = pcfs_inactive },
184 VOPNAME_FID, { .vop_fid = pcfs_fid },
185 VOPNAME_SEEK, { .vop_seek = pcfs_seek },
186 VOPNAME_PATHCONF, { .vop_pathconf = pcfs_pathconf },
187 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
188 NULL, NULL
189 };
190
191
192 /*ARGSUSED*/
193 static int
pcfs_open(struct vnode ** vpp,int flag,struct cred * cr,caller_context_t * ct)194 pcfs_open(
195 struct vnode **vpp,
196 int flag,
197 struct cred *cr,
198 caller_context_t *ct)
199 {
200 return (0);
201 }
202
203 /*
204 * files are sync'ed on close to keep floppy up to date
205 */
206
207 /*ARGSUSED*/
208 static int
pcfs_close(struct vnode * vp,int flag,int count,offset_t offset,struct cred * cr,caller_context_t * ct)209 pcfs_close(
210 struct vnode *vp,
211 int flag,
212 int count,
213 offset_t offset,
214 struct cred *cr,
215 caller_context_t *ct)
216 {
217 return (0);
218 }
219
220 /*ARGSUSED*/
221 static int
pcfs_read(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,struct caller_context * ct)222 pcfs_read(
223 struct vnode *vp,
224 struct uio *uiop,
225 int ioflag,
226 struct cred *cr,
227 struct caller_context *ct)
228 {
229 struct pcfs *fsp;
230 struct pcnode *pcp;
231 int error;
232
233 fsp = VFSTOPCFS(vp->v_vfsp);
234 if (error = pc_verify(fsp))
235 return (error);
236 error = pc_lockfs(fsp, 0, 0);
237 if (error)
238 return (error);
239 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
240 pc_unlockfs(fsp);
241 return (EIO);
242 }
243 error = rwpcp(pcp, uiop, UIO_READ, ioflag);
244 if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
245 pc_mark_acc(fsp, pcp);
246 }
247 pc_unlockfs(fsp);
248 if (error) {
249 PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
250 }
251 return (error);
252 }
253
254 /*ARGSUSED*/
255 static int
pcfs_write(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,struct caller_context * ct)256 pcfs_write(
257 struct vnode *vp,
258 struct uio *uiop,
259 int ioflag,
260 struct cred *cr,
261 struct caller_context *ct)
262 {
263 struct pcfs *fsp;
264 struct pcnode *pcp;
265 int error;
266
267 fsp = VFSTOPCFS(vp->v_vfsp);
268 if (error = pc_verify(fsp))
269 return (error);
270 error = pc_lockfs(fsp, 0, 0);
271 if (error)
272 return (error);
273 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
274 pc_unlockfs(fsp);
275 return (EIO);
276 }
277 if (ioflag & FAPPEND) {
278 /*
279 * in append mode start at end of file.
280 */
281 uiop->uio_loffset = pcp->pc_size;
282 }
283 error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
284 pcp->pc_flags |= PC_MOD;
285 pc_mark_mod(fsp, pcp);
286 if (ioflag & (FSYNC|FDSYNC))
287 (void) pc_nodeupdate(pcp);
288
289 pc_unlockfs(fsp);
290 if (error) {
291 PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
292 }
293 return (error);
294 }
295
296 /*
297 * read or write a vnode
298 */
299 static int
rwpcp(struct pcnode * pcp,struct uio * uio,enum uio_rw rw,int ioflag)300 rwpcp(
301 struct pcnode *pcp,
302 struct uio *uio,
303 enum uio_rw rw,
304 int ioflag)
305 {
306 struct vnode *vp = PCTOV(pcp);
307 struct pcfs *fsp;
308 daddr_t bn; /* phys block number */
309 int n;
310 offset_t off;
311 caddr_t base;
312 int mapon, pagecreate;
313 int newpage;
314 int error = 0;
315 rlim64_t limit = uio->uio_llimit;
316 int oresid = uio->uio_resid;
317
318 /*
319 * If the filesystem was umounted by force, return immediately.
320 */
321 if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
322 return (EIO);
323
324 PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
325 uio->uio_loffset, uio->uio_resid, pcp->pc_size);
326
327 ASSERT(rw == UIO_READ || rw == UIO_WRITE);
328 ASSERT(vp->v_type == VREG);
329
330 if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
331 return (0);
332 }
333
334 if (uio->uio_loffset < 0)
335 return (EINVAL);
336
337 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
338 limit = MAXOFFSET_T;
339
340 if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
341 proc_t *p = ttoproc(curthread);
342
343 mutex_enter(&p->p_lock);
344 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
345 p, RCA_UNSAFE_SIGINFO);
346 mutex_exit(&p->p_lock);
347 return (EFBIG);
348 }
349
350 /* the following condition will occur only for write */
351
352 if (uio->uio_loffset >= UINT32_MAX)
353 return (EFBIG);
354
355 if (uio->uio_resid == 0)
356 return (0);
357
358 if (limit > UINT32_MAX)
359 limit = UINT32_MAX;
360
361 fsp = VFSTOPCFS(vp->v_vfsp);
362 if (fsp->pcfs_flags & PCFS_IRRECOV)
363 return (EIO);
364
365 do {
366 /*
367 * Assignments to "n" in this block may appear
368 * to overflow in some cases. However, after careful
369 * analysis it was determined that all assignments to
370 * "n" serve only to make "n" smaller. Since "n"
371 * starts out as no larger than MAXBSIZE, "int" is
372 * safe.
373 */
374 off = uio->uio_loffset & MAXBMASK;
375 mapon = (int)(uio->uio_loffset & MAXBOFFSET);
376 n = MIN(MAXBSIZE - mapon, uio->uio_resid);
377 if (rw == UIO_READ) {
378 offset_t diff;
379
380 diff = pcp->pc_size - uio->uio_loffset;
381 if (diff <= 0)
382 return (0);
383 if (diff < n)
384 n = (int)diff;
385 }
386 /*
387 * Compare limit with the actual offset + n, not the
388 * rounded down offset "off" or we will overflow
389 * the maximum file size after all.
390 */
391 if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
392 if (uio->uio_loffset >= limit) {
393 error = EFBIG;
394 break;
395 }
396 n = (int)(limit - uio->uio_loffset);
397 }
398
399 /*
400 * Touch the page and fault it in if it is not in
401 * core before segmap_getmapflt can lock it. This
402 * is to avoid the deadlock if the buffer is mapped
403 * to the same file through mmap which we want to
404 * write to.
405 */
406 uio_prefaultpages((long)n, uio);
407
408 base = segmap_getmap(segkmap, vp, (u_offset_t)off);
409 pagecreate = 0;
410 newpage = 0;
411 if (rw == UIO_WRITE) {
412 /*
413 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
414 * with one page at a time, instead of one MAXBSIZE
415 * at a time, so we can fully explore pagecreate
416 * optimization??
417 */
418 if (uio->uio_loffset + n > pcp->pc_size) {
419 uint_t ncl, lcn;
420
421 ncl = (uint_t)howmany((offset_t)pcp->pc_size,
422 fsp->pcfs_clsize);
423 if (uio->uio_loffset > pcp->pc_size &&
424 ncl < (uint_t)howmany(uio->uio_loffset,
425 fsp->pcfs_clsize)) {
426 /*
427 * Allocate and zerofill skipped
428 * clusters. This may not be worth the
429 * effort since a small lseek beyond
430 * eof but still within the cluster
431 * will not be zeroed out.
432 */
433 lcn = pc_lblkno(fsp, uio->uio_loffset);
434 error = pc_balloc(pcp, (daddr_t)lcn,
435 1, &bn);
436 ncl = lcn + 1;
437 }
438 if (!error &&
439 ncl < (uint_t)howmany(uio->uio_loffset + n,
440 fsp->pcfs_clsize))
441 /*
442 * allocate clusters w/o zerofill
443 */
444 error = pc_balloc(pcp,
445 (daddr_t)pc_lblkno(fsp,
446 uio->uio_loffset + n - 1),
447 0, &bn);
448
449 pcp->pc_flags |= PC_CHG;
450
451 if (error) {
452 pc_cluster32_t ncl;
453 int nerror;
454
455 /*
456 * figure out new file size from
457 * cluster chain length. If this
458 * is detected to loop, the chain
459 * is corrupted and we'd better
460 * keep our fingers off that file.
461 */
462 nerror = pc_fileclsize(fsp,
463 pcp->pc_scluster, &ncl);
464 if (nerror) {
465 PC_DPRINTF1(2,
466 "cluster chain "
467 "corruption, "
468 "scluster=%d\n",
469 pcp->pc_scluster);
470 pcp->pc_size = 0;
471 pcp->pc_flags |= PC_INVAL;
472 error = nerror;
473 (void) segmap_release(segkmap,
474 base, 0);
475 break;
476 }
477 pcp->pc_size = fsp->pcfs_clsize * ncl;
478
479 if (error == ENOSPC &&
480 (pcp->pc_size - uio->uio_loffset)
481 > 0) {
482 PC_DPRINTF3(2, "rwpcp ENOSPC "
483 "off=%lld n=%d size=%d\n",
484 uio->uio_loffset,
485 n, pcp->pc_size);
486 n = (int)(pcp->pc_size -
487 uio->uio_loffset);
488 } else {
489 PC_DPRINTF1(1,
490 "rwpcp error1=%d\n", error);
491 (void) segmap_release(segkmap,
492 base, 0);
493 break;
494 }
495 } else {
496 pcp->pc_size =
497 (uint_t)(uio->uio_loffset + n);
498 }
499 if (mapon == 0) {
500 newpage = segmap_pagecreate(segkmap,
501 base, (size_t)n, 0);
502 pagecreate = 1;
503 }
504 } else if (n == MAXBSIZE) {
505 newpage = segmap_pagecreate(segkmap, base,
506 (size_t)n, 0);
507 pagecreate = 1;
508 }
509 }
510 error = uiomove(base + mapon, (size_t)n, rw, uio);
511
512 if (pagecreate && uio->uio_loffset <
513 roundup(off + mapon + n, PAGESIZE)) {
514 offset_t nzero, nmoved;
515
516 nmoved = uio->uio_loffset - (off + mapon);
517 nzero = roundup(mapon + n, PAGESIZE) - nmoved;
518 (void) kzero(base + mapon + nmoved, (size_t)nzero);
519 }
520
521 /*
522 * Unlock the pages which have been allocated by
523 * page_create_va() in segmap_pagecreate().
524 */
525 if (newpage) {
526 segmap_pageunlock(segkmap, base, (size_t)n,
527 rw == UIO_WRITE ? S_WRITE : S_READ);
528 }
529
530 if (error) {
531 PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
532 /*
533 * If we failed on a write, we may have already
534 * allocated file blocks as well as pages. It's hard
535 * to undo the block allocation, but we must be sure
536 * to invalidate any pages that may have been
537 * allocated.
538 */
539 if (rw == UIO_WRITE)
540 (void) segmap_release(segkmap, base, SM_INVAL);
541 else
542 (void) segmap_release(segkmap, base, 0);
543 } else {
544 uint_t flags = 0;
545
546 if (rw == UIO_READ) {
547 if (n + mapon == MAXBSIZE ||
548 uio->uio_loffset == pcp->pc_size)
549 flags = SM_DONTNEED;
550 } else if (ioflag & (FSYNC|FDSYNC)) {
551 flags = SM_WRITE;
552 } else if (n + mapon == MAXBSIZE) {
553 flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
554 }
555 error = segmap_release(segkmap, base, flags);
556 }
557
558 } while (error == 0 && uio->uio_resid > 0 && n != 0);
559
560 if (oresid != uio->uio_resid)
561 error = 0;
562 return (error);
563 }
564
565 /*ARGSUSED*/
566 static int
pcfs_getattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)567 pcfs_getattr(
568 struct vnode *vp,
569 struct vattr *vap,
570 int flags,
571 struct cred *cr,
572 caller_context_t *ct)
573 {
574 struct pcnode *pcp;
575 struct pcfs *fsp;
576 int error;
577 char attr;
578 struct pctime atime;
579 int64_t unixtime;
580
581 PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
582
583 fsp = VFSTOPCFS(vp->v_vfsp);
584 error = pc_lockfs(fsp, 0, 0);
585 if (error)
586 return (error);
587
588 /*
589 * Note that we don't check for "invalid node" (PC_INVAL) here
590 * only in order to make stat() succeed. We allow no I/O on such
591 * a node, but do allow to check for its existence.
592 */
593 if ((pcp = VTOPC(vp)) == NULL) {
594 pc_unlockfs(fsp);
595 return (EIO);
596 }
597 /*
598 * Copy from pcnode.
599 */
600 vap->va_type = vp->v_type;
601 attr = pcp->pc_entry.pcd_attr;
602 if (PCA_IS_HIDDEN(fsp, attr))
603 vap->va_mode = 0;
604 else if (attr & PCA_LABEL)
605 vap->va_mode = 0444;
606 else if (attr & PCA_RDONLY)
607 vap->va_mode = 0555;
608 else if (fsp->pcfs_flags & PCFS_BOOTPART) {
609 vap->va_mode = 0755;
610 } else {
611 vap->va_mode = 0777;
612 }
613
614 if (attr & PCA_DIR)
615 vap->va_mode |= S_IFDIR;
616 else
617 vap->va_mode |= S_IFREG;
618 if (fsp->pcfs_flags & PCFS_BOOTPART) {
619 vap->va_uid = 0;
620 vap->va_gid = 0;
621 } else {
622 vap->va_uid = crgetuid(cr);
623 vap->va_gid = crgetgid(cr);
624 }
625 vap->va_fsid = vp->v_vfsp->vfs_dev;
626 vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
627 pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
628 pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
629 vap->va_nlink = 1;
630 vap->va_size = (u_offset_t)pcp->pc_size;
631 vap->va_rdev = 0;
632 vap->va_nblocks =
633 (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
634 vap->va_blksize = fsp->pcfs_clsize;
635
636 /*
637 * FAT root directories have no timestamps. In order not to return
638 * "time zero" (1/1/1970), we record the time of the mount and give
639 * that. This breaks less expectations.
640 */
641 if (vp->v_flag & VROOT) {
642 vap->va_mtime = fsp->pcfs_mounttime;
643 vap->va_atime = fsp->pcfs_mounttime;
644 vap->va_ctime = fsp->pcfs_mounttime;
645 pc_unlockfs(fsp);
646 return (0);
647 }
648
649 pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
650 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
651 if (unixtime > INT32_MAX)
652 DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
653 unixtime = MIN(unixtime, INT32_MAX);
654 } else if (unixtime > INT32_MAX &&
655 get_udatamodel() == DATAMODEL_ILP32) {
656 pc_unlockfs(fsp);
657 DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
658 return (EOVERFLOW);
659 }
660
661 vap->va_mtime.tv_sec = (time_t)unixtime;
662 vap->va_mtime.tv_nsec = 0;
663
664 /*
665 * FAT doesn't know about POSIX ctime.
666 * Best approximation is to always set it to mtime.
667 */
668 vap->va_ctime = vap->va_mtime;
669
670 /*
671 * FAT only stores "last access date". If that's the
672 * same as the date of last modification then the time
673 * of last access is known. Otherwise, use midnight.
674 */
675 atime.pct_date = pcp->pc_entry.pcd_ladate;
676 if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
677 atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
678 else
679 atime.pct_time = 0;
680 pc_pcttotv(&atime, &unixtime);
681 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
682 if (unixtime > INT32_MAX)
683 DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
684 unixtime = MIN(unixtime, INT32_MAX);
685 } else if (unixtime > INT32_MAX &&
686 get_udatamodel() == DATAMODEL_ILP32) {
687 pc_unlockfs(fsp);
688 DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
689 return (EOVERFLOW);
690 }
691
692 vap->va_atime.tv_sec = (time_t)unixtime;
693 vap->va_atime.tv_nsec = 0;
694
695 pc_unlockfs(fsp);
696 return (0);
697 }
698
699
700 /*ARGSUSED*/
701 static int
pcfs_setattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)702 pcfs_setattr(
703 struct vnode *vp,
704 struct vattr *vap,
705 int flags,
706 struct cred *cr,
707 caller_context_t *ct)
708 {
709 struct pcnode *pcp;
710 mode_t mask = vap->va_mask;
711 int error;
712 struct pcfs *fsp;
713 timestruc_t now, *timep;
714
715 PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
716 /*
717 * cannot set these attributes
718 */
719 if (mask & (AT_NOSET | AT_UID | AT_GID)) {
720 return (EINVAL);
721 }
722 /*
723 * pcfs_setattr is now allowed on directories to avoid silly warnings
724 * from 'tar' when it tries to set times on a directory, and console
725 * printf's on the NFS server when it gets EINVAL back on such a
726 * request. One possible problem with that since a directory entry
727 * identifies a file, '.' and all the '..' entries in subdirectories
728 * may get out of sync when the directory is updated since they're
729 * treated like separate files. We could fix that by looking for
730 * '.' and giving it the same attributes, and then looking for
731 * all the subdirectories and updating '..', but that's pretty
732 * expensive for something that doesn't seem likely to matter.
733 */
734 /* can't do some ops on directories anyway */
735 if ((vp->v_type == VDIR) &&
736 (mask & AT_SIZE)) {
737 return (EINVAL);
738 }
739
740 fsp = VFSTOPCFS(vp->v_vfsp);
741 error = pc_lockfs(fsp, 0, 0);
742 if (error)
743 return (error);
744 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
745 pc_unlockfs(fsp);
746 return (EIO);
747 }
748
749 if (fsp->pcfs_flags & PCFS_BOOTPART) {
750 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
751 pc_unlockfs(fsp);
752 return (EACCES);
753 }
754 }
755
756 /*
757 * Change file access modes.
758 * If nobody has write permission, file is marked readonly.
759 * Otherwise file is writable by anyone.
760 */
761 if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
762 if ((vap->va_mode & 0222) == 0)
763 pcp->pc_entry.pcd_attr |= PCA_RDONLY;
764 else
765 pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
766 pcp->pc_flags |= PC_CHG;
767 }
768 /*
769 * Truncate file. Must have write permission.
770 */
771 if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
772 if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
773 error = EACCES;
774 goto out;
775 }
776 if (vap->va_size > UINT32_MAX) {
777 error = EFBIG;
778 goto out;
779 }
780 error = pc_truncate(pcp, (uint_t)vap->va_size);
781
782 if (error)
783 goto out;
784
785 if (vap->va_size == 0)
786 vnevent_truncate(vp, ct);
787 }
788 /*
789 * Change file modified times.
790 */
791 if (mask & (AT_MTIME | AT_CTIME)) {
792 /*
793 * If SysV-compatible option to set access and
794 * modified times if privileged, owner, or write access,
795 * use current time rather than va_mtime.
796 *
797 * XXX - va_mtime.tv_sec == -1 flags this.
798 */
799 timep = &vap->va_mtime;
800 if (vap->va_mtime.tv_sec == -1) {
801 gethrestime(&now);
802 timep = &now;
803 }
804 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
805 timep->tv_sec > INT32_MAX) {
806 error = EOVERFLOW;
807 goto out;
808 }
809 error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
810 if (error)
811 goto out;
812 pcp->pc_flags |= PC_CHG;
813 }
814 /*
815 * Change file access times.
816 */
817 if (mask & AT_ATIME) {
818 /*
819 * If SysV-compatible option to set access and
820 * modified times if privileged, owner, or write access,
821 * use current time rather than va_mtime.
822 *
823 * XXX - va_atime.tv_sec == -1 flags this.
824 */
825 struct pctime atime;
826
827 timep = &vap->va_atime;
828 if (vap->va_atime.tv_sec == -1) {
829 gethrestime(&now);
830 timep = &now;
831 }
832 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
833 timep->tv_sec > INT32_MAX) {
834 error = EOVERFLOW;
835 goto out;
836 }
837 error = pc_tvtopct(timep, &atime);
838 if (error)
839 goto out;
840 pcp->pc_entry.pcd_ladate = atime.pct_date;
841 pcp->pc_flags |= PC_CHG;
842 }
843 out:
844 pc_unlockfs(fsp);
845 return (error);
846 }
847
848
849 /*ARGSUSED*/
850 static int
pcfs_access(struct vnode * vp,int mode,int flags,struct cred * cr,caller_context_t * ct)851 pcfs_access(
852 struct vnode *vp,
853 int mode,
854 int flags,
855 struct cred *cr,
856 caller_context_t *ct)
857 {
858 struct pcnode *pcp;
859 struct pcfs *fsp;
860
861
862 fsp = VFSTOPCFS(vp->v_vfsp);
863
864 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
865 return (EIO);
866 if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
867 return (EACCES);
868
869 /*
870 * If this is a boot partition, privileged users have full access while
871 * others have read-only access.
872 */
873 if (fsp->pcfs_flags & PCFS_BOOTPART) {
874 if ((mode & VWRITE) &&
875 secpolicy_pcfs_modify_bootpartition(cr) != 0)
876 return (EACCES);
877 }
878 return (0);
879 }
880
881
882 /*ARGSUSED*/
883 static int
pcfs_fsync(struct vnode * vp,int syncflag,struct cred * cr,caller_context_t * ct)884 pcfs_fsync(
885 struct vnode *vp,
886 int syncflag,
887 struct cred *cr,
888 caller_context_t *ct)
889 {
890 struct pcfs *fsp;
891 struct pcnode *pcp;
892 int error;
893
894 fsp = VFSTOPCFS(vp->v_vfsp);
895 if (error = pc_verify(fsp))
896 return (error);
897 error = pc_lockfs(fsp, 0, 0);
898 if (error)
899 return (error);
900 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
901 pc_unlockfs(fsp);
902 return (EIO);
903 }
904 rw_enter(&pcnodes_lock, RW_WRITER);
905 error = pc_nodesync(pcp);
906 rw_exit(&pcnodes_lock);
907 pc_unlockfs(fsp);
908 return (error);
909 }
910
911
912 /*ARGSUSED*/
913 static void
pcfs_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)914 pcfs_inactive(
915 struct vnode *vp,
916 struct cred *cr,
917 caller_context_t *ct)
918 {
919 struct pcnode *pcp;
920 struct pcfs *fsp;
921 int error;
922
923 fsp = VFSTOPCFS(vp->v_vfsp);
924 error = pc_lockfs(fsp, 0, 1);
925
926 /*
927 * If the filesystem was umounted by force, all dirty
928 * pages associated with this vnode are invalidated
929 * and then the vnode will be freed.
930 */
931 if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
932 pcp = VTOPC(vp);
933 if (vn_has_cached_data(vp)) {
934 (void) pvn_vplist_dirty(vp, (u_offset_t)0,
935 pcfs_putapage, B_INVAL, (struct cred *)NULL);
936 }
937 remque(pcp);
938 if (error == 0)
939 pc_unlockfs(fsp);
940 vn_free(vp);
941 kmem_free(pcp, sizeof (struct pcnode));
942 VFS_RELE(PCFSTOVFS(fsp));
943 return;
944 }
945
946 mutex_enter(&vp->v_lock);
947 ASSERT(vp->v_count >= 1);
948 if (vp->v_count > 1) {
949 VN_RELE_LOCKED(vp);
950 mutex_exit(&vp->v_lock);
951 pc_unlockfs(fsp);
952 return;
953 }
954 mutex_exit(&vp->v_lock);
955
956 /*
957 * Check again to confirm that no intervening I/O error
958 * with a subsequent pc_diskchanged() call has released
959 * the pcnode. If it has then release the vnode as above.
960 */
961 pcp = VTOPC(vp);
962 if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
963 if (vn_has_cached_data(vp))
964 (void) pvn_vplist_dirty(vp, (u_offset_t)0,
965 pcfs_putapage, B_INVAL | B_TRUNC,
966 (struct cred *)NULL);
967 }
968
969 if (pcp == NULL) {
970 vn_free(vp);
971 } else {
972 pc_rele(pcp);
973 }
974
975 if (!error)
976 pc_unlockfs(fsp);
977 }
978
979 /*ARGSUSED*/
980 static int
pcfs_lookup(struct vnode * dvp,char * nm,struct vnode ** vpp,struct pathname * pnp,int flags,struct vnode * rdir,struct cred * cr,caller_context_t * ct,int * direntflags,pathname_t * realpnp)981 pcfs_lookup(
982 struct vnode *dvp,
983 char *nm,
984 struct vnode **vpp,
985 struct pathname *pnp,
986 int flags,
987 struct vnode *rdir,
988 struct cred *cr,
989 caller_context_t *ct,
990 int *direntflags,
991 pathname_t *realpnp)
992 {
993 struct pcfs *fsp;
994 struct pcnode *pcp;
995 int error;
996
997 /*
998 * If the filesystem was umounted by force, return immediately.
999 */
1000 if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1001 return (EIO);
1002
1003 /*
1004 * verify that the dvp is still valid on the disk
1005 */
1006 fsp = VFSTOPCFS(dvp->v_vfsp);
1007 if (error = pc_verify(fsp))
1008 return (error);
1009 error = pc_lockfs(fsp, 0, 0);
1010 if (error)
1011 return (error);
1012 if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1013 pc_unlockfs(fsp);
1014 return (EIO);
1015 }
1016 /*
1017 * Null component name is a synonym for directory being searched.
1018 */
1019 if (*nm == '\0') {
1020 VN_HOLD(dvp);
1021 *vpp = dvp;
1022 pc_unlockfs(fsp);
1023 return (0);
1024 }
1025
1026 error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1027 if (!error) {
1028 *vpp = PCTOV(pcp);
1029 pcp->pc_flags |= PC_EXTERNAL;
1030 }
1031 pc_unlockfs(fsp);
1032 return (error);
1033 }
1034
1035
1036 /*ARGSUSED*/
1037 static int
pcfs_create(struct vnode * dvp,char * nm,struct vattr * vap,enum vcexcl exclusive,int mode,struct vnode ** vpp,struct cred * cr,int flag,caller_context_t * ct,vsecattr_t * vsecp)1038 pcfs_create(
1039 struct vnode *dvp,
1040 char *nm,
1041 struct vattr *vap,
1042 enum vcexcl exclusive,
1043 int mode,
1044 struct vnode **vpp,
1045 struct cred *cr,
1046 int flag,
1047 caller_context_t *ct,
1048 vsecattr_t *vsecp)
1049 {
1050 int error;
1051 struct pcnode *pcp;
1052 struct vnode *vp;
1053 struct pcfs *fsp;
1054
1055 /*
1056 * can't create directories. use pcfs_mkdir.
1057 * can't create anything other than files.
1058 */
1059 if (vap->va_type == VDIR)
1060 return (EISDIR);
1061 else if (vap->va_type != VREG)
1062 return (EINVAL);
1063
1064 pcp = NULL;
1065 fsp = VFSTOPCFS(dvp->v_vfsp);
1066 error = pc_lockfs(fsp, 0, 0);
1067 if (error)
1068 return (error);
1069 if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1070 pc_unlockfs(fsp);
1071 return (EIO);
1072 }
1073
1074 if (fsp->pcfs_flags & PCFS_BOOTPART) {
1075 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1076 pc_unlockfs(fsp);
1077 return (EACCES);
1078 }
1079 }
1080
1081 if (*nm == '\0') {
1082 /*
1083 * Null component name refers to the directory itself.
1084 */
1085 VN_HOLD(dvp);
1086 pcp = VTOPC(dvp);
1087 error = EEXIST;
1088 } else {
1089 error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1090 }
1091 /*
1092 * if file exists and this is a nonexclusive create,
1093 * check for access permissions
1094 */
1095 if (error == EEXIST) {
1096 vp = PCTOV(pcp);
1097 if (exclusive == NONEXCL) {
1098 if (vp->v_type == VDIR) {
1099 error = EISDIR;
1100 } else if (mode) {
1101 error = pcfs_access(PCTOV(pcp), mode, 0,
1102 cr, ct);
1103 } else {
1104 error = 0;
1105 }
1106 }
1107 if (error) {
1108 VN_RELE(PCTOV(pcp));
1109 } else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1110 (vap->va_size == 0)) {
1111 error = pc_truncate(pcp, 0L);
1112 if (error) {
1113 VN_RELE(PCTOV(pcp));
1114 } else {
1115 vnevent_create(PCTOV(pcp), ct);
1116 }
1117 }
1118 }
1119 if (error) {
1120 pc_unlockfs(fsp);
1121 return (error);
1122 }
1123 *vpp = PCTOV(pcp);
1124 pcp->pc_flags |= PC_EXTERNAL;
1125 pc_unlockfs(fsp);
1126 return (error);
1127 }
1128
1129 /*ARGSUSED*/
1130 static int
pcfs_remove(struct vnode * vp,char * nm,struct cred * cr,caller_context_t * ct,int flags)1131 pcfs_remove(
1132 struct vnode *vp,
1133 char *nm,
1134 struct cred *cr,
1135 caller_context_t *ct,
1136 int flags)
1137 {
1138 struct pcfs *fsp;
1139 struct pcnode *pcp;
1140 int error;
1141
1142 fsp = VFSTOPCFS(vp->v_vfsp);
1143 if (error = pc_verify(fsp))
1144 return (error);
1145 error = pc_lockfs(fsp, 0, 0);
1146 if (error)
1147 return (error);
1148 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1149 pc_unlockfs(fsp);
1150 return (EIO);
1151 }
1152 if (fsp->pcfs_flags & PCFS_BOOTPART) {
1153 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1154 pc_unlockfs(fsp);
1155 return (EACCES);
1156 }
1157 }
1158 error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1159 pc_unlockfs(fsp);
1160 return (error);
1161 }
1162
1163 /*
1164 * Rename a file or directory
1165 * This rename is restricted to only rename files within a directory.
1166 * XX should make rename more general
1167 */
1168 /*ARGSUSED*/
1169 static int
pcfs_rename(struct vnode * sdvp,char * snm,struct vnode * tdvp,char * tnm,struct cred * cr,caller_context_t * ct,int flags)1170 pcfs_rename(
1171 struct vnode *sdvp, /* old (source) parent vnode */
1172 char *snm, /* old (source) entry name */
1173 struct vnode *tdvp, /* new (target) parent vnode */
1174 char *tnm, /* new (target) entry name */
1175 struct cred *cr,
1176 caller_context_t *ct,
1177 int flags)
1178 {
1179 struct pcfs *fsp;
1180 struct pcnode *dp; /* parent pcnode */
1181 struct pcnode *tdp;
1182 int error;
1183
1184 fsp = VFSTOPCFS(sdvp->v_vfsp);
1185 if (error = pc_verify(fsp))
1186 return (error);
1187
1188 /*
1189 * make sure we can muck with this directory.
1190 */
1191 error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1192 if (error) {
1193 return (error);
1194 }
1195 error = pc_lockfs(fsp, 0, 0);
1196 if (error)
1197 return (error);
1198 if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1199 (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1200 pc_unlockfs(fsp);
1201 return (EIO);
1202 }
1203 error = pc_rename(dp, tdp, snm, tnm, ct);
1204 pc_unlockfs(fsp);
1205 return (error);
1206 }
1207
1208 /*ARGSUSED*/
1209 static int
pcfs_mkdir(struct vnode * dvp,char * nm,struct vattr * vap,struct vnode ** vpp,struct cred * cr,caller_context_t * ct,int flags,vsecattr_t * vsecp)1210 pcfs_mkdir(
1211 struct vnode *dvp,
1212 char *nm,
1213 struct vattr *vap,
1214 struct vnode **vpp,
1215 struct cred *cr,
1216 caller_context_t *ct,
1217 int flags,
1218 vsecattr_t *vsecp)
1219 {
1220 struct pcfs *fsp;
1221 struct pcnode *pcp;
1222 int error;
1223
1224 fsp = VFSTOPCFS(dvp->v_vfsp);
1225 if (error = pc_verify(fsp))
1226 return (error);
1227 error = pc_lockfs(fsp, 0, 0);
1228 if (error)
1229 return (error);
1230 if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1231 pc_unlockfs(fsp);
1232 return (EIO);
1233 }
1234
1235 if (fsp->pcfs_flags & PCFS_BOOTPART) {
1236 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1237 pc_unlockfs(fsp);
1238 return (EACCES);
1239 }
1240 }
1241
1242 error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1243
1244 if (!error) {
1245 pcp -> pc_flags |= PC_EXTERNAL;
1246 *vpp = PCTOV(pcp);
1247 } else if (error == EEXIST) {
1248 VN_RELE(PCTOV(pcp));
1249 }
1250 pc_unlockfs(fsp);
1251 return (error);
1252 }
1253
1254 /*ARGSUSED*/
1255 static int
pcfs_rmdir(struct vnode * dvp,char * nm,struct vnode * cdir,struct cred * cr,caller_context_t * ct,int flags)1256 pcfs_rmdir(
1257 struct vnode *dvp,
1258 char *nm,
1259 struct vnode *cdir,
1260 struct cred *cr,
1261 caller_context_t *ct,
1262 int flags)
1263 {
1264 struct pcfs *fsp;
1265 struct pcnode *pcp;
1266 int error;
1267
1268 fsp = VFSTOPCFS(dvp -> v_vfsp);
1269 if (error = pc_verify(fsp))
1270 return (error);
1271 if (error = pc_lockfs(fsp, 0, 0))
1272 return (error);
1273
1274 if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1275 pc_unlockfs(fsp);
1276 return (EIO);
1277 }
1278
1279 if (fsp->pcfs_flags & PCFS_BOOTPART) {
1280 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1281 pc_unlockfs(fsp);
1282 return (EACCES);
1283 }
1284 }
1285
1286 error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1287 pc_unlockfs(fsp);
1288 return (error);
1289 }
1290
1291 /*
1292 * read entries in a directory.
1293 * we must convert pc format to unix format
1294 */
1295
1296 /*ARGSUSED*/
1297 static int
pcfs_readdir(struct vnode * dvp,struct uio * uiop,struct cred * cr,int * eofp,caller_context_t * ct,int flags)1298 pcfs_readdir(
1299 struct vnode *dvp,
1300 struct uio *uiop,
1301 struct cred *cr,
1302 int *eofp,
1303 caller_context_t *ct,
1304 int flags)
1305 {
1306 struct pcnode *pcp;
1307 struct pcfs *fsp;
1308 struct pcdir *ep;
1309 struct buf *bp = NULL;
1310 offset_t offset;
1311 int boff;
1312 struct pc_dirent lbp;
1313 struct pc_dirent *ld = &lbp;
1314 int error;
1315
1316 /*
1317 * If the filesystem was umounted by force, return immediately.
1318 */
1319 if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1320 return (EIO);
1321
1322 if ((uiop->uio_iovcnt != 1) ||
1323 (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1324 return (EINVAL);
1325 }
1326 fsp = VFSTOPCFS(dvp->v_vfsp);
1327 /*
1328 * verify that the dp is still valid on the disk
1329 */
1330 if (error = pc_verify(fsp)) {
1331 return (error);
1332 }
1333 error = pc_lockfs(fsp, 0, 0);
1334 if (error)
1335 return (error);
1336 if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1337 pc_unlockfs(fsp);
1338 return (EIO);
1339 }
1340
1341 bzero(ld, sizeof (*ld));
1342
1343 if (eofp != NULL)
1344 *eofp = 0;
1345 offset = uiop->uio_loffset;
1346
1347 if (dvp->v_flag & VROOT) {
1348 /*
1349 * kludge up entries for "." and ".." in the root.
1350 */
1351 if (offset == 0) {
1352 (void) strcpy(ld->d_name, ".");
1353 ld->d_reclen = DIRENT64_RECLEN(1);
1354 ld->d_off = (off64_t)sizeof (struct pcdir);
1355 ld->d_ino = (ino64_t)UINT_MAX;
1356 if (ld->d_reclen > uiop->uio_resid) {
1357 pc_unlockfs(fsp);
1358 return (ENOSPC);
1359 }
1360 (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1361 uiop->uio_loffset = ld->d_off;
1362 offset = uiop->uio_loffset;
1363 }
1364 if (offset == sizeof (struct pcdir)) {
1365 (void) strcpy(ld->d_name, "..");
1366 ld->d_reclen = DIRENT64_RECLEN(2);
1367 if (ld->d_reclen > uiop->uio_resid) {
1368 pc_unlockfs(fsp);
1369 return (ENOSPC);
1370 }
1371 ld->d_off = (off64_t)(uiop->uio_loffset +
1372 sizeof (struct pcdir));
1373 ld->d_ino = (ino64_t)UINT_MAX;
1374 (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1375 uiop->uio_loffset = ld->d_off;
1376 offset = uiop->uio_loffset;
1377 }
1378 offset -= 2 * sizeof (struct pcdir);
1379 /* offset now has the real offset value into directory file */
1380 }
1381
1382 for (;;) {
1383 boff = pc_blkoff(fsp, offset);
1384 if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1385 if (bp != NULL) {
1386 brelse(bp);
1387 bp = NULL;
1388 }
1389 error = pc_blkatoff(pcp, offset, &bp, &ep);
1390 if (error) {
1391 if (error == ENOENT) {
1392 error = 0;
1393 if (eofp)
1394 *eofp = 1;
1395 }
1396 break;
1397 }
1398 }
1399 if (ep->pcd_filename[0] == PCD_UNUSED) {
1400 if (eofp)
1401 *eofp = 1;
1402 break;
1403 }
1404 /*
1405 * Don't display label because it may contain funny characters.
1406 */
1407 if (ep->pcd_filename[0] == PCD_ERASED) {
1408 uiop->uio_loffset += sizeof (struct pcdir);
1409 offset += sizeof (struct pcdir);
1410 ep++;
1411 continue;
1412 }
1413 if (PCDL_IS_LFN(ep)) {
1414 if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1415 0)
1416 break;
1417 continue;
1418 }
1419
1420 if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1421 break;
1422 }
1423 if (bp)
1424 brelse(bp);
1425 pc_unlockfs(fsp);
1426 return (error);
1427 }
1428
1429
1430 /*
1431 * Called from pvn_getpages to get a particular page. When we are called
1432 * the pcfs is already locked.
1433 */
1434 /*ARGSUSED*/
1435 static int
pcfs_getapage(struct vnode * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)1436 pcfs_getapage(
1437 struct vnode *vp,
1438 u_offset_t off,
1439 size_t len,
1440 uint_t *protp,
1441 page_t *pl[], /* NULL if async IO is requested */
1442 size_t plsz,
1443 struct seg *seg,
1444 caddr_t addr,
1445 enum seg_rw rw,
1446 struct cred *cr)
1447 {
1448 struct pcnode *pcp;
1449 struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1450 struct vnode *devvp;
1451 page_t *pp;
1452 page_t *pagefound;
1453 int err;
1454
1455 /*
1456 * If the filesystem was umounted by force, return immediately.
1457 */
1458 if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1459 return (EIO);
1460
1461 PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1462 (void *)vp, off, len);
1463
1464 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1465 return (EIO);
1466 devvp = fsp->pcfs_devvp;
1467
1468 /* pcfs doesn't do readaheads */
1469 if (pl == NULL)
1470 return (0);
1471
1472 pl[0] = NULL;
1473 err = 0;
1474 /*
1475 * If the accessed time on the pcnode has not already been
1476 * set elsewhere (e.g. for read/setattr) we set the time now.
1477 * This gives us approximate modified times for mmap'ed files
1478 * which are accessed via loads in the user address space.
1479 */
1480 if ((pcp->pc_flags & PC_ACC) == 0 &&
1481 ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1482 pc_mark_acc(fsp, pcp);
1483 }
1484 reread:
1485 if ((pagefound = page_exists(vp, off)) == NULL) {
1486 /*
1487 * Need to really do disk IO to get the page(s).
1488 */
1489 struct buf *bp;
1490 daddr_t lbn, bn;
1491 u_offset_t io_off;
1492 size_t io_len;
1493 u_offset_t lbnoff, xferoffset;
1494 u_offset_t pgoff;
1495 uint_t xfersize;
1496 int err1;
1497
1498 lbn = pc_lblkno(fsp, off);
1499 lbnoff = off & ~(fsp->pcfs_clsize - 1);
1500 xferoffset = off & ~(fsp->pcfs_secsize - 1);
1501
1502 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1503 off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1504 if (pp == NULL)
1505 /*
1506 * XXX - If pcfs is made MT-hot, this should go
1507 * back to reread.
1508 */
1509 panic("pcfs_getapage pvn_read_kluster");
1510
1511 for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1512 pgoff += xfersize,
1513 lbn += howmany(xfersize, fsp->pcfs_clsize),
1514 lbnoff += xfersize, xferoffset += xfersize) {
1515 /*
1516 * read as many contiguous blocks as possible to
1517 * fill this page
1518 */
1519 xfersize = PAGESIZE - pgoff;
1520 err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1521 if (err1) {
1522 PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1523 err = err1;
1524 goto out;
1525 }
1526 bp = pageio_setup(pp, xfersize, devvp, B_READ);
1527 bp->b_edev = devvp->v_rdev;
1528 bp->b_dev = cmpdev(devvp->v_rdev);
1529 bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1530 bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1531 bp->b_file = vp;
1532 bp->b_offset = (offset_t)(off + pgoff);
1533
1534 (void) bdev_strategy(bp);
1535
1536 lwp_stat_update(LWP_STAT_INBLK, 1);
1537
1538 if (err == 0)
1539 err = biowait(bp);
1540 else
1541 (void) biowait(bp);
1542 pageio_done(bp);
1543 if (err)
1544 goto out;
1545 }
1546 if (pgoff < PAGESIZE) {
1547 pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1548 }
1549 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1550 }
1551 out:
1552 if (err) {
1553 if (pp != NULL)
1554 pvn_read_done(pp, B_ERROR);
1555 return (err);
1556 }
1557
1558 if (pagefound) {
1559 /*
1560 * Page exists in the cache, acquire the "shared"
1561 * lock. If this fails, go back to reread.
1562 */
1563 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1564 goto reread;
1565 }
1566 pl[0] = pp;
1567 pl[1] = NULL;
1568 }
1569 return (err);
1570 }
1571
1572 /*
1573 * Return all the pages from [off..off+len] in given file
1574 */
1575 /* ARGSUSED */
1576 static int
pcfs_getpage(struct vnode * vp,offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr,caller_context_t * ct)1577 pcfs_getpage(
1578 struct vnode *vp,
1579 offset_t off,
1580 size_t len,
1581 uint_t *protp,
1582 page_t *pl[],
1583 size_t plsz,
1584 struct seg *seg,
1585 caddr_t addr,
1586 enum seg_rw rw,
1587 struct cred *cr,
1588 caller_context_t *ct)
1589 {
1590 struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1591 int err;
1592
1593 PC_DPRINTF0(6, "pcfs_getpage\n");
1594 if (err = pc_verify(fsp))
1595 return (err);
1596 if (vp->v_flag & VNOMAP)
1597 return (ENOSYS);
1598 ASSERT(off <= UINT32_MAX);
1599 err = pc_lockfs(fsp, 0, 0);
1600 if (err)
1601 return (err);
1602 if (protp != NULL)
1603 *protp = PROT_ALL;
1604
1605 ASSERT((off & PAGEOFFSET) == 0);
1606 err = pvn_getpages(pcfs_getapage, vp, off, len, protp, pl, plsz,
1607 seg, addr, rw, cr);
1608
1609 pc_unlockfs(fsp);
1610 return (err);
1611 }
1612
1613
1614 /*
1615 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1616 * If len == 0, do from off to EOF.
1617 *
1618 * The normal cases should be len == 0 & off == 0 (entire vp list),
1619 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1620 * (from pageout).
1621 *
1622 */
1623 /*ARGSUSED*/
1624 static int
pcfs_putpage(struct vnode * vp,offset_t off,size_t len,int flags,struct cred * cr,caller_context_t * ct)1625 pcfs_putpage(
1626 struct vnode *vp,
1627 offset_t off,
1628 size_t len,
1629 int flags,
1630 struct cred *cr,
1631 caller_context_t *ct)
1632 {
1633 struct pcnode *pcp;
1634 page_t *pp;
1635 struct pcfs *fsp;
1636 u_offset_t io_off;
1637 size_t io_len;
1638 offset_t eoff;
1639 int err;
1640
1641 /*
1642 * If the filesystem was umounted by force, return immediately.
1643 */
1644 if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1645 return (EIO);
1646
1647 PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1648 if (vp->v_flag & VNOMAP)
1649 return (ENOSYS);
1650
1651 fsp = VFSTOPCFS(vp->v_vfsp);
1652
1653 if (err = pc_verify(fsp))
1654 return (err);
1655 if ((pcp = VTOPC(vp)) == NULL) {
1656 PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1657 return (EIO);
1658 }
1659 if (pcp->pc_flags & PC_INVAL)
1660 return (EIO);
1661
1662 if (curproc == proc_pageout) {
1663 /*
1664 * XXX - This is a quick hack to avoid blocking
1665 * pageout. Also to avoid pcfs_getapage deadlocking
1666 * with putpage when memory is running out,
1667 * since we only have one global lock and we don't
1668 * support async putpage.
1669 * It should be fixed someday.
1670 *
1671 * Interestingly, this used to be a test of NOMEMWAIT().
1672 * We only ever got here once pcfs started supporting
1673 * NFS sharing, and then only because the NFS server
1674 * threads seem to do writes in sched's process context.
1675 * Since everyone else seems to just care about pageout,
1676 * the test was changed to look for pageout directly.
1677 */
1678 return (ENOMEM);
1679 }
1680
1681 ASSERT(off <= UINT32_MAX);
1682
1683 flags &= ~B_ASYNC; /* XXX should fix this later */
1684
1685 err = pc_lockfs(fsp, 0, 0);
1686 if (err)
1687 return (err);
1688 if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1689 pc_unlockfs(fsp);
1690 return (0);
1691 }
1692
1693 if (len == 0) {
1694 /*
1695 * Search the entire vp list for pages >= off
1696 */
1697 err = pvn_vplist_dirty(vp, off,
1698 pcfs_putapage, flags, cr);
1699 } else {
1700 eoff = off + len;
1701
1702 for (io_off = off; io_off < eoff &&
1703 io_off < pcp->pc_size; io_off += io_len) {
1704 /*
1705 * If we are not invalidating, synchronously
1706 * freeing or writing pages use the routine
1707 * page_lookup_nowait() to prevent reclaiming
1708 * them from the free list.
1709 */
1710 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1711 pp = page_lookup(vp, io_off,
1712 (flags & (B_INVAL | B_FREE)) ?
1713 SE_EXCL : SE_SHARED);
1714 } else {
1715 pp = page_lookup_nowait(vp, io_off,
1716 (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1717 }
1718
1719 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1720 io_len = PAGESIZE;
1721 else {
1722 err = pcfs_putapage(vp, pp, &io_off, &io_len,
1723 flags, cr);
1724 if (err != 0)
1725 break;
1726 /*
1727 * "io_off" and "io_len" are returned as
1728 * the range of pages we actually wrote.
1729 * This allows us to skip ahead more quickly
1730 * since several pages may've been dealt
1731 * with by this iteration of the loop.
1732 */
1733 }
1734 }
1735 }
1736 if (err == 0 && (flags & B_INVAL) &&
1737 off == 0 && len == 0 && vn_has_cached_data(vp)) {
1738 /*
1739 * If doing "invalidation", make sure that
1740 * all pages on the vnode list are actually
1741 * gone.
1742 */
1743 cmn_err(CE_PANIC,
1744 "pcfs_putpage: B_INVAL, pages not gone");
1745 } else if (err) {
1746 PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1747 }
1748 pc_unlockfs(fsp);
1749 return (err);
1750 }
1751
1752 /*
1753 * Write out a single page, possibly klustering adjacent dirty pages.
1754 */
1755 /*ARGSUSED*/
1756 int
pcfs_putapage(struct vnode * vp,page_t * pp,u_offset_t * offp,size_t * lenp,int flags,struct cred * cr)1757 pcfs_putapage(
1758 struct vnode *vp,
1759 page_t *pp,
1760 u_offset_t *offp,
1761 size_t *lenp,
1762 int flags,
1763 struct cred *cr)
1764 {
1765 struct pcnode *pcp;
1766 struct pcfs *fsp;
1767 struct vnode *devvp;
1768 size_t io_len;
1769 daddr_t bn;
1770 u_offset_t lbn, lbnoff, xferoffset;
1771 uint_t pgoff, xfersize;
1772 int err = 0;
1773 u_offset_t io_off;
1774
1775 pcp = VTOPC(vp);
1776 fsp = VFSTOPCFS(vp->v_vfsp);
1777 devvp = fsp->pcfs_devvp;
1778
1779 /*
1780 * If the modified time on the inode has not already been
1781 * set elsewhere (e.g. for write/setattr) and this is not
1782 * a call from msync (B_FORCE) we set the time now.
1783 * This gives us approximate modified times for mmap'ed files
1784 * which are modified via stores in the user address space.
1785 */
1786 if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1787 pcp->pc_flags |= PC_MOD;
1788 pc_mark_mod(fsp, pcp);
1789 }
1790 pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1791 PAGESIZE, flags);
1792
1793 if (fsp->pcfs_flags & PCFS_IRRECOV) {
1794 goto out;
1795 }
1796
1797 PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1798
1799 lbn = pc_lblkno(fsp, io_off);
1800 lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1801 xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1802
1803 for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1804 pgoff += xfersize,
1805 lbn += howmany(xfersize, fsp->pcfs_clsize),
1806 lbnoff += xfersize, xferoffset += xfersize) {
1807
1808 struct buf *bp;
1809 int err1;
1810
1811 /*
1812 * write as many contiguous blocks as possible from this page
1813 */
1814 xfersize = io_len - pgoff;
1815 err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1816 if (err1) {
1817 err = err1;
1818 goto out;
1819 }
1820 bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1821 bp->b_edev = devvp->v_rdev;
1822 bp->b_dev = cmpdev(devvp->v_rdev);
1823 bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1824 bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1825 bp->b_file = vp;
1826 bp->b_offset = (offset_t)(io_off + pgoff);
1827
1828 (void) bdev_strategy(bp);
1829
1830 lwp_stat_update(LWP_STAT_OUBLK, 1);
1831
1832 if (err == 0)
1833 err = biowait(bp);
1834 else
1835 (void) biowait(bp);
1836 pageio_done(bp);
1837 }
1838 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1839 pp = NULL;
1840
1841 out:
1842 if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1843 pvn_write_done(pp, B_WRITE | flags);
1844 } else if (err != 0 && pp != NULL) {
1845 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1846 }
1847
1848 if (offp)
1849 *offp = io_off;
1850 if (lenp)
1851 *lenp = io_len;
1852 PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1853 (void *)vp, (void *)pp, io_off, io_len);
1854 if (err) {
1855 PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1856 }
1857 return (err);
1858 }
1859
1860 /*ARGSUSED*/
1861 static int
pcfs_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)1862 pcfs_map(
1863 struct vnode *vp,
1864 offset_t off,
1865 struct as *as,
1866 caddr_t *addrp,
1867 size_t len,
1868 uchar_t prot,
1869 uchar_t maxprot,
1870 uint_t flags,
1871 struct cred *cr,
1872 caller_context_t *ct)
1873 {
1874 struct segvn_crargs vn_a;
1875 int error;
1876
1877 PC_DPRINTF0(6, "pcfs_map\n");
1878 if (vp->v_flag & VNOMAP)
1879 return (ENOSYS);
1880
1881 if (off > UINT32_MAX || off + len > UINT32_MAX)
1882 return (ENXIO);
1883
1884 as_rangelock(as);
1885 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1886 if (error != 0) {
1887 as_rangeunlock(as);
1888 return (error);
1889 }
1890
1891 vn_a.vp = vp;
1892 vn_a.offset = off;
1893 vn_a.type = flags & MAP_TYPE;
1894 vn_a.prot = prot;
1895 vn_a.maxprot = maxprot;
1896 vn_a.flags = flags & ~MAP_TYPE;
1897 vn_a.cred = cr;
1898 vn_a.amp = NULL;
1899 vn_a.szc = 0;
1900 vn_a.lgrp_mem_policy_flags = 0;
1901
1902 error = as_map(as, *addrp, len, segvn_create, &vn_a);
1903 as_rangeunlock(as);
1904 return (error);
1905 }
1906
1907 /* ARGSUSED */
1908 static int
pcfs_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)1909 pcfs_seek(
1910 struct vnode *vp,
1911 offset_t ooff,
1912 offset_t *noffp,
1913 caller_context_t *ct)
1914 {
1915 if (*noffp < 0)
1916 return (EINVAL);
1917 else if (*noffp > MAXOFFSET_T)
1918 return (EINVAL);
1919 else
1920 return (0);
1921 }
1922
1923 /* ARGSUSED */
1924 static int
pcfs_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)1925 pcfs_addmap(
1926 struct vnode *vp,
1927 offset_t off,
1928 struct as *as,
1929 caddr_t addr,
1930 size_t len,
1931 uchar_t prot,
1932 uchar_t maxprot,
1933 uint_t flags,
1934 struct cred *cr,
1935 caller_context_t *ct)
1936 {
1937 if (vp->v_flag & VNOMAP)
1938 return (ENOSYS);
1939 return (0);
1940 }
1941
1942 /*ARGSUSED*/
1943 static int
pcfs_delmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint_t prot,uint_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)1944 pcfs_delmap(
1945 struct vnode *vp,
1946 offset_t off,
1947 struct as *as,
1948 caddr_t addr,
1949 size_t len,
1950 uint_t prot,
1951 uint_t maxprot,
1952 uint_t flags,
1953 struct cred *cr,
1954 caller_context_t *ct)
1955 {
1956 if (vp->v_flag & VNOMAP)
1957 return (ENOSYS);
1958 return (0);
1959 }
1960
1961 /*
1962 * POSIX pathconf() support.
1963 */
1964 /* ARGSUSED */
1965 static int
pcfs_pathconf(struct vnode * vp,int cmd,ulong_t * valp,struct cred * cr,caller_context_t * ct)1966 pcfs_pathconf(
1967 struct vnode *vp,
1968 int cmd,
1969 ulong_t *valp,
1970 struct cred *cr,
1971 caller_context_t *ct)
1972 {
1973 struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1974
1975 switch (cmd) {
1976 case _PC_LINK_MAX:
1977 *valp = 1;
1978 return (0);
1979
1980 case _PC_CASE_BEHAVIOR:
1981 return (EINVAL);
1982
1983 case _PC_FILESIZEBITS:
1984 /*
1985 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1986 * FAT12 can only go up to the maximum filesystem capacity
1987 * which is ~509MB.
1988 */
1989 *valp = IS_FAT12(fsp) ? 30 : 33;
1990 return (0);
1991
1992 case _PC_TIMESTAMP_RESOLUTION:
1993 /*
1994 * PCFS keeps track of modification times, it its own
1995 * internal format, to a resolution of 2 seconds.
1996 * Since 2000 million is representable in an int32_t
1997 * without overflow (or becoming negative), we allow
1998 * this value to be returned.
1999 */
2000 *valp = 2000000000L;
2001 return (0);
2002
2003 default:
2004 return (fs_pathconf(vp, cmd, valp, cr, ct));
2005 }
2006
2007 }
2008
2009 /* ARGSUSED */
2010 static int
pcfs_space(struct vnode * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,cred_t * cr,caller_context_t * ct)2011 pcfs_space(
2012 struct vnode *vp,
2013 int cmd,
2014 struct flock64 *bfp,
2015 int flag,
2016 offset_t offset,
2017 cred_t *cr,
2018 caller_context_t *ct)
2019 {
2020 struct vattr vattr;
2021 int error;
2022
2023 if (cmd != F_FREESP)
2024 return (EINVAL);
2025
2026 if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2027 if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2028 return (EFBIG);
2029 /*
2030 * we only support the special case of l_len == 0,
2031 * meaning free to end of file at this moment.
2032 */
2033 if (bfp->l_len != 0)
2034 return (EINVAL);
2035 vattr.va_mask = AT_SIZE;
2036 vattr.va_size = bfp->l_start;
2037 error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2038 }
2039 return (error);
2040 }
2041
2042 /*
2043 * Break up 'len' chars from 'buf' into a long file name chunk.
2044 * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2045 */
2046 void
set_long_fn_chunk(struct pcdir_lfn * ep,char * buf,int len)2047 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2048 {
2049 int i;
2050
2051 ASSERT(buf != NULL);
2052
2053 for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2054 if (len > 0) {
2055 ep->pcdl_firstfilename[i] = *buf++;
2056 ep->pcdl_firstfilename[i + 1] = *buf++;
2057 len -= 2;
2058 } else {
2059 ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2060 ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2061 }
2062 }
2063
2064 for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2065 if (len > 0) {
2066 ep->pcdl_secondfilename[i] = *buf++;
2067 ep->pcdl_secondfilename[i + 1] = *buf++;
2068 len -= 2;
2069 } else {
2070 ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2071 ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2072 }
2073 }
2074 for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2075 if (len > 0) {
2076 ep->pcdl_thirdfilename[i] = *buf++;
2077 ep->pcdl_thirdfilename[i + 1] = *buf++;
2078 len -= 2;
2079 } else {
2080 ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2081 ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2082 }
2083 }
2084 }
2085
2086 /*
2087 * Extract the characters from the long filename chunk into 'buf'.
2088 * Return the number of characters extracted.
2089 */
2090 static int
get_long_fn_chunk(struct pcdir_lfn * ep,char * buf)2091 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2092 {
2093 char *tmp = buf;
2094 int i;
2095
2096 /* Copy all the names, no filtering now */
2097
2098 for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2099 *tmp = ep->pcdl_firstfilename[i];
2100 *(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2101
2102 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2103 return (tmp - buf);
2104 }
2105 for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2106 *tmp = ep->pcdl_secondfilename[i];
2107 *(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2108
2109 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2110 return (tmp - buf);
2111 }
2112 for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2113 *tmp = ep->pcdl_thirdfilename[i];
2114 *(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2115
2116 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2117 return (tmp - buf);
2118 }
2119 return (tmp - buf);
2120 }
2121
2122
2123 /*
2124 * Checksum the passed in short filename.
2125 * This is used to validate each component of the long name to make
2126 * sure the long name is valid (it hasn't been "detached" from the
2127 * short filename). This algorithm was found in FreeBSD.
2128 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2129 */
2130
2131 uchar_t
pc_checksum_long_fn(char * name,char * ext)2132 pc_checksum_long_fn(char *name, char *ext)
2133 {
2134 uchar_t c;
2135 char b[11];
2136
2137 bcopy(name, b, 8);
2138 bcopy(ext, b+8, 3);
2139
2140 c = b[0];
2141 c = ((c << 7) | (c >> 1)) + b[1];
2142 c = ((c << 7) | (c >> 1)) + b[2];
2143 c = ((c << 7) | (c >> 1)) + b[3];
2144 c = ((c << 7) | (c >> 1)) + b[4];
2145 c = ((c << 7) | (c >> 1)) + b[5];
2146 c = ((c << 7) | (c >> 1)) + b[6];
2147 c = ((c << 7) | (c >> 1)) + b[7];
2148 c = ((c << 7) | (c >> 1)) + b[8];
2149 c = ((c << 7) | (c >> 1)) + b[9];
2150 c = ((c << 7) | (c >> 1)) + b[10];
2151
2152 return (c);
2153 }
2154
2155 /*
2156 * Read a chunk of long filename entries into 'namep'.
2157 * Return with offset pointing to short entry (on success), or next
2158 * entry to read (if this wasn't a valid lfn really).
2159 * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2160 * a long filename.
2161 *
2162 * Can also be called with a NULL namep, in which case it just returns
2163 * whether this was really a valid long filename and consumes it
2164 * (used by pc_dirempty()).
2165 */
2166 int
pc_extract_long_fn(struct pcnode * pcp,char * namep,struct pcdir ** epp,offset_t * offset,struct buf ** bp)2167 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2168 struct pcdir **epp, offset_t *offset, struct buf **bp)
2169 {
2170 struct pcdir *ep = *epp;
2171 struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2172 struct vnode *dvp = PCTOV(pcp);
2173 struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2174 char *lfn;
2175 char *lfn_base;
2176 int boff;
2177 int i, cs;
2178 char *buf;
2179 uchar_t cksum;
2180 int detached = 0;
2181 int error = 0;
2182 int foldcase;
2183 int count = 0;
2184 size_t u16l = 0, u8l = 0;
2185 char *outbuf;
2186 size_t ret, inlen, outlen;
2187
2188 foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2189 lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2190 lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2191 *lfn = '\0';
2192 *(lfn + 1) = '\0';
2193 cksum = lep->pcdl_checksum;
2194
2195 buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2196 for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2197 /* read next block if necessary */
2198 boff = pc_blkoff(fsp, *offset);
2199 if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2200 if (*bp != NULL) {
2201 brelse(*bp);
2202 *bp = NULL;
2203 }
2204 error = pc_blkatoff(pcp, *offset, bp, &ep);
2205 if (error) {
2206 kmem_free(lfn_base, PCMAXNAM_UTF16);
2207 kmem_free(buf, PCMAXNAM_UTF16);
2208 return (error);
2209 }
2210 lep = (struct pcdir_lfn *)ep;
2211 }
2212 /* can this happen? Bad fs? */
2213 if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2214 detached = 1;
2215 break;
2216 }
2217 if (cksum != lep->pcdl_checksum)
2218 detached = 1;
2219 /* process current entry */
2220 cs = get_long_fn_chunk(lep, buf);
2221 count += cs;
2222 for (; cs > 0; cs--) {
2223 /* see if we underflow */
2224 if (lfn >= lfn_base)
2225 *--lfn = buf[cs - 1];
2226 else
2227 detached = 1;
2228 }
2229 lep++;
2230 *offset += sizeof (struct pcdir);
2231 }
2232 kmem_free(buf, PCMAXNAM_UTF16);
2233 /* read next block if necessary */
2234 boff = pc_blkoff(fsp, *offset);
2235 ep = (struct pcdir *)lep;
2236 if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2237 if (*bp != NULL) {
2238 brelse(*bp);
2239 *bp = NULL;
2240 }
2241 error = pc_blkatoff(pcp, *offset, bp, &ep);
2242 if (error) {
2243 kmem_free(lfn_base, PCMAXNAM_UTF16);
2244 return (error);
2245 }
2246 }
2247 /* should be on the short one */
2248 if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2249 (ep->pcd_filename[0] == PCD_ERASED))) {
2250 detached = 1;
2251 }
2252 if (detached ||
2253 (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2254 !pc_valid_long_fn(lfn, 0)) {
2255 /*
2256 * process current entry again. This may end up another lfn
2257 * or a short name.
2258 */
2259 *epp = ep;
2260 kmem_free(lfn_base, PCMAXNAM_UTF16);
2261 return (EINVAL);
2262 }
2263 if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2264 /*
2265 * Don't display label because it may contain
2266 * funny characters.
2267 */
2268 *offset += sizeof (struct pcdir);
2269 ep++;
2270 *epp = ep;
2271 kmem_free(lfn_base, PCMAXNAM_UTF16);
2272 return (EINVAL);
2273 }
2274 if (namep) {
2275 u16l = count / 2;
2276 u8l = PCMAXNAMLEN;
2277 error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2278 (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2279 /*
2280 * uconv_u16tou8() will catch conversion errors including
2281 * the case where there is not enough room to write the
2282 * converted result and the u8l will never go over the given
2283 * PCMAXNAMLEN.
2284 */
2285 if (error != 0) {
2286 kmem_free(lfn_base, PCMAXNAM_UTF16);
2287 return (EINVAL);
2288 }
2289 namep[u8l] = '\0';
2290 if (foldcase) {
2291 inlen = strlen(namep);
2292 outlen = PCMAXNAMLEN;
2293 outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2294 ret = u8_textprep_str(namep, &inlen, outbuf,
2295 &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2296 &error);
2297 if (ret == -1) {
2298 kmem_free(outbuf, PCMAXNAMLEN + 1);
2299 kmem_free(lfn_base, PCMAXNAM_UTF16);
2300 return (EINVAL);
2301 }
2302 outbuf[PCMAXNAMLEN - outlen] = '\0';
2303 (void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2304 kmem_free(outbuf, PCMAXNAMLEN + 1);
2305 }
2306 }
2307 kmem_free(lfn_base, PCMAXNAM_UTF16);
2308 *epp = ep;
2309 return (0);
2310 }
2311 /*
2312 * Read a long filename into the pc_dirent structure and copy it out.
2313 */
2314 int
pc_read_long_fn(struct vnode * dvp,struct uio * uiop,struct pc_dirent * ld,struct pcdir ** epp,offset_t * offset,struct buf ** bp)2315 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2316 struct pcdir **epp, offset_t *offset, struct buf **bp)
2317 {
2318 struct pcdir *ep;
2319 struct pcnode *pcp = VTOPC(dvp);
2320 struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2321 offset_t uiooffset = uiop->uio_loffset;
2322 int error = 0;
2323 offset_t oldoffset;
2324
2325 oldoffset = *offset;
2326 error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2327 if (error) {
2328 if (error == EINVAL) {
2329 uiop->uio_loffset += *offset - oldoffset;
2330 return (0);
2331 } else
2332 return (error);
2333 }
2334
2335 ep = *epp;
2336 uiop->uio_loffset += *offset - oldoffset;
2337 ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2338 if (ld->d_reclen > uiop->uio_resid) {
2339 uiop->uio_loffset = uiooffset;
2340 return (ENOSPC);
2341 }
2342 ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2343 ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2344 pc_blkoff(fsp, *offset), ep->pcd_attr,
2345 pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2346 (void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2347 uiop->uio_loffset = ld->d_off;
2348 *offset += sizeof (struct pcdir);
2349 ep++;
2350 *epp = ep;
2351 return (0);
2352 }
2353
2354 /*
2355 * Read a short filename into the pc_dirent structure and copy it out.
2356 */
2357 int
pc_read_short_fn(struct vnode * dvp,struct uio * uiop,struct pc_dirent * ld,struct pcdir ** epp,offset_t * offset,struct buf ** bp)2358 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2359 struct pcdir **epp, offset_t *offset, struct buf **bp)
2360 {
2361 struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2362 int boff = pc_blkoff(fsp, *offset);
2363 struct pcdir *ep = *epp;
2364 offset_t oldoffset = uiop->uio_loffset;
2365 int error;
2366 int foldcase;
2367
2368 if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2369 uiop->uio_loffset += sizeof (struct pcdir);
2370 *offset += sizeof (struct pcdir);
2371 ep++;
2372 *epp = ep;
2373 return (0);
2374 }
2375 ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2376 boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2377 pc_direntpersec(fsp));
2378 foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2379 error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2380 &ep->pcd_ext[0], foldcase);
2381 if (error == 0) {
2382 ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2383 if (ld->d_reclen > uiop->uio_resid) {
2384 uiop->uio_loffset = oldoffset;
2385 return (ENOSPC);
2386 }
2387 ld->d_off = (off64_t)(uiop->uio_loffset +
2388 sizeof (struct pcdir));
2389 (void) uiomove((caddr_t)ld,
2390 ld->d_reclen, UIO_READ, uiop);
2391 uiop->uio_loffset = ld->d_off;
2392 } else {
2393 uiop->uio_loffset += sizeof (struct pcdir);
2394 }
2395 *offset += sizeof (struct pcdir);
2396 ep++;
2397 *epp = ep;
2398 return (0);
2399 }
2400
2401 /* ARGSUSED */
2402 static int
pcfs_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)2403 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2404 {
2405 struct pc_fid *pcfid;
2406 struct pcnode *pcp;
2407 struct pcfs *fsp;
2408 int error;
2409
2410 fsp = VFSTOPCFS(vp->v_vfsp);
2411 if (fsp == NULL)
2412 return (EIO);
2413 error = pc_lockfs(fsp, 0, 0);
2414 if (error)
2415 return (error);
2416 if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2417 pc_unlockfs(fsp);
2418 return (EIO);
2419 }
2420 if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2421 fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2422 pc_unlockfs(fsp);
2423 return (ENOSPC);
2424 }
2425
2426 pcfid = (struct pc_fid *)fidp;
2427 bzero(pcfid, sizeof (struct pc_fid));
2428 pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2429 if (vp->v_flag & VROOT) {
2430 pcfid->pcfid_block = 0;
2431 pcfid->pcfid_offset = 0;
2432 pcfid->pcfid_ctime = 0;
2433 } else {
2434 pcfid->pcfid_block = pcp->pc_eblkno;
2435 pcfid->pcfid_offset = pcp->pc_eoffset;
2436 pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2437 }
2438 pc_unlockfs(fsp);
2439 return (0);
2440 }
2441