1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
23
24
25 /*
26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28 * Copyright 2015 Joyent, Inc.
29 */
30
31 /*
32 * Generic vnode operations.
33 */
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/fcntl.h>
39 #include <sys/flock.h>
40 #include <sys/statvfs.h>
41 #include <sys/vfs.h>
42 #include <sys/vnode.h>
43 #include <sys/proc.h>
44 #include <sys/user.h>
45 #include <sys/unistd.h>
46 #include <sys/cred.h>
47 #include <sys/poll.h>
48 #include <sys/debug.h>
49 #include <sys/cmn_err.h>
50 #include <sys/stream.h>
51 #include <fs/fs_subr.h>
52 #include <fs/fs_reparse.h>
53 #include <sys/door.h>
54 #include <sys/acl.h>
55 #include <sys/share.h>
56 #include <sys/file.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/nbmlock.h>
60 #include <acl/acl_common.h>
61 #include <sys/pathname.h>
62
63 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
64
65 /*
66 * Tunable to limit the number of retry to recover from STALE error.
67 */
68 int fs_estale_retry = 5;
69
70 /*
71 * supports for reparse point door upcall
72 */
73 static door_handle_t reparsed_door;
74 static kmutex_t reparsed_door_lock;
75
/*
 * Generic stub for an operation the file system does not support.
 * Always returns ENOSYS.
 */
int
fs_nosys()
{
	return (ENOSYS);
}
84
/*
 * Generic stub for an operation that is invalid on this vnode.
 * Always returns EINVAL.
 */
int
fs_inval()
{
	return (EINVAL);
}
93
/*
 * Generic stub for an operation that is valid only for directories.
 * Always returns ENOTDIR.
 */
int
fs_notdir()
{
	return (ENOTDIR);
}
102
103 /*
104 * Free the file system specific resources. For the file systems that
105 * do not support the forced unmount, it will be a nop function.
106 */
107
108 /*ARGSUSED*/
109 void
fs_freevfs(vfs_t * vfsp)110 fs_freevfs(vfs_t *vfsp)
111 {
112 }
113
114 /* ARGSUSED */
115 int
fs_nosys_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)116 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
117 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
118 caller_context_t *ct)
119 {
120 return (ENOSYS);
121 }
122
123 /* ARGSUSED */
124 int
fs_nosys_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)125 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
126 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
127 caller_context_t *ct)
128 {
129 return (ENOSYS);
130 }
131
132 /* ARGSUSED */
133 int
fs_nosys_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)134 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
135 struct pollhead **phpp, caller_context_t *ct)
136 {
137 return (ENOSYS);
138 }
139
140
141 /*
142 * The file system has nothing to sync to disk. However, the
143 * VFS_SYNC operation must not fail.
144 */
145 /* ARGSUSED */
146 int
fs_sync(struct vfs * vfspp,short flag,cred_t * cr)147 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
148 {
149 return (0);
150 }
151
152 /*
153 * Does nothing but VOP_FSYNC must not fail.
154 */
155 /* ARGSUSED */
156 int
fs_fsync(vnode_t * vp,int syncflag,cred_t * cr,caller_context_t * ct)157 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
158 {
159 return (0);
160 }
161
162 /*
163 * Does nothing but VOP_PUTPAGE must not fail.
164 */
165 /* ARGSUSED */
166 int
fs_putpage(vnode_t * vp,offset_t off,size_t len,int flags,cred_t * cr,caller_context_t * ctp)167 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
168 caller_context_t *ctp)
169 {
170 return (0);
171 }
172
173 /*
174 * Does nothing but VOP_IOCTL must not fail.
175 */
176 /* ARGSUSED */
177 int
fs_ioctl(vnode_t * vp,int com,intptr_t data,int flag,cred_t * cred,int * rvalp)178 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
179 int *rvalp)
180 {
181 return (0);
182 }
183
184 /*
185 * Read/write lock/unlock. Does nothing.
186 */
187 /* ARGSUSED */
188 int
fs_rwlock(vnode_t * vp,int write_lock,caller_context_t * ctp)189 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
190 {
191 return (-1);
192 }
193
194 /* ARGSUSED */
195 void
fs_rwunlock(vnode_t * vp,int write_lock,caller_context_t * ctp)196 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
197 {
198 }
199
200 /*
201 * Compare two vnodes.
202 */
203 /*ARGSUSED2*/
204 int
fs_cmp(vnode_t * vp1,vnode_t * vp2,caller_context_t * ct)205 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
206 {
207 return (vp1 == vp2);
208 }
209
210 /*
211 * No-op seek operation.
212 */
213 /* ARGSUSED */
214 int
fs_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)215 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
216 {
217 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
218 }
219
220 /*
221 * File and record locking.
222 */
223 /* ARGSUSED */
224 int
fs_frlock(vnode_t * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,flk_callback_t * flk_cbp,cred_t * cr,caller_context_t * ct)225 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
226 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
227 {
228 int frcmd;
229 int nlmid;
230 int error = 0;
231 boolean_t skip_lock = B_FALSE;
232 flk_callback_t serialize_callback;
233 int serialize = 0;
234 v_mode_t mode;
235
236 switch (cmd) {
237
238 case F_GETLK:
239 case F_O_GETLK:
240 if (flag & F_REMOTELOCK) {
241 frcmd = RCMDLCK;
242 } else if (flag & F_PXFSLOCK) {
243 frcmd = PCMDLCK;
244 } else {
245 frcmd = 0;
246 bfp->l_pid = ttoproc(curthread)->p_pid;
247 bfp->l_sysid = 0;
248 }
249 break;
250
251 case F_OFD_GETLK:
252 /*
253 * TBD we do not support remote OFD locks at this time.
254 */
255 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
256 error = EINVAL;
257 goto done;
258 }
259 skip_lock = B_TRUE;
260 break;
261
262 case F_SETLK_NBMAND:
263 /*
264 * Are NBMAND locks allowed on this file?
265 */
266 if (!vp->v_vfsp ||
267 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
268 error = EINVAL;
269 goto done;
270 }
271 if (vp->v_type != VREG) {
272 error = EINVAL;
273 goto done;
274 }
275 /*FALLTHROUGH*/
276
277 case F_SETLK:
278 if (flag & F_REMOTELOCK) {
279 frcmd = SETFLCK|RCMDLCK;
280 } else if (flag & F_PXFSLOCK) {
281 frcmd = SETFLCK|PCMDLCK;
282 } else {
283 frcmd = SETFLCK;
284 bfp->l_pid = ttoproc(curthread)->p_pid;
285 bfp->l_sysid = 0;
286 }
287 if (cmd == F_SETLK_NBMAND &&
288 (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
289 frcmd |= NBMLCK;
290 }
291
292 if (nbl_need_check(vp)) {
293 nbl_start_crit(vp, RW_WRITER);
294 serialize = 1;
295 if (frcmd & NBMLCK) {
296 mode = (bfp->l_type == F_RDLCK) ?
297 V_READ : V_RDANDWR;
298 if (vn_is_mapped(vp, mode)) {
299 error = EAGAIN;
300 goto done;
301 }
302 }
303 }
304 break;
305
306 case F_SETLKW:
307 if (flag & F_REMOTELOCK) {
308 frcmd = SETFLCK|SLPFLCK|RCMDLCK;
309 } else if (flag & F_PXFSLOCK) {
310 frcmd = SETFLCK|SLPFLCK|PCMDLCK;
311 } else {
312 frcmd = SETFLCK|SLPFLCK;
313 bfp->l_pid = ttoproc(curthread)->p_pid;
314 bfp->l_sysid = 0;
315 }
316
317 if (nbl_need_check(vp)) {
318 nbl_start_crit(vp, RW_WRITER);
319 serialize = 1;
320 }
321 break;
322
323 case F_OFD_SETLK:
324 case F_OFD_SETLKW:
325 case F_FLOCK:
326 case F_FLOCKW:
327 /*
328 * TBD we do not support remote OFD locks at this time.
329 */
330 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
331 error = EINVAL;
332 goto done;
333 }
334 skip_lock = B_TRUE;
335 break;
336
337 case F_HASREMOTELOCKS:
338 nlmid = GETNLMID(bfp->l_sysid);
339 if (nlmid != 0) { /* booted as a cluster */
340 l_has_rmt(bfp) =
341 cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
342 } else { /* not booted as a cluster */
343 l_has_rmt(bfp) = flk_has_remote_locks(vp);
344 }
345
346 goto done;
347
348 default:
349 error = EINVAL;
350 goto done;
351 }
352
353 /*
354 * If this is a blocking lock request and we're serializing lock
355 * requests, modify the callback list to leave the critical region
356 * while we're waiting for the lock.
357 */
358
359 if (serialize && (frcmd & SLPFLCK) != 0) {
360 flk_add_callback(&serialize_callback,
361 frlock_serialize_blocked, vp, flk_cbp);
362 flk_cbp = &serialize_callback;
363 }
364
365 if (!skip_lock)
366 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
367
368 if (serialize && (frcmd & SLPFLCK) != 0)
369 flk_del_callback(&serialize_callback);
370
371 done:
372 if (serialize)
373 nbl_end_crit(vp);
374
375 return (error);
376 }
377
378 /*
379 * Callback when a lock request blocks and we are serializing requests. If
380 * before sleeping, leave the critical region. If after wakeup, reenter
381 * the critical region.
382 */
383
384 static callb_cpr_t *
frlock_serialize_blocked(flk_cb_when_t when,void * infop)385 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
386 {
387 vnode_t *vp = (vnode_t *)infop;
388
389 if (when == FLK_BEFORE_SLEEP)
390 nbl_end_crit(vp);
391 else {
392 nbl_start_crit(vp, RW_WRITER);
393 }
394
395 return (NULL);
396 }
397
398 /*
399 * Allow any flags.
400 */
401 /* ARGSUSED */
402 int
fs_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)403 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
404 {
405 return (0);
406 }
407
408 /*
409 * Return the answer requested to poll() for non-device files.
410 * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
411 */
412 struct pollhead fs_pollhd;
413
414 /* ARGSUSED */
415 int
fs_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)416 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
417 struct pollhead **phpp, caller_context_t *ct)
418 {
419 *reventsp = 0;
420 if (events & POLLIN)
421 *reventsp |= POLLIN;
422 if (events & POLLRDNORM)
423 *reventsp |= POLLRDNORM;
424 if (events & POLLRDBAND)
425 *reventsp |= POLLRDBAND;
426 if (events & POLLOUT)
427 *reventsp |= POLLOUT;
428 if (events & POLLWRBAND)
429 *reventsp |= POLLWRBAND;
430 *phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
431 return (0);
432 }
433
434 /*
435 * POSIX pathconf() support.
436 */
437 /* ARGSUSED */
438 int
fs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)439 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
440 caller_context_t *ct)
441 {
442 ulong_t val;
443 int error = 0;
444 struct statvfs64 vfsbuf;
445
446 switch (cmd) {
447
448 case _PC_LINK_MAX:
449 val = MAXLINK;
450 break;
451
452 case _PC_MAX_CANON:
453 val = MAX_CANON;
454 break;
455
456 case _PC_MAX_INPUT:
457 val = MAX_INPUT;
458 break;
459
460 case _PC_NAME_MAX:
461 bzero(&vfsbuf, sizeof (vfsbuf));
462 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
463 break;
464 val = vfsbuf.f_namemax;
465 break;
466
467 case _PC_PATH_MAX:
468 case _PC_SYMLINK_MAX:
469 val = MAXPATHLEN;
470 break;
471
472 case _PC_PIPE_BUF:
473 val = PIPE_BUF;
474 break;
475
476 case _PC_NO_TRUNC:
477 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
478 val = 1; /* NOTRUNC is enabled for vp */
479 else
480 val = (ulong_t)-1;
481 break;
482
483 case _PC_VDISABLE:
484 val = _POSIX_VDISABLE;
485 break;
486
487 case _PC_CHOWN_RESTRICTED:
488 if (rstchown)
489 val = rstchown; /* chown restricted enabled */
490 else
491 val = (ulong_t)-1;
492 break;
493
494 case _PC_FILESIZEBITS:
495
496 /*
497 * If ever we come here it means that underlying file system
498 * does not recognise the command and therefore this
499 * configurable limit cannot be determined. We return -1
500 * and don't change errno.
501 */
502
503 val = (ulong_t)-1; /* large file support */
504 break;
505
506 case _PC_ACL_ENABLED:
507 val = 0;
508 break;
509
510 case _PC_CASE_BEHAVIOR:
511 val = _CASE_SENSITIVE;
512 if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
513 val |= _CASE_INSENSITIVE;
514 if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
515 val &= ~_CASE_SENSITIVE;
516 break;
517
518 case _PC_SATTR_ENABLED:
519 case _PC_SATTR_EXISTS:
520 val = 0;
521 break;
522
523 case _PC_ACCESS_FILTERING:
524 val = 0;
525 break;
526
527 default:
528 error = EINVAL;
529 break;
530 }
531
532 if (error == 0)
533 *valp = val;
534 return (error);
535 }
536
537 /*
538 * Dispose of a page.
539 */
540 /* ARGSUSED */
541 void
fs_dispose(struct vnode * vp,page_t * pp,int fl,int dn,struct cred * cr,caller_context_t * ct)542 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
543 caller_context_t *ct)
544 {
545
546 ASSERT(fl == B_FREE || fl == B_INVAL);
547
548 if (fl == B_FREE)
549 page_free(pp, dn);
550 else
551 page_destroy(pp, dn);
552 }
553
554 /* ARGSUSED */
555 void
fs_nodispose(struct vnode * vp,page_t * pp,int fl,int dn,struct cred * cr,caller_context_t * ct)556 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
557 caller_context_t *ct)
558 {
559 cmn_err(CE_PANIC, "fs_nodispose invoked");
560 }
561
562 /*
563 * fabricate acls for file systems that do not support acls.
564 */
565 /* ARGSUSED */
566 int
fs_fab_acl(vnode_t * vp,vsecattr_t * vsecattr,int flag,cred_t * cr,caller_context_t * ct)567 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
568 caller_context_t *ct)
569 {
570 aclent_t *aclentp;
571 struct vattr vattr;
572 int error;
573 size_t aclsize;
574
575 vsecattr->vsa_aclcnt = 0;
576 vsecattr->vsa_aclentsz = 0;
577 vsecattr->vsa_aclentp = NULL;
578 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */
579 vsecattr->vsa_dfaclentp = NULL;
580
581 vattr.va_mask = AT_MODE | AT_UID | AT_GID;
582 if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
583 return (error);
584
585 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
586 aclsize = 4 * sizeof (aclent_t);
587 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
588 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
589 aclentp = vsecattr->vsa_aclentp;
590
591 aclentp->a_type = USER_OBJ; /* Owner */
592 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
593 aclentp->a_id = vattr.va_uid; /* Really undefined */
594 aclentp++;
595
596 aclentp->a_type = GROUP_OBJ; /* Group */
597 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
598 aclentp->a_id = vattr.va_gid; /* Really undefined */
599 aclentp++;
600
601 aclentp->a_type = OTHER_OBJ; /* Other */
602 aclentp->a_perm = vattr.va_mode & 0007;
603 aclentp->a_id = (gid_t)-1; /* Really undefined */
604 aclentp++;
605
606 aclentp->a_type = CLASS_OBJ; /* Class */
607 aclentp->a_perm = (ushort_t)(0007);
608 aclentp->a_id = (gid_t)-1; /* Really undefined */
609 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
610 VERIFY(0 == acl_trivial_create(vattr.va_mode,
611 (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
612 &vsecattr->vsa_aclcnt));
613 vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
614 }
615
616 return (error);
617 }
618
619 /*
620 * Common code for implementing DOS share reservations
621 */
622 /* ARGSUSED4 */
623 int
fs_shrlock(struct vnode * vp,int cmd,struct shrlock * shr,int flag,cred_t * cr,caller_context_t * ct)624 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
625 caller_context_t *ct)
626 {
627 int error;
628
629 /*
630 * Make sure that the file was opened with permissions appropriate
631 * for the request, and make sure the caller isn't trying to sneak
632 * in an NBMAND request.
633 */
634 if (cmd == F_SHARE) {
635 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
636 ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
637 return (EBADF);
638 if (shr->s_access & (F_RMACC | F_MDACC))
639 return (EINVAL);
640 if (shr->s_deny & (F_MANDDNY | F_RMDNY))
641 return (EINVAL);
642 }
643 if (cmd == F_SHARE_NBMAND) {
644 /* make sure nbmand is allowed on the file */
645 if (!vp->v_vfsp ||
646 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
647 return (EINVAL);
648 }
649 if (vp->v_type != VREG) {
650 return (EINVAL);
651 }
652 }
653
654 nbl_start_crit(vp, RW_WRITER);
655
656 switch (cmd) {
657
658 case F_SHARE_NBMAND:
659 shr->s_deny |= F_MANDDNY;
660 /*FALLTHROUGH*/
661 case F_SHARE:
662 error = add_share(vp, shr);
663 break;
664
665 case F_UNSHARE:
666 error = del_share(vp, shr);
667 break;
668
669 case F_HASREMOTELOCKS:
670 /*
671 * We are overloading this command to refer to remote
672 * shares as well as remote locks, despite its name.
673 */
674 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
675 error = 0;
676 break;
677
678 default:
679 error = EINVAL;
680 break;
681 }
682
683 nbl_end_crit(vp);
684 return (error);
685 }
686
687 /*ARGSUSED1*/
688 int
fs_vnevent_nosupport(vnode_t * vp,vnevent_t e,vnode_t * dvp,char * fnm,caller_context_t * ct)689 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
690 caller_context_t *ct)
691 {
692 ASSERT(vp != NULL);
693 return (ENOTSUP);
694 }
695
696 /*ARGSUSED1*/
697 int
fs_vnevent_support(vnode_t * vp,vnevent_t e,vnode_t * dvp,char * fnm,caller_context_t * ct)698 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
699 caller_context_t *ct)
700 {
701 ASSERT(vp != NULL);
702 return (0);
703 }
704
705 /*
706 * return 1 for non-trivial ACL.
707 *
708 * NB: It is not necessary for the caller to VOP_RWLOCK since
709 * we only issue VOP_GETSECATTR.
710 *
711 * Returns 0 == trivial
712 * 1 == NOT Trivial
713 * <0 could not determine.
714 */
715 int
fs_acl_nontrivial(vnode_t * vp,cred_t * cr)716 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
717 {
718 ulong_t acl_styles;
719 ulong_t acl_flavor;
720 vsecattr_t vsecattr;
721 int error;
722 int isnontrivial;
723
724 /* determine the forms of ACLs maintained */
725 error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
726
727 /* clear bits we don't understand and establish default acl_style */
728 acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
729 if (error || (acl_styles == 0))
730 acl_styles = _ACL_ACLENT_ENABLED;
731
732 vsecattr.vsa_aclentp = NULL;
733 vsecattr.vsa_dfaclentp = NULL;
734 vsecattr.vsa_aclcnt = 0;
735 vsecattr.vsa_dfaclcnt = 0;
736
737 while (acl_styles) {
738 /* select one of the styles as current flavor */
739 acl_flavor = 0;
740 if (acl_styles & _ACL_ACLENT_ENABLED) {
741 acl_flavor = _ACL_ACLENT_ENABLED;
742 vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
743 } else if (acl_styles & _ACL_ACE_ENABLED) {
744 acl_flavor = _ACL_ACE_ENABLED;
745 vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
746 }
747
748 ASSERT(vsecattr.vsa_mask && acl_flavor);
749 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
750 if (error == 0)
751 break;
752
753 /* that flavor failed */
754 acl_styles &= ~acl_flavor;
755 }
756
757 /* if all styles fail then assume trivial */
758 if (acl_styles == 0)
759 return (0);
760
761 /* process the flavor that worked */
762 isnontrivial = 0;
763 if (acl_flavor & _ACL_ACLENT_ENABLED) {
764 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
765 isnontrivial = 1;
766 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
767 kmem_free(vsecattr.vsa_aclentp,
768 vsecattr.vsa_aclcnt * sizeof (aclent_t));
769 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
770 kmem_free(vsecattr.vsa_dfaclentp,
771 vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
772 }
773 if (acl_flavor & _ACL_ACE_ENABLED) {
774 isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
775 vsecattr.vsa_aclcnt);
776
777 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
778 kmem_free(vsecattr.vsa_aclentp,
779 vsecattr.vsa_aclcnt * sizeof (ace_t));
780 /* ACE has no vsecattr.vsa_dfaclcnt */
781 }
782 return (isnontrivial);
783 }
784
785 /*
786 * Check whether we need a retry to recover from STALE error.
787 */
788 int
fs_need_estale_retry(int retry_count)789 fs_need_estale_retry(int retry_count)
790 {
791 if (retry_count < fs_estale_retry)
792 return (1);
793 else
794 return (0);
795 }
796
797
798 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
799
800 /*
801 * Routine for anti-virus scanner to call to register its scanning routine.
802 */
803 void
fs_vscan_register(int (* av_scan)(vnode_t *,cred_t *,int))804 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
805 {
806 fs_av_scan = av_scan;
807 }
808
809 /*
810 * Routine for file systems to call to initiate anti-virus scanning.
811 * Scanning will only be done on REGular files (currently).
812 */
813 int
fs_vscan(vnode_t * vp,cred_t * cr,int async)814 fs_vscan(vnode_t *vp, cred_t *cr, int async)
815 {
816 int ret = 0;
817
818 if (fs_av_scan && vp->v_type == VREG)
819 ret = (*fs_av_scan)(vp, cr, async);
820
821 return (ret);
822 }
823
824 /*
825 * support functions for reparse point
826 */
827 /*
828 * reparse_vnode_parse
829 *
830 * Read the symlink data of a reparse point specified by the vnode
831 * and return the reparse data as name-value pair in the nvlist.
832 */
833 int
reparse_vnode_parse(vnode_t * vp,nvlist_t * nvl)834 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
835 {
836 int err;
837 char *lkdata;
838 struct uio uio;
839 struct iovec iov;
840
841 if (vp == NULL || nvl == NULL)
842 return (EINVAL);
843
844 lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
845
846 /*
847 * Set up io vector to read sym link data
848 */
849 iov.iov_base = lkdata;
850 iov.iov_len = MAXREPARSELEN;
851 uio.uio_iov = &iov;
852 uio.uio_iovcnt = 1;
853 uio.uio_segflg = UIO_SYSSPACE;
854 uio.uio_extflg = UIO_COPY_CACHED;
855 uio.uio_loffset = (offset_t)0;
856 uio.uio_resid = MAXREPARSELEN;
857
858 if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
859 *(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
860 err = reparse_parse(lkdata, nvl);
861 }
862 kmem_free(lkdata, MAXREPARSELEN); /* done with lkdata */
863
864 return (err);
865 }
866
867 void
reparse_point_init()868 reparse_point_init()
869 {
870 mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
871 }
872
873 static door_handle_t
reparse_door_get_handle()874 reparse_door_get_handle()
875 {
876 door_handle_t dh;
877
878 mutex_enter(&reparsed_door_lock);
879 if ((dh = reparsed_door) == NULL) {
880 if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
881 reparsed_door = NULL;
882 dh = NULL;
883 } else
884 dh = reparsed_door;
885 }
886 mutex_exit(&reparsed_door_lock);
887 return (dh);
888 }
889
890 static void
reparse_door_reset_handle()891 reparse_door_reset_handle()
892 {
893 mutex_enter(&reparsed_door_lock);
894 reparsed_door = NULL;
895 mutex_exit(&reparsed_door_lock);
896 }
897
898 /*
899 * reparse_kderef
900 *
901 * Accepts the service-specific item from the reparse point and returns
902 * the service-specific data requested. The caller specifies the size of
903 * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
904 * if the results will not fit in the buffer, in which case, *bufsz will
905 * contain the number of bytes needed to hold the results.
906 *
907 * if ok return 0 and update *bufsize with length of actual result
908 * else return error code.
909 */
910 int
reparse_kderef(const char * svc_type,const char * svc_data,char * buf,size_t * bufsize)911 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
912 size_t *bufsize)
913 {
914 int err, retries, need_free, retried_doorhd;
915 size_t dlen, res_len;
916 char *darg;
917 door_arg_t door_args;
918 reparsed_door_res_t *resp;
919 door_handle_t rp_door;
920
921 if (svc_type == NULL || svc_data == NULL || buf == NULL ||
922 bufsize == NULL)
923 return (EINVAL);
924
925 /* get reparsed's door handle */
926 if ((rp_door = reparse_door_get_handle()) == NULL)
927 return (EBADF);
928
929 /* setup buffer for door_call args and results */
930 dlen = strlen(svc_type) + strlen(svc_data) + 2;
931 if (*bufsize < dlen) {
932 darg = kmem_alloc(dlen, KM_SLEEP);
933 need_free = 1;
934 } else {
935 darg = buf; /* use same buffer for door's args & results */
936 need_free = 0;
937 }
938
939 /* build argument string of door call */
940 (void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
941
942 /* setup args for door call */
943 door_args.data_ptr = darg;
944 door_args.data_size = dlen;
945 door_args.desc_ptr = NULL;
946 door_args.desc_num = 0;
947 door_args.rbuf = buf;
948 door_args.rsize = *bufsize;
949
950 /* do the door_call */
951 retried_doorhd = 0;
952 retries = 0;
953 door_ki_hold(rp_door);
954 while ((err = door_ki_upcall_limited(rp_door, &door_args,
955 NULL, SIZE_MAX, 0)) != 0) {
956 if (err == EAGAIN || err == EINTR) {
957 if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
958 delay(SEC_TO_TICK(1));
959 continue;
960 }
961 } else if (err == EBADF) {
962 /* door server goes away... */
963 reparse_door_reset_handle();
964
965 if (retried_doorhd == 0) {
966 door_ki_rele(rp_door);
967 retried_doorhd++;
968 rp_door = reparse_door_get_handle();
969 if (rp_door != NULL) {
970 door_ki_hold(rp_door);
971 continue;
972 }
973 }
974 }
975 break;
976 }
977
978 if (rp_door)
979 door_ki_rele(rp_door);
980
981 if (need_free)
982 kmem_free(darg, dlen); /* done with args buffer */
983
984 if (err != 0)
985 return (err);
986
987 resp = (reparsed_door_res_t *)door_args.rbuf;
988 if ((err = resp->res_status) == 0) {
989 /*
990 * have to save the length of the results before the
991 * bcopy below since it's can be an overlap copy that
992 * overwrites the reparsed_door_res_t structure at
993 * the beginning of the buffer.
994 */
995 res_len = (size_t)resp->res_len;
996
997 /* deref call is ok */
998 if (res_len > *bufsize)
999 err = EOVERFLOW;
1000 else
1001 bcopy(resp->res_data, buf, res_len);
1002 *bufsize = res_len;
1003 }
1004 if (door_args.rbuf != buf)
1005 kmem_free(door_args.rbuf, door_args.rsize);
1006
1007 return (err);
1008 }
1009