xref: /illumos-gate/usr/src/uts/common/fs/vnode.c (revision 814a60b13c0ad90e5d2edfd29a7a84bbf416cc1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/t_lock.h>
46 #include <sys/errno.h>
47 #include <sys/cred.h>
48 #include <sys/user.h>
49 #include <sys/uio.h>
50 #include <sys/file.h>
51 #include <sys/pathname.h>
52 #include <sys/vfs.h>
53 #include <sys/vnode.h>
54 #include <sys/rwstlock.h>
55 #include <sys/fem.h>
56 #include <sys/stat.h>
57 #include <sys/mode.h>
58 #include <sys/conf.h>
59 #include <sys/sysmacros.h>
60 #include <sys/cmn_err.h>
61 #include <sys/systm.h>
62 #include <sys/kmem.h>
63 #include <sys/debug.h>
64 #include <c2/audit.h>
65 #include <sys/acl.h>
66 #include <sys/nbmlock.h>
67 #include <sys/fcntl.h>
68 #include <fs/fs_subr.h>
69 
/*
 * ISROFILE(vp): non-zero when vp is not a character device, block device
 * or FIFO and vn_is_readonly() reports it as read-only.  The argument is
 * expanded more than once, so it must be free of side effects.
 */
#define	ISROFILE(vp)	\
	(!((vp)->v_type == VCHR || (vp)->v_type == VBLK || \
	    (vp)->v_type == VFIFO) && vn_is_readonly(vp))
74 
75 /*
76  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
77  * numerical order of S_IFMT and vnode types.)
78  */
79 enum vtype iftovt_tab[] = {
80 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
81 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
82 };
83 
84 ushort_t vttoif_tab[] = {
85 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
86 	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
87 };
88 
/*
 * The system vnode cache: a kmem cache handle kept at file scope with
 * external linkage.  It is only defined here; nothing in this part of
 * the file creates or uses it.
 */

kmem_cache_t *vn_cache;
94 
95 
96 /*
97  * Vnode operations vector.
98  */
99 
100 static const fs_operation_trans_def_t vn_ops_table[] = {
101 	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
102 	    fs_nosys, fs_nosys,
103 
104 	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
105 	    fs_nosys, fs_nosys,
106 
107 	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
108 	    fs_nosys, fs_nosys,
109 
110 	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
111 	    fs_nosys, fs_nosys,
112 
113 	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
114 	    fs_nosys, fs_nosys,
115 
116 	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
117 	    fs_setfl, fs_nosys,
118 
119 	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
120 	    fs_nosys, fs_nosys,
121 
122 	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
123 	    fs_nosys, fs_nosys,
124 
125 	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
126 	    fs_nosys, fs_nosys,
127 
128 	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
129 	    fs_nosys, fs_nosys,
130 
131 	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
132 	    fs_nosys, fs_nosys,
133 
134 	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
135 	    fs_nosys, fs_nosys,
136 
137 	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
138 	    fs_nosys, fs_nosys,
139 
140 	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
141 	    fs_nosys, fs_nosys,
142 
143 	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
144 	    fs_nosys, fs_nosys,
145 
146 	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
147 	    fs_nosys, fs_nosys,
148 
149 	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
150 	    fs_nosys, fs_nosys,
151 
152 	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
153 	    fs_nosys, fs_nosys,
154 
155 	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
156 	    fs_nosys, fs_nosys,
157 
158 	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
159 	    fs_nosys, fs_nosys,
160 
161 	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
162 	    fs_nosys, fs_nosys,
163 
164 	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
165 	    fs_nosys, fs_nosys,
166 
167 	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
168 	    fs_rwlock, fs_rwlock,
169 
170 	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
171 	    (fs_generic_func_p) fs_rwunlock,
172 	    (fs_generic_func_p) fs_rwunlock,	/* no errors allowed */
173 
174 	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
175 	    fs_nosys, fs_nosys,
176 
177 	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
178 	    fs_cmp, fs_cmp,		/* no errors allowed */
179 
180 	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
181 	    fs_frlock, fs_nosys,
182 
183 	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
184 	    fs_nosys, fs_nosys,
185 
186 	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
187 	    fs_nosys, fs_nosys,
188 
189 	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
190 	    fs_nosys, fs_nosys,
191 
192 	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
193 	    fs_nosys, fs_nosys,
194 
195 	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
196 	    (fs_generic_func_p) fs_nosys_map,
197 	    (fs_generic_func_p) fs_nosys_map,
198 
199 	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
200 	    (fs_generic_func_p) fs_nosys_addmap,
201 	    (fs_generic_func_p) fs_nosys_addmap,
202 
203 	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
204 	    fs_nosys, fs_nosys,
205 
206 	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
207 	    (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
208 
209 	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
210 	    fs_nosys, fs_nosys,
211 
212 	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
213 	    fs_pathconf, fs_nosys,
214 
215 	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
216 	    fs_nosys, fs_nosys,
217 
218 	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
219 	    fs_nosys, fs_nosys,
220 
221 	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
222 	    (fs_generic_func_p) fs_dispose,
223 	    (fs_generic_func_p) fs_nodispose,
224 
225 	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
226 	    fs_nosys, fs_nosys,
227 
228 	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
229 	    fs_fab_acl, fs_nosys,
230 
231 	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
232 	    fs_shrlock, fs_nosys,
233 
234 	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
235 	    (fs_generic_func_p) fs_vnevent_nosupport,
236 	    (fs_generic_func_p) fs_vnevent_nosupport,
237 
238 	NULL, 0, NULL, NULL
239 };
240 
241 
242 /*
243  * Read or write a vnode.  Called from kernel code.
244  */
245 int
246 vn_rdwr(
247 	enum uio_rw rw,
248 	struct vnode *vp,
249 	caddr_t base,
250 	ssize_t len,
251 	offset_t offset,
252 	enum uio_seg seg,
253 	int ioflag,
254 	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
255 	cred_t *cr,
256 	ssize_t *residp)
257 {
258 	struct uio uio;
259 	struct iovec iov;
260 	int error;
261 	int in_crit = 0;
262 
263 	if (rw == UIO_WRITE && ISROFILE(vp))
264 		return (EROFS);
265 
266 	if (len < 0)
267 		return (EIO);
268 
269 	iov.iov_base = base;
270 	iov.iov_len = len;
271 	uio.uio_iov = &iov;
272 	uio.uio_iovcnt = 1;
273 	uio.uio_loffset = offset;
274 	uio.uio_segflg = (short)seg;
275 	uio.uio_resid = len;
276 	uio.uio_llimit = ulimit;
277 
278 	/*
279 	 * We have to enter the critical region before calling VOP_RWLOCK
280 	 * to avoid a deadlock with ufs.
281 	 */
282 	if (nbl_need_check(vp)) {
283 		int svmand;
284 
285 		nbl_start_crit(vp, RW_READER);
286 		in_crit = 1;
287 		error = nbl_svmand(vp, cr, &svmand);
288 		if (error != 0)
289 			goto done;
290 		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
291 		    uio.uio_offset, uio.uio_resid, svmand)) {
292 			error = EACCES;
293 			goto done;
294 		}
295 	}
296 
297 	(void) VOP_RWLOCK(vp,
298 		rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
299 	if (rw == UIO_WRITE) {
300 		uio.uio_fmode = FWRITE;
301 		uio.uio_extflg = UIO_COPY_DEFAULT;
302 		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
303 	} else {
304 		uio.uio_fmode = FREAD;
305 		uio.uio_extflg = UIO_COPY_CACHED;
306 		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
307 	}
308 	VOP_RWUNLOCK(vp, rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE,
309 									NULL);
310 	if (residp)
311 		*residp = uio.uio_resid;
312 	else if (uio.uio_resid)
313 		error = EIO;
314 
315 done:
316 	if (in_crit)
317 		nbl_end_crit(vp);
318 	return (error);
319 }
320 
321 /*
322  * Release a vnode.  Call VOP_INACTIVE on last reference or
323  * decrement reference count.
324  *
325  * To avoid race conditions, the v_count is left at 1 for
326  * the call to VOP_INACTIVE. This prevents another thread
327  * from reclaiming and releasing the vnode *before* the
328  * VOP_INACTIVE routine has a chance to destroy the vnode.
329  * We can't have more than 1 thread calling VOP_INACTIVE
330  * on a vnode.
331  */
332 void
333 vn_rele(vnode_t *vp)
334 {
335 	if (vp->v_count == 0)
336 		cmn_err(CE_PANIC, "vn_rele: vnode ref count 0");
337 	mutex_enter(&vp->v_lock);
338 	if (vp->v_count == 1) {
339 		mutex_exit(&vp->v_lock);
340 		VOP_INACTIVE(vp, CRED());
341 	} else {
342 		vp->v_count--;
343 		mutex_exit(&vp->v_lock);
344 	}
345 }
346 
347 /*
348  * Like vn_rele() except that it clears v_stream under v_lock.
349  * This is used by sockfs when it dismantels the association between
350  * the sockfs node and the vnode in the underlaying file system.
351  * v_lock has to be held to prevent a thread coming through the lookupname
352  * path from accessing a stream head that is going away.
353  */
354 void
355 vn_rele_stream(vnode_t *vp)
356 {
357 	if (vp->v_count == 0)
358 		cmn_err(CE_PANIC, "vn_rele: vnode ref count 0");
359 	mutex_enter(&vp->v_lock);
360 	vp->v_stream = NULL;
361 	if (vp->v_count == 1) {
362 		mutex_exit(&vp->v_lock);
363 		VOP_INACTIVE(vp, CRED());
364 	} else {
365 		vp->v_count--;
366 		mutex_exit(&vp->v_lock);
367 	}
368 }
369 
370 int
371 vn_open(
372 	char *pnamep,
373 	enum uio_seg seg,
374 	int filemode,
375 	int createmode,
376 	struct vnode **vpp,
377 	enum create crwhy,
378 	mode_t umask)
379 {
380 	return (vn_openat(pnamep, seg, filemode,
381 			createmode, vpp, crwhy, umask, NULL));
382 }
383 
384 
385 /*
386  * Open/create a vnode.
387  * This may be callable by the kernel, the only known use
388  * of user context being that the current user credentials
389  * are used for permissions.  crwhy is defined iff filemode & FCREAT.
390  */
391 int
392 vn_openat(
393 	char *pnamep,
394 	enum uio_seg seg,
395 	int filemode,
396 	int createmode,
397 	struct vnode **vpp,
398 	enum create crwhy,
399 	mode_t umask,
400 	struct vnode *startvp)
401 {
402 	struct vnode *vp;
403 	int mode;
404 	int error;
405 	int in_crit = 0;
406 	struct vattr vattr;
407 	enum symfollow follow;
408 
409 	mode = 0;
410 	if (filemode & FREAD)
411 		mode |= VREAD;
412 	if (filemode & (FWRITE|FTRUNC))
413 		mode |= VWRITE;
414 
415 	/* symlink interpretation */
416 	if (filemode & FNOFOLLOW)
417 		follow = NO_FOLLOW;
418 	else
419 		follow = FOLLOW;
420 
421 top:
422 	if (filemode & FCREAT) {
423 		enum vcexcl excl;
424 
425 		/*
426 		 * Wish to create a file.
427 		 */
428 		vattr.va_type = VREG;
429 		vattr.va_mode = createmode;
430 		vattr.va_mask = AT_TYPE|AT_MODE;
431 		if (filemode & FTRUNC) {
432 			vattr.va_size = 0;
433 			vattr.va_mask |= AT_SIZE;
434 		}
435 		if (filemode & FEXCL)
436 			excl = EXCL;
437 		else
438 			excl = NONEXCL;
439 
440 		if (error =
441 		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
442 					(filemode & ~(FTRUNC|FEXCL)),
443 						umask, startvp))
444 			return (error);
445 	} else {
446 		/*
447 		 * Wish to open a file.  Just look it up.
448 		 */
449 		if (error = lookupnameat(pnamep, seg, follow,
450 		    NULLVPP, &vp, startvp)) {
451 			if (error == ESTALE)
452 				goto top;
453 			return (error);
454 		}
455 
456 		/*
457 		 * Get the attributes to check whether file is large.
458 		 * We do this only if the FOFFMAX flag is not set and
459 		 * only for regular files.
460 		 */
461 
462 		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
463 			vattr.va_mask = AT_SIZE;
464 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED()))) {
465 				goto out;
466 			}
467 			if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
468 				/*
469 				 * Large File API - regular open fails
470 				 * if FOFFMAX flag is set in file mode
471 				 */
472 				error = EOVERFLOW;
473 				goto out;
474 			}
475 		}
476 		/*
477 		 * Can't write directories, active texts, or
478 		 * read-only filesystems.  Can't truncate files
479 		 * on which mandatory locking is in effect.
480 		 */
481 		if (filemode & (FWRITE|FTRUNC)) {
482 			/*
483 			 * Allow writable directory if VDIROPEN flag is set.
484 			 */
485 			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
486 				error = EISDIR;
487 				goto out;
488 			}
489 			if (ISROFILE(vp)) {
490 				error = EROFS;
491 				goto out;
492 			}
493 			/*
494 			 * Can't truncate files on which mandatory locking
495 			 * or non-blocking mandatory locking is in effect.
496 			 */
497 			if (filemode & FTRUNC) {
498 				vnode_t *rvp;
499 
500 				if (VOP_REALVP(vp, &rvp) != 0)
501 					rvp = vp;
502 				if (nbl_need_check(vp)) {
503 					nbl_start_crit(vp, RW_READER);
504 					in_crit = 1;
505 					vattr.va_mask = AT_MODE|AT_SIZE;
506 					if ((error = VOP_GETATTR(vp, &vattr, 0,
507 					    CRED())) == 0) {
508 						if (rvp->v_filocks != NULL)
509 							if (MANDLOCK(vp,
510 							    vattr.va_mode))
511 								error = EAGAIN;
512 						if (!error) {
513 							if (nbl_conflict(vp,
514 							    NBL_WRITE, 0,
515 							    vattr.va_size, 0))
516 								error = EACCES;
517 						}
518 					}
519 				} else if (rvp->v_filocks != NULL) {
520 					vattr.va_mask = AT_MODE;
521 					if ((error = VOP_GETATTR(vp, &vattr,
522 					    0, CRED())) == 0 && MANDLOCK(vp,
523 					    vattr.va_mode))
524 						error = EAGAIN;
525 				}
526 			}
527 			if (error)
528 				goto out;
529 		}
530 		/*
531 		 * Check permissions.
532 		 */
533 		if (error = VOP_ACCESS(vp, mode, 0, CRED()))
534 			goto out;
535 	}
536 
537 	/*
538 	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
539 	 */
540 	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
541 		error = EINVAL;
542 		goto out;
543 	}
544 	if (filemode & FNOLINKS) {
545 		vattr.va_mask = AT_NLINK;
546 		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED()))) {
547 			goto out;
548 		}
549 		if (vattr.va_nlink != 1) {
550 			error = EMLINK;
551 			goto out;
552 		}
553 	}
554 
555 	/*
556 	 * Opening a socket corresponding to the AF_UNIX pathname
557 	 * in the filesystem name space is not supported.
558 	 * However, VSOCK nodes in namefs are supported in order
559 	 * to make fattach work for sockets.
560 	 *
561 	 * XXX This uses VOP_REALVP to distinguish between
562 	 * an unopened namefs node (where VOP_REALVP returns a
563 	 * different VSOCK vnode) and a VSOCK created by vn_create
564 	 * in some file system (where VOP_REALVP would never return
565 	 * a different vnode).
566 	 */
567 	if (vp->v_type == VSOCK) {
568 		struct vnode *nvp;
569 
570 		error = VOP_REALVP(vp, &nvp);
571 		if (error != 0 || nvp == NULL || nvp == vp ||
572 		    nvp->v_type != VSOCK) {
573 			error = EOPNOTSUPP;
574 			goto out;
575 		}
576 	}
577 	/*
578 	 * Do opening protocol.
579 	 */
580 	error = VOP_OPEN(&vp, filemode, CRED());
581 	/*
582 	 * Truncate if required.
583 	 */
584 	if (error == 0 && (filemode & FTRUNC) && !(filemode & FCREAT)) {
585 		vattr.va_size = 0;
586 		vattr.va_mask = AT_SIZE;
587 		if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
588 			(void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED());
589 	}
590 out:
591 	ASSERT(vp->v_count > 0);
592 
593 	if (in_crit) {
594 		nbl_end_crit(vp);
595 		in_crit = 0;
596 	}
597 	if (error) {
598 		/*
599 		 * The following clause was added to handle a problem
600 		 * with NFS consistency.  It is possible that a lookup
601 		 * of the file to be opened succeeded, but the file
602 		 * itself doesn't actually exist on the server.  This
603 		 * is chiefly due to the DNLC containing an entry for
604 		 * the file which has been removed on the server.  In
605 		 * this case, we just start over.  If there was some
606 		 * other cause for the ESTALE error, then the lookup
607 		 * of the file will fail and the error will be returned
608 		 * above instead of looping around from here.
609 		 */
610 		VN_RELE(vp);
611 		if (error == ESTALE)
612 			goto top;
613 	} else
614 		*vpp = vp;
615 	return (error);
616 }
617 
618 int
619 vn_create(
620 	char *pnamep,
621 	enum uio_seg seg,
622 	struct vattr *vap,
623 	enum vcexcl excl,
624 	int mode,
625 	struct vnode **vpp,
626 	enum create why,
627 	int flag,
628 	mode_t umask)
629 {
630 	return (vn_createat(pnamep, seg, vap, excl, mode, vpp,
631 			why, flag, umask, NULL));
632 }
633 
634 /*
635  * Create a vnode (makenode).
636  */
637 int
638 vn_createat(
639 	char *pnamep,
640 	enum uio_seg seg,
641 	struct vattr *vap,
642 	enum vcexcl excl,
643 	int mode,
644 	struct vnode **vpp,
645 	enum create why,
646 	int flag,
647 	mode_t umask,
648 	struct vnode *startvp)
649 {
650 	struct vnode *dvp;	/* ptr to parent dir vnode */
651 	struct vnode *vp = NULL;
652 	struct pathname pn;
653 	int error;
654 	int in_crit = 0;
655 	struct vattr vattr;
656 	enum symfollow follow;
657 
658 	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
659 
660 	/* symlink interpretation */
661 	if ((flag & FNOFOLLOW) || excl == EXCL)
662 		follow = NO_FOLLOW;
663 	else
664 		follow = FOLLOW;
665 	flag &= ~(FNOFOLLOW|FNOLINKS);
666 
667 top:
668 	/*
669 	 * Lookup directory.
670 	 * If new object is a file, call lower level to create it.
671 	 * Note that it is up to the lower level to enforce exclusive
672 	 * creation, if the file is already there.
673 	 * This allows the lower level to do whatever
674 	 * locking or protocol that is needed to prevent races.
675 	 * If the new object is directory call lower level to make
676 	 * the new directory, with "." and "..".
677 	 */
678 	if (error = pn_get(pnamep, seg, &pn))
679 		return (error);
680 #ifdef  C2_AUDIT
681 	if (audit_active)
682 		audit_vncreate_start();
683 #endif /* C2_AUDIT */
684 	dvp = NULL;
685 	*vpp = NULL;
686 	/*
687 	 * lookup will find the parent directory for the vnode.
688 	 * When it is done the pn holds the name of the entry
689 	 * in the directory.
690 	 * If this is a non-exclusive create we also find the node itself.
691 	 */
692 	error = lookuppnat(&pn, NULL, follow, &dvp,
693 	    (excl == EXCL) ? NULLVPP : vpp, startvp);
694 	if (error) {
695 		pn_free(&pn);
696 		if (error == ESTALE)
697 			goto top;
698 		if (why == CRMKDIR && error == EINVAL)
699 			error = EEXIST;		/* SVID */
700 		return (error);
701 	}
702 
703 	if (why != CRMKNOD)
704 		vap->va_mode &= ~VSVTX;
705 
706 	/*
707 	 * If default ACLs are defined for the directory don't apply the
708 	 * umask if umask is passed.
709 	 */
710 
711 	if (umask) {
712 
713 		vsecattr_t vsec;
714 
715 		vsec.vsa_aclcnt = 0;
716 		vsec.vsa_aclentp = NULL;
717 		vsec.vsa_dfaclcnt = 0;
718 		vsec.vsa_dfaclentp = NULL;
719 		vsec.vsa_mask = VSA_DFACLCNT;
720 		if (error = VOP_GETSECATTR(dvp, &vsec, 0, CRED())) {
721 			if (*vpp != NULL)
722 				VN_RELE(*vpp);
723 			goto out;
724 		}
725 
726 		/*
727 		 * Apply the umask if no default ACLs.
728 		 */
729 		if (vsec.vsa_dfaclcnt == 0)
730 			vap->va_mode &= ~umask;
731 
732 		/*
733 		 * VOP_GETSECATTR() may have allocated memory for ACLs we
734 		 * didn't request, so double-check and free it if necessary.
735 		 */
736 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
737 			kmem_free((caddr_t)vsec.vsa_aclentp,
738 				vsec.vsa_aclcnt * sizeof (aclent_t));
739 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
740 			kmem_free((caddr_t)vsec.vsa_dfaclentp,
741 				vsec.vsa_dfaclcnt * sizeof (aclent_t));
742 	}
743 
744 	/*
745 	 * In general we want to generate EROFS if the file system is
746 	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
747 	 * documents the open system call, and it says that O_CREAT has no
748 	 * effect if the file already exists.  Bug 1119649 states
749 	 * that open(path, O_CREAT, ...) fails when attempting to open an
750 	 * existing file on a read only file system.  Thus, the first part
751 	 * of the following if statement has 3 checks:
752 	 *	if the file exists &&
753 	 *		it is being open with write access &&
754 	 *		the file system is read only
755 	 *	then generate EROFS
756 	 */
757 	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
758 	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
759 		if (*vpp)
760 			VN_RELE(*vpp);
761 		error = EROFS;
762 	} else if (excl == NONEXCL && *vpp != NULL) {
763 		vnode_t *rvp;
764 
765 		/*
766 		 * File already exists.  If a mandatory lock has been
767 		 * applied, return error.
768 		 */
769 		vp = *vpp;
770 		if (VOP_REALVP(vp, &rvp) != 0)
771 			rvp = vp;
772 		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
773 			nbl_start_crit(vp, RW_READER);
774 			in_crit = 1;
775 		}
776 		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
777 			vattr.va_mask = AT_MODE|AT_SIZE;
778 			if (error = VOP_GETATTR(vp, &vattr, 0, CRED())) {
779 				goto out;
780 			}
781 			if (MANDLOCK(vp, vattr.va_mode)) {
782 				error = EAGAIN;
783 				goto out;
784 			}
785 			/*
786 			 * File cannot be truncated if non-blocking mandatory
787 			 * locks are currently on the file.
788 			 */
789 			if ((vap->va_mask & AT_SIZE) && in_crit) {
790 				u_offset_t offset;
791 				ssize_t length;
792 
793 				offset = vap->va_size > vattr.va_size ?
794 						vattr.va_size : vap->va_size;
795 				length = vap->va_size > vattr.va_size ?
796 						vap->va_size - vattr.va_size :
797 						vattr.va_size - vap->va_size;
798 				if (nbl_conflict(vp, NBL_WRITE, offset,
799 						length, 0)) {
800 					error = EACCES;
801 					goto out;
802 				}
803 			}
804 		}
805 
806 		/*
807 		 * If the file is the root of a VFS, we've crossed a
808 		 * mount point and the "containing" directory that we
809 		 * acquired above (dvp) is irrelevant because it's in
810 		 * a different file system.  We apply VOP_CREATE to the
811 		 * target itself instead of to the containing directory
812 		 * and supply a null path name to indicate (conventionally)
813 		 * the node itself as the "component" of interest.
814 		 *
815 		 * The intercession of the file system is necessary to
816 		 * ensure that the appropriate permission checks are
817 		 * done.
818 		 */
819 		if (vp->v_flag & VROOT) {
820 			ASSERT(why != CRMKDIR);
821 			error =
822 			    VOP_CREATE(vp, "", vap, excl, mode, vpp, CRED(),
823 				    flag);
824 			/*
825 			 * If the create succeeded, it will have created
826 			 * a new reference to the vnode.  Give up the
827 			 * original reference.  The assertion should not
828 			 * get triggered because NBMAND locks only apply to
829 			 * VREG files.  And if in_crit is non-zero for some
830 			 * reason, detect that here, rather than when we
831 			 * deference a null vp.
832 			 */
833 			ASSERT(in_crit == 0);
834 			VN_RELE(vp);
835 			vp = NULL;
836 			goto out;
837 		}
838 
839 		/*
840 		 * Large File API - non-large open (FOFFMAX flag not set)
841 		 * of regular file fails if the file size exceeds MAXOFF32_T.
842 		 */
843 		if (why != CRMKDIR &&
844 		    !(flag & FOFFMAX) &&
845 		    (vp->v_type == VREG)) {
846 			vattr.va_mask = AT_SIZE;
847 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED()))) {
848 				goto out;
849 			}
850 			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
851 				error = EOVERFLOW;
852 				goto out;
853 			}
854 		}
855 	}
856 
857 	if (error == 0) {
858 		/*
859 		 * Call mkdir() if specified, otherwise create().
860 		 */
861 		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */
862 
863 		if (why == CRMKDIR)
864 			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED());
865 		else if (!must_be_dir)
866 			error = VOP_CREATE(dvp, pn.pn_path, vap,
867 			    excl, mode, vpp, CRED(), flag);
868 		else
869 			error = ENOTDIR;
870 	}
871 
872 out:
873 
874 #ifdef C2_AUDIT
875 	if (audit_active)
876 		audit_vncreate_finish(*vpp, error);
877 #endif  /* C2_AUDIT */
878 	if (in_crit) {
879 		nbl_end_crit(vp);
880 		in_crit = 0;
881 	}
882 	if (vp != NULL) {
883 		VN_RELE(vp);
884 		vp = NULL;
885 	}
886 	pn_free(&pn);
887 	VN_RELE(dvp);
888 	/*
889 	 * The following clause was added to handle a problem
890 	 * with NFS consistency.  It is possible that a lookup
891 	 * of the file to be created succeeded, but the file
892 	 * itself doesn't actually exist on the server.  This
893 	 * is chiefly due to the DNLC containing an entry for
894 	 * the file which has been removed on the server.  In
895 	 * this case, we just start over.  If there was some
896 	 * other cause for the ESTALE error, then the lookup
897 	 * of the file will fail and the error will be returned
898 	 * above instead of looping around from here.
899 	 */
900 	if (error == ESTALE)
901 		goto top;
902 	return (error);
903 }
904 
905 int
906 vn_link(char *from, char *to, enum uio_seg seg)
907 {
908 	struct vnode *fvp;		/* from vnode ptr */
909 	struct vnode *tdvp;		/* to directory vnode ptr */
910 	struct pathname pn;
911 	int error;
912 	struct vattr vattr;
913 	dev_t fsid;
914 
915 top:
916 	fvp = tdvp = NULL;
917 	if (error = pn_get(to, seg, &pn))
918 		return (error);
919 	if (error = lookupname(from, seg, NO_FOLLOW, NULLVPP, &fvp))
920 		goto out;
921 	if (error = lookuppn(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP))
922 		goto out;
923 	/*
924 	 * Make sure both source vnode and target directory vnode are
925 	 * in the same vfs and that it is writeable.
926 	 */
927 	vattr.va_mask = AT_FSID;
928 	if (error = VOP_GETATTR(fvp, &vattr, 0, CRED()))
929 		goto out;
930 	fsid = vattr.va_fsid;
931 	vattr.va_mask = AT_FSID;
932 	if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED()))
933 		goto out;
934 	if (fsid != vattr.va_fsid) {
935 		error = EXDEV;
936 		goto out;
937 	}
938 	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
939 		error = EROFS;
940 		goto out;
941 	}
942 	/*
943 	 * Do the link.
944 	 */
945 	(void) pn_fixslash(&pn);
946 	error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED());
947 out:
948 	pn_free(&pn);
949 	if (fvp)
950 		VN_RELE(fvp);
951 	if (tdvp)
952 		VN_RELE(tdvp);
953 	if (error == ESTALE)
954 		goto top;
955 	return (error);
956 }
957 
958 int
959 vn_rename(char *from, char *to, enum uio_seg seg)
960 {
961 	return (vn_renameat(NULL, from, NULL, to, seg));
962 }
963 
964 int
965 vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
966 		char *tname, enum uio_seg seg)
967 {
968 	int error;
969 	struct vattr vattr;
970 	struct pathname fpn;		/* from pathname */
971 	struct pathname tpn;		/* to pathname */
972 	dev_t fsid;
973 	int in_crit = 0;
974 	vnode_t *fromvp, *fvp;
975 	vnode_t *tovp;
976 
977 top:
978 	fvp = fromvp = tovp = NULL;
979 	/*
980 	 * Get to and from pathnames.
981 	 */
982 	if (error = pn_get(fname, seg, &fpn))
983 		return (error);
984 	if (error = pn_get(tname, seg, &tpn)) {
985 		pn_free(&fpn);
986 		return (error);
987 	}
988 
989 	/*
990 	 * First we need to resolve the correct directories
991 	 * The passed in directories may only be a starting point,
992 	 * but we need the real directories the file(s) live in.
993 	 * For example the fname may be something like usr/lib/sparc
994 	 * and we were passed in the / directory, but we need to
995 	 * use the lib directory for the rename.
996 	 */
997 
998 #ifdef  C2_AUDIT
999 	if (audit_active)
1000 		audit_setfsat_path(1);
1001 #endif /* C2_AUDIT */
1002 	/*
1003 	 * Lookup to and from directories.
1004 	 */
1005 	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1006 		goto out;
1007 	}
1008 
1009 	/*
1010 	 * Make sure there is an entry.
1011 	 */
1012 	if (fvp == NULL) {
1013 		error = ENOENT;
1014 		goto out;
1015 	}
1016 
1017 #ifdef  C2_AUDIT
1018 	if (audit_active)
1019 		audit_setfsat_path(3);
1020 #endif /* C2_AUDIT */
1021 	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, NULLVPP, tdvp)) {
1022 		goto out;
1023 	}
1024 
1025 	/*
1026 	 * Make sure both the from vnode directory and the to directory
1027 	 * are in the same vfs and the to directory is writable.
1028 	 * We check fsid's, not vfs pointers, so loopback fs works.
1029 	 */
1030 	if (fromvp != tovp) {
1031 		vattr.va_mask = AT_FSID;
1032 		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED()))
1033 			goto out;
1034 		fsid = vattr.va_fsid;
1035 		vattr.va_mask = AT_FSID;
1036 		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED()))
1037 			goto out;
1038 		if (fsid != vattr.va_fsid) {
1039 			error = EXDEV;
1040 			goto out;
1041 		}
1042 	}
1043 
1044 	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1045 		error = EROFS;
1046 		goto out;
1047 	}
1048 
1049 	if (nbl_need_check(fvp)) {
1050 		nbl_start_crit(fvp, RW_READER);
1051 		in_crit = 1;
1052 		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0)) {
1053 			error = EACCES;
1054 			goto out;
1055 		}
1056 	}
1057 
1058 	/*
1059 	 * Do the rename.
1060 	 */
1061 	(void) pn_fixslash(&tpn);
1062 	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED());
1063 
1064 out:
1065 	pn_free(&fpn);
1066 	pn_free(&tpn);
1067 	if (in_crit) {
1068 		nbl_end_crit(fvp);
1069 		in_crit = 0;
1070 	}
1071 	if (fromvp)
1072 		VN_RELE(fromvp);
1073 	if (tovp)
1074 		VN_RELE(tovp);
1075 	if (fvp)
1076 		VN_RELE(fvp);
1077 	if (error == ESTALE)
1078 		goto top;
1079 	return (error);
1080 }
1081 
1082 /*
1083  * Remove a file or directory.
1084  */
1085 int
1086 vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1087 {
1088 	return (vn_removeat(NULL, fnamep, seg, dirflag));
1089 }
1090 
/*
 * Remove the entry named by fnamep, looked up via lookuppnat() with
 * startvp as the starting vnode argument.
 *
 *	startvp	- passed straight through to lookuppnat()
 *	fnamep	- name to remove; copied in with pn_get() according to seg
 *	seg	- address space that fnamep lives in
 *	dirflag	- RMDIRECTORY selects VOP_RMDIR(); anything else selects
 *		  VOP_REMOVE()
 *
 * Returns 0 or an errno value.  Whenever the lookup or the final
 * operation yields ESTALE, everything is released and the whole
 * sequence is retried from the top.
 */
int
vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	struct vnode *vp;		/* entry vnode */
	struct vnode *dvp;		/* ptr to parent dir vnode */
	struct vnode *coveredvp;
	struct pathname pn;		/* name of entry */
	enum vtype vtype;
	int error;
	struct vfs *vfsp;
	struct vfs *dvfsp;	/* ptr to parent dir vfs */
	int in_crit = 0;

top:
	if (error = pn_get(fnamep, seg, &pn))
		return (error);
	dvp = vp = NULL;
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
		pn_free(&pn);
		if (error == ESTALE)
			goto top;
		return (error);
	}

	/*
	 * Make sure there is an entry.
	 */
	if (vp == NULL) {
		error = ENOENT;
		goto out;
	}

	vfsp = vp->v_vfsp;
	dvfsp = dvp->v_vfsp;

	/*
	 * If the named file is the root of a mounted filesystem, fail,
	 * unless it's marked unlinkable.  In that case, unmount the
	 * filesystem and proceed to unlink the covered vnode.  (If the
	 * covered vnode is a directory, use rmdir instead of unlink,
	 * to avoid file system corruption.)
	 */
	if (vp->v_flag & VROOT) {
		if (vfsp->vfs_flag & VFS_UNLINKABLE) {
			if (dirflag == RMDIRECTORY) {
				/*
				 * User called rmdir(2) on a file that has
				 * been namefs mounted on top of.  Since
				 * namefs doesn't allow directories to
				 * be mounted on other files we know
				 * vp is not of type VDIR so fail the
				 * operation.
				 */
				error = ENOTDIR;
				goto out;
			}
			/*
			 * Swap our hold from the mounted root to the
			 * vnode it covers before attempting the unmount.
			 */
			coveredvp = vfsp->vfs_vnodecovered;
			VN_HOLD(coveredvp);
			VN_RELE(vp);
			vp = NULL;
			if ((error = vn_vfswlock(coveredvp)) == 0)
				error = dounmount(vfsp, 0, CRED());
			/*
			 * Unmounted the namefs file system; now get
			 * the object it was mounted over.
			 *
			 * vp is set even when the lock or unmount failed,
			 * so that the hold on coveredvp is dropped at "out".
			 */
			vp = coveredvp;
			/*
			 * If namefs was mounted over a directory, then
			 * we want to use rmdir() instead of unlink().
			 */
			if (vp->v_type == VDIR)
				dirflag = RMDIRECTORY;
		} else
			error = EBUSY;

		if (error)
			goto out;
	}

	/*
	 * Make sure filesystem is writeable.
	 * We check the parent directory's vfs in case this is an lofs vnode.
	 */
	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/* Remember the type now; vp may be released just below. */
	vtype = vp->v_type;

	/*
	 * If there is the possibility of an nbmand share reservation, make
	 * sure it's okay to remove the file.  Keep a reference to the
	 * vnode, so that we can exit the nbl critical region after
	 * calling VOP_REMOVE.
	 * If there is no possibility of an nbmand share reservation,
	 * release the vnode reference now.  Filesystems like NFS may
	 * behave differently if there is an extra reference, so get rid of
	 * this one.  Fortunately, we can't have nbmand mounts on NFS
	 * filesystems.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
			error = EACCES;
			goto out;
		}
	} else {
		VN_RELE(vp);
		vp = NULL;
	}

	if (dirflag == RMDIRECTORY) {
		/*
		 * Caller is using rmdir(2), which can only be applied to
		 * directories.
		 */
		if (vtype != VDIR) {
			error = ENOTDIR;
		} else {
			vnode_t *cwd;
			proc_t *pp = curproc;

			/*
			 * Take a hold on the current directory under p_lock
			 * so it stays valid for the VOP_RMDIR() call.
			 */
			mutex_enter(&pp->p_lock);
			cwd = PTOU(pp)->u_cdir;
			VN_HOLD(cwd);
			mutex_exit(&pp->p_lock);
			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED());
			VN_RELE(cwd);
		}
	} else {
		/*
		 * Unlink(2) can be applied to anything.
		 */
		error = VOP_REMOVE(dvp, pn.pn_path, CRED());
	}

out:
	/*
	 * Common exit: free the pathname, leave the nbl critical region if
	 * we entered it, and drop whatever vnode holds are still outstanding.
	 */
	pn_free(&pn);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if (error == ESTALE)
		goto top;
	return (error);
}
1243 
1244 /*
1245  * Utility function to compare equality of vnodes.
1246  * Compare the underlying real vnodes, if there are underlying vnodes.
1247  * This is a more thorough comparison than the VN_CMP() macro provides.
1248  */
1249 int
1250 vn_compare(vnode_t *vp1, vnode_t *vp2)
1251 {
1252 	vnode_t *realvp;
1253 
1254 	if (vp1 != NULL && VOP_REALVP(vp1, &realvp) == 0)
1255 		vp1 = realvp;
1256 	if (vp2 != NULL && VOP_REALVP(vp2, &realvp) == 0)
1257 		vp2 = realvp;
1258 	return (VN_CMP(vp1, vp2));
1259 }
1260 
/*
 * The number of locks to hash into.  This value must be a power
 * of 2 minus 1 and should probably also be prime.
 *
 * The "power of 2 minus 1" requirement exists because VN_VFSLOCKS_HASH()
 * below uses NUM_BUCKETS directly as a bit mask.
 * NOTE(review): 1023 is not prime (3 * 11 * 31).
 */
#define	NUM_BUCKETS	1023

/*
 * One hash bucket: a mutex protecting a singly linked list of lock
 * entries.  The pad member rounds the structure up to 64 bytes, matching
 * the 64-byte alignment requested for the bucket array below.
 */
struct  vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1 .
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];

/* Number of low-order pointer bits discarded before masking. */
#define	VN_VFSLOCKS_SHIFT	9

/*
 * Map a vfs/vnode pointer to a bucket index in [0, NUM_BUCKETS]:
 * shift the pointer value right by VN_VFSLOCKS_SHIFT and mask it.
 */
#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
1284 
1285 /*
1286  * vn_vfslocks_getlock() uses an HASH scheme to generate
1287  * rwstlock using vfs/vnode pointer passed to it.
1288  *
1289  * vn_vfslocks_rele() releases a reference in the
1290  * HASH table which allows the entry allocated by
1291  * vn_vfslocks_getlock() to be freed at a later
1292  * stage when the refcount drops to zero.
1293  */
1294 
1295 vn_vfslocks_entry_t *
1296 vn_vfslocks_getlock(void *vfsvpptr)
1297 {
1298 	struct vn_vfslocks_bucket *bp;
1299 	vn_vfslocks_entry_t *vep;
1300 	vn_vfslocks_entry_t *tvep;
1301 
1302 	ASSERT(vfsvpptr != NULL);
1303 	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
1304 
1305 	mutex_enter(&bp->vb_lock);
1306 	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
1307 		if (vep->ve_vpvfs == vfsvpptr) {
1308 			vep->ve_refcnt++;
1309 			mutex_exit(&bp->vb_lock);
1310 			return (vep);
1311 		}
1312 	}
1313 	mutex_exit(&bp->vb_lock);
1314 	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
1315 	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
1316 	vep->ve_vpvfs = (char *)vfsvpptr;
1317 	vep->ve_refcnt = 1;
1318 	mutex_enter(&bp->vb_lock);
1319 	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
1320 		if (tvep->ve_vpvfs == vfsvpptr) {
1321 			tvep->ve_refcnt++;
1322 			mutex_exit(&bp->vb_lock);
1323 
1324 			/*
1325 			 * There is already an entry in the hash
1326 			 * destroy what we just allocated.
1327 			 */
1328 			rwst_destroy(&vep->ve_lock);
1329 			kmem_free(vep, sizeof (*vep));
1330 			return (tvep);
1331 		}
1332 	}
1333 	vep->ve_next = bp->vb_list;
1334 	bp->vb_list = vep;
1335 	mutex_exit(&bp->vb_lock);
1336 	return (vep);
1337 }
1338 
1339 void
1340 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
1341 {
1342 	struct vn_vfslocks_bucket *bp;
1343 	vn_vfslocks_entry_t *vep;
1344 	vn_vfslocks_entry_t *pvep;
1345 
1346 	ASSERT(vepent != NULL);
1347 	ASSERT(vepent->ve_vpvfs != NULL);
1348 
1349 	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
1350 
1351 	mutex_enter(&bp->vb_lock);
1352 	vepent->ve_refcnt--;
1353 
1354 	if ((int32_t)vepent->ve_refcnt < 0)
1355 		cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
1356 
1357 	if (vepent->ve_refcnt == 0) {
1358 		for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
1359 			if (vep->ve_vpvfs == vepent->ve_vpvfs) {
1360 				if (bp->vb_list == vep)
1361 					bp->vb_list = vep->ve_next;
1362 				else {
1363 					/* LINTED */
1364 					pvep->ve_next = vep->ve_next;
1365 				}
1366 				mutex_exit(&bp->vb_lock);
1367 				rwst_destroy(&vep->ve_lock);
1368 				kmem_free(vep, sizeof (*vep));
1369 				return;
1370 			}
1371 			pvep = vep;
1372 		}
1373 		cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
1374 	}
1375 	mutex_exit(&bp->vb_lock);
1376 }
1377 
1378 /*
1379  * vn_vfswlock_wait is used to implement a lock which is logically a writers
1380  * lock protecting the v_vfsmountedhere field.
1381  * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
1382  * except that it blocks to acquire the lock VVFSLOCK.
1383  *
1384  * traverse() and routines re-implementing part of traverse (e.g. autofs)
1385  * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
1386  * need the non-blocking version of the writers lock i.e. vn_vfswlock
1387  */
1388 int
1389 vn_vfswlock_wait(vnode_t *vp)
1390 {
1391 	int retval;
1392 	vn_vfslocks_entry_t *vpvfsentry;
1393 	ASSERT(vp != NULL);
1394 
1395 	vpvfsentry = vn_vfslocks_getlock(vp);
1396 	retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
1397 
1398 	if (retval == EINTR) {
1399 		vn_vfslocks_rele(vpvfsentry);
1400 		return (EINTR);
1401 	}
1402 	return (retval);
1403 }
1404 
1405 int
1406 vn_vfsrlock_wait(vnode_t *vp)
1407 {
1408 	int retval;
1409 	vn_vfslocks_entry_t *vpvfsentry;
1410 	ASSERT(vp != NULL);
1411 
1412 	vpvfsentry = vn_vfslocks_getlock(vp);
1413 	retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
1414 
1415 	if (retval == EINTR) {
1416 		vn_vfslocks_rele(vpvfsentry);
1417 		return (EINTR);
1418 	}
1419 
1420 	return (retval);
1421 }
1422 
1423 
1424 /*
1425  * vn_vfswlock is used to implement a lock which is logically a writers lock
1426  * protecting the v_vfsmountedhere field.
1427  */
1428 int
1429 vn_vfswlock(vnode_t *vp)
1430 {
1431 	vn_vfslocks_entry_t *vpvfsentry;
1432 
1433 	/*
1434 	 * If vp is NULL then somebody is trying to lock the covered vnode
1435 	 * of /.  (vfs_vnodecovered is NULL for /).  This situation will
1436 	 * only happen when unmounting /.  Since that operation will fail
1437 	 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
1438 	 */
1439 	if (vp == NULL)
1440 		return (EBUSY);
1441 
1442 	vpvfsentry = vn_vfslocks_getlock(vp);
1443 
1444 	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
1445 		return (0);
1446 
1447 	vn_vfslocks_rele(vpvfsentry);
1448 	return (EBUSY);
1449 }
1450 
1451 int
1452 vn_vfsrlock(vnode_t *vp)
1453 {
1454 	vn_vfslocks_entry_t *vpvfsentry;
1455 
1456 	/*
1457 	 * If vp is NULL then somebody is trying to lock the covered vnode
1458 	 * of /.  (vfs_vnodecovered is NULL for /).  This situation will
1459 	 * only happen when unmounting /.  Since that operation will fail
1460 	 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
1461 	 */
1462 	if (vp == NULL)
1463 		return (EBUSY);
1464 
1465 	vpvfsentry = vn_vfslocks_getlock(vp);
1466 
1467 	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
1468 		return (0);
1469 
1470 	vn_vfslocks_rele(vpvfsentry);
1471 	return (EBUSY);
1472 }
1473 
1474 
1475 /*
1476  * For compatibility with old (deprecated) interface, continue
1477  * to support vanilla mutex.
1478  */
1479 int
1480 vn_vfslock(vnode_t *vp)
1481 {
1482 	return (vn_vfswlock(vp));
1483 }
1484 
1485 void
1486 vn_vfsunlock(vnode_t *vp)
1487 {
1488 	vn_vfslocks_entry_t *vpvfsentry;
1489 
1490 	/*
1491 	 * ve_refcnt needs to be decremented twice.
1492 	 * 1. To release refernce after a call to vn_vfslocks_getlock()
1493 	 * 2. To release the reference from the locking routines like
1494 	 *    vn_vfsrlock/vn_vfswlock etc,.
1495 	 */
1496 	vpvfsentry = vn_vfslocks_getlock(vp);
1497 	vn_vfslocks_rele(vpvfsentry);
1498 
1499 	rwst_exit(&vpvfsentry->ve_lock);
1500 	vn_vfslocks_rele(vpvfsentry);
1501 }
1502 
1503 int
1504 vn_vfswlock_held(vnode_t *vp)
1505 {
1506 	int held;
1507 	vn_vfslocks_entry_t *vpvfsentry;
1508 
1509 	ASSERT(vp != NULL);
1510 
1511 	vpvfsentry = vn_vfslocks_getlock(vp);
1512 	held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
1513 
1514 	vn_vfslocks_rele(vpvfsentry);
1515 	return (held);
1516 }
1517 
1518 
1519 int
1520 vn_make_ops(
1521 	const char *name,			/* Name of file system */
1522 	const fs_operation_def_t *templ,	/* Operation specification */
1523 	vnodeops_t **actual)			/* Return the vnodeops */
1524 {
1525 	int unused_ops;
1526 	int error;
1527 
1528 	*actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
1529 
1530 	(*actual)->vnop_name = name;
1531 
1532 	error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
1533 	if (error) {
1534 		kmem_free(*actual, sizeof (vnodeops_t));
1535 	}
1536 
1537 #if DEBUG
1538 	if (unused_ops != 0)
1539 		cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
1540 		    "but not used", name, unused_ops);
1541 #endif
1542 
1543 	return (error);
1544 }
1545 
1546 /*
1547  * Free the vnodeops created as a result of vn_make_ops()
1548  */
1549 void
1550 vn_freevnodeops(vnodeops_t *vnops)
1551 {
1552 	kmem_free(vnops, sizeof (vnodeops_t));
1553 }
1554 
1555 /*
1556  * Vnode cache.
1557  */
1558 
1559 /* ARGSUSED */
1560 static int
1561 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
1562 {
1563 	struct vnode *vp;
1564 
1565 	vp = buf;
1566 
1567 	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
1568 	cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
1569 	rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
1570 	rw_init(&vp->v_mslock, NULL, RW_DEFAULT, NULL);
1571 
1572 	vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
1573 	vp->v_path = NULL;
1574 	vp->v_mpssdata = NULL;
1575 
1576 	return (0);
1577 }
1578 
1579 /* ARGSUSED */
1580 static void
1581 vn_cache_destructor(void *buf, void *cdrarg)
1582 {
1583 	struct vnode *vp;
1584 
1585 	vp = buf;
1586 
1587 	rw_destroy(&vp->v_mslock);
1588 	rw_destroy(&vp->v_nbllock);
1589 	cv_destroy(&vp->v_cv);
1590 	mutex_destroy(&vp->v_lock);
1591 }
1592 
1593 void
1594 vn_create_cache(void)
1595 {
1596 	vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), 64,
1597 	    vn_cache_constructor, vn_cache_destructor, NULL, NULL,
1598 	    NULL, 0);
1599 }
1600 
/*
 * Destroy the vnode kmem cache referenced by vn_cache.
 */
void
vn_destroy_cache(void)
{
	kmem_cache_destroy(vn_cache);
}
1606 
1607 /*
1608  * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
1609  * cached by the file system and vnodes remain associated.
1610  */
1611 void
1612 vn_recycle(vnode_t *vp)
1613 {
1614 	ASSERT(vp->v_pages == NULL);
1615 
1616 	/*
1617 	 * XXX - This really belongs in vn_reinit(), but we have some issues
1618 	 * with the counts.  Best to have it here for clean initialization.
1619 	 */
1620 	vp->v_rdcnt = 0;
1621 	vp->v_wrcnt = 0;
1622 	vp->v_mmap_read = 0;
1623 	vp->v_mmap_write = 0;
1624 
1625 	/*
1626 	 * If FEM was in use, make sure everything gets cleaned up
1627 	 * NOTE: vp->v_femhead is initialized to NULL in the vnode
1628 	 * constructor.
1629 	 */
1630 	if (vp->v_femhead) {
1631 		/* XXX - There should be a free_femhead() that does all this */
1632 		ASSERT(vp->v_femhead->femh_list == NULL);
1633 		mutex_destroy(&vp->v_femhead->femh_lock);
1634 		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
1635 		vp->v_femhead = NULL;
1636 	}
1637 	if (vp->v_path) {
1638 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
1639 		vp->v_path = NULL;
1640 	}
1641 	vp->v_mpssdata = NULL;
1642 }
1643 
1644 /*
1645  * Used to reset the vnode fields including those that are directly accessible
1646  * as well as those which require an accessor function.
1647  *
1648  * Does not initialize:
1649  *	synchronization objects: v_lock, v_nbllock, v_cv
1650  *	v_data (since FS-nodes and vnodes point to each other and should
1651  *		be updated simultaneously)
1652  *	v_op (in case someone needs to make a VOP call on this object)
1653  */
1654 void
1655 vn_reinit(vnode_t *vp)
1656 {
1657 	vp->v_count = 1;
1658 	vp->v_vfsp = NULL;
1659 	vp->v_stream = NULL;
1660 	vp->v_vfsmountedhere = NULL;
1661 	vp->v_flag = 0;
1662 	vp->v_type = VNON;
1663 	vp->v_rdev = NODEV;
1664 
1665 	vp->v_filocks = NULL;
1666 	vp->v_shrlocks = NULL;
1667 	vp->v_pages = NULL;
1668 	vp->v_npages = 0;
1669 	vp->v_msnpages = 0;
1670 	vp->v_scanfront = NULL;
1671 	vp->v_scanback = NULL;
1672 
1673 	vp->v_locality = NULL;
1674 	vp->v_scantime = 0;
1675 	vp->v_mset = 0;
1676 	vp->v_msflags = 0;
1677 	vp->v_msnext = NULL;
1678 	vp->v_msprev = NULL;
1679 
1680 	/* Handles v_femhead, v_path, and the r/w/map counts */
1681 	vn_recycle(vp);
1682 }
1683 
1684 vnode_t *
1685 vn_alloc(int kmflag)
1686 {
1687 	vnode_t *vp;
1688 
1689 	vp = kmem_cache_alloc(vn_cache, kmflag);
1690 
1691 	if (vp != NULL) {
1692 		vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
1693 		vn_reinit(vp);
1694 	}
1695 
1696 	return (vp);
1697 }
1698 
1699 void
1700 vn_free(vnode_t *vp)
1701 {
1702 	/*
1703 	 * Some file systems call vn_free() with v_count of zero,
1704 	 * some with v_count of 1.  In any case, the value should
1705 	 * never be anything else.
1706 	 */
1707 	ASSERT((vp->v_count == 0) || (vp->v_count == 1));
1708 	if (vp->v_path != NULL) {
1709 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
1710 		vp->v_path = NULL;
1711 	}
1712 
1713 	/* If FEM was in use, make sure everything gets cleaned up */
1714 	if (vp->v_femhead) {
1715 		/* XXX - There should be a free_femhead() that does all this */
1716 		ASSERT(vp->v_femhead->femh_list == NULL);
1717 		mutex_destroy(&vp->v_femhead->femh_lock);
1718 		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
1719 		vp->v_femhead = NULL;
1720 	}
1721 	vp->v_mpssdata = NULL;
1722 	kmem_cache_free(vn_cache, vp);
1723 }
1724 
1725 /*
1726  * vnode status changes, should define better states than 1, 0.
1727  */
1728 void
1729 vn_reclaim(vnode_t *vp)
1730 {
1731 	vfs_t   *vfsp = vp->v_vfsp;
1732 
1733 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1734 		return;
1735 	}
1736 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
1737 }
1738 
1739 void
1740 vn_idle(vnode_t *vp)
1741 {
1742 	vfs_t   *vfsp = vp->v_vfsp;
1743 
1744 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1745 		return;
1746 	}
1747 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
1748 }
1749 void
1750 vn_exists(vnode_t *vp)
1751 {
1752 	vfs_t   *vfsp = vp->v_vfsp;
1753 
1754 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1755 		return;
1756 	}
1757 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
1758 }
1759 
1760 void
1761 vn_invalid(vnode_t *vp)
1762 {
1763 	vfs_t   *vfsp = vp->v_vfsp;
1764 
1765 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1766 		return;
1767 	}
1768 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
1769 }
1770 
1771 /* Vnode event notification */
1772 
1773 int
1774 vnevent_support(vnode_t *vp)
1775 {
1776 	if (vp == NULL)
1777 		return (EINVAL);
1778 
1779 	return (VOP_VNEVENT(vp, VE_SUPPORT));
1780 }
1781 
1782 void
1783 vnevent_rename_src(vnode_t *vp)
1784 {
1785 	if (vp == NULL || vp->v_femhead == NULL) {
1786 		return;
1787 	}
1788 	(void) VOP_VNEVENT(vp, VE_RENAME_SRC);
1789 }
1790 
1791 void
1792 vnevent_rename_dest(vnode_t *vp)
1793 {
1794 	if (vp == NULL || vp->v_femhead == NULL) {
1795 		return;
1796 	}
1797 	(void) VOP_VNEVENT(vp, VE_RENAME_DEST);
1798 }
1799 
1800 void
1801 vnevent_remove(vnode_t *vp)
1802 {
1803 	if (vp == NULL || vp->v_femhead == NULL) {
1804 		return;
1805 	}
1806 	(void) VOP_VNEVENT(vp, VE_REMOVE);
1807 }
1808 
1809 void
1810 vnevent_rmdir(vnode_t *vp)
1811 {
1812 	if (vp == NULL || vp->v_femhead == NULL) {
1813 		return;
1814 	}
1815 	(void) VOP_VNEVENT(vp, VE_RMDIR);
1816 }
1817 
1818 /*
1819  * Vnode accessors.
1820  */
1821 
1822 int
1823 vn_is_readonly(vnode_t *vp)
1824 {
1825 	return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
1826 }
1827 
1828 int
1829 vn_has_flocks(vnode_t *vp)
1830 {
1831 	return (vp->v_filocks != NULL);
1832 }
1833 
1834 int
1835 vn_has_mandatory_locks(vnode_t *vp, int mode)
1836 {
1837 	return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
1838 }
1839 
1840 int
1841 vn_has_cached_data(vnode_t *vp)
1842 {
1843 	return (vp->v_pages != NULL);
1844 }
1845 
1846 /*
1847  * Return 0 if the vnode in question shouldn't be permitted into a zone via
1848  * zone_enter(2).
1849  */
1850 int
1851 vn_can_change_zones(vnode_t *vp)
1852 {
1853 	struct vfssw *vswp;
1854 	int allow = 1;
1855 	vnode_t *rvp;
1856 
1857 	/*
1858 	 * We always want to look at the underlying vnode if there is one.
1859 	 */
1860 	if (VOP_REALVP(vp, &rvp) != 0)
1861 		rvp = vp;
1862 	/*
1863 	 * Some pseudo filesystems (including doorfs) don't actually register
1864 	 * their vfsops_t, so the following may return NULL; we happily let
1865 	 * such vnodes switch zones.
1866 	 */
1867 	vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
1868 	if (vswp != NULL) {
1869 		if (vswp->vsw_flag & VSW_NOTZONESAFE)
1870 			allow = 0;
1871 		vfs_unrefvfssw(vswp);
1872 	}
1873 	return (allow);
1874 }
1875 
1876 /*
1877  * Return nonzero if the vnode is a mount point, zero if not.
1878  */
1879 int
1880 vn_ismntpt(vnode_t *vp)
1881 {
1882 	return (vp->v_vfsmountedhere != NULL);
1883 }
1884 
1885 /* Retrieve the vfs (if any) mounted on this vnode */
1886 vfs_t *
1887 vn_mountedvfs(vnode_t *vp)
1888 {
1889 	return (vp->v_vfsmountedhere);
1890 }
1891 
1892 /*
1893  * vn_is_opened() checks whether a particular file is opened and
1894  * whether the open is for read and/or write.
1895  *
1896  * Vnode counts are only kept on regular files (v_type=VREG).
1897  */
1898 int
1899 vn_is_opened(
1900 	vnode_t *vp,
1901 	v_mode_t mode)
1902 {
1903 
1904 	ASSERT(vp != NULL);
1905 
1906 	switch (mode) {
1907 	case V_WRITE:
1908 		if (vp->v_wrcnt)
1909 			return (V_TRUE);
1910 		break;
1911 	case V_RDANDWR:
1912 		if (vp->v_rdcnt && vp->v_wrcnt)
1913 			return (V_TRUE);
1914 		break;
1915 	case V_RDORWR:
1916 		if (vp->v_rdcnt || vp->v_wrcnt)
1917 			return (V_TRUE);
1918 		break;
1919 	case V_READ:
1920 		if (vp->v_rdcnt)
1921 			return (V_TRUE);
1922 		break;
1923 	}
1924 
1925 	return (V_FALSE);
1926 }
1927 
1928 /*
1929  * vn_is_mapped() checks whether a particular file is mapped and whether
1930  * the file is mapped read and/or write.
1931  */
1932 int
1933 vn_is_mapped(
1934 	vnode_t *vp,
1935 	v_mode_t mode)
1936 {
1937 
1938 	ASSERT(vp != NULL);
1939 
1940 #if !defined(_LP64)
1941 	switch (mode) {
1942 	/*
1943 	 * The atomic_add_64_nv functions force atomicity in the
1944 	 * case of 32 bit architectures. Otherwise the 64 bit values
1945 	 * require two fetches. The value of the fields may be
1946 	 * (potentially) changed between the first fetch and the
1947 	 * second
1948 	 */
1949 	case V_WRITE:
1950 		if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
1951 			return (V_TRUE);
1952 		break;
1953 	case V_RDANDWR:
1954 		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
1955 		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
1956 			return (V_TRUE);
1957 		break;
1958 	case V_RDORWR:
1959 		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
1960 		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
1961 			return (V_TRUE);
1962 		break;
1963 	case V_READ:
1964 		if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
1965 			return (V_TRUE);
1966 		break;
1967 	}
1968 #else
1969 	switch (mode) {
1970 	case V_WRITE:
1971 		if (vp->v_mmap_write)
1972 			return (V_TRUE);
1973 		break;
1974 	case V_RDANDWR:
1975 		if (vp->v_mmap_read && vp->v_mmap_write)
1976 			return (V_TRUE);
1977 		break;
1978 	case V_RDORWR:
1979 		if (vp->v_mmap_read || vp->v_mmap_write)
1980 			return (V_TRUE);
1981 		break;
1982 	case V_READ:
1983 		if (vp->v_mmap_read)
1984 			return (V_TRUE);
1985 		break;
1986 	}
1987 #endif
1988 
1989 	return (V_FALSE);
1990 }
1991 
1992 /*
1993  * Set the operations vector for a vnode.
1994  *
1995  * FEM ensures that the v_femhead pointer is filled in before the
1996  * v_op pointer is changed.  This means that if the v_femhead pointer
1997  * is NULL, and the v_op field hasn't changed since before which checked
1998  * the v_femhead pointer; then our update is ok - we are not racing with
1999  * FEM.
2000  */
2001 void
2002 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2003 {
2004 	vnodeops_t	*op;
2005 
2006 	ASSERT(vp != NULL);
2007 	ASSERT(vnodeops != NULL);
2008 
2009 	op = vp->v_op;
2010 	membar_consumer();
2011 	/*
2012 	 * If vp->v_femhead == NULL, then we'll call casptr() to do the
2013 	 * compare-and-swap on vp->v_op.  If either fails, then FEM is
2014 	 * in effect on the vnode and we need to have FEM deal with it.
2015 	 */
2016 	if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
2017 		fem_setvnops(vp, vnodeops);
2018 	}
2019 }
2020 
2021 /*
2022  * Retrieve the operations vector for a vnode
2023  * As with vn_setops(above); make sure we aren't racing with FEM.
2024  * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2025  * make sense to the callers of this routine.
2026  */
2027 vnodeops_t *
2028 vn_getops(vnode_t *vp)
2029 {
2030 	vnodeops_t	*op;
2031 
2032 	ASSERT(vp != NULL);
2033 
2034 	op = vp->v_op;
2035 	membar_consumer();
2036 	if (vp->v_femhead == NULL && op == vp->v_op) {
2037 		return (op);
2038 	} else {
2039 		return (fem_getvnops(vp));
2040 	}
2041 }
2042 
2043 /*
2044  * Returns non-zero (1) if the vnodeops matches that of the vnode.
2045  * Returns zero (0) if not.
2046  */
2047 int
2048 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2049 {
2050 	return (vn_getops(vp) == vnodeops);
2051 }
2052 
2053 /*
2054  * Returns non-zero (1) if the specified operation matches the
2055  * corresponding operation for that the vnode.
2056  * Returns zero (0) if not.
2057  */
2058 
2059 #define	MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2060 
2061 int
2062 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2063 {
2064 	const fs_operation_trans_def_t *otdp;
2065 	fs_generic_func_p *loc = NULL;
2066 	vnodeops_t	*vop = vn_getops(vp);
2067 
2068 	ASSERT(vopname != NULL);
2069 
2070 	for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2071 		if (MATCHNAME(otdp->name, vopname)) {
2072 			loc = (fs_generic_func_p *)((char *)(vop)
2073 							+ otdp->offset);
2074 			break;
2075 		}
2076 	}
2077 
2078 	return ((loc != NULL) && (*loc == funcp));
2079 }
2080 
2081 /*
2082  * fs_new_caller_id() needs to return a unique ID on a given local system.
2083  * The IDs do not need to survive across reboots.  These are primarily
2084  * used so that (FEM) monitors can detect particular callers (such as
2085  * the NFS server) to a given vnode/vfs operation.
2086  */
2087 u_longlong_t
2088 fs_new_caller_id()
2089 {
2090 	static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2091 
2092 	return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
2093 }
2094 
2095 /*
2096  * Given a starting vnode and a path, updates the path in the target vnode in
2097  * a safe manner.  If the vnode already has path information embedded, then the
2098  * cached path is left untouched.
2099  */
2100 void
2101 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2102     const char *path, size_t plen)
2103 {
2104 	char	*rpath;
2105 	vnode_t	*base;
2106 	size_t	rpathlen, rpathalloc;
2107 	int	doslash = 1;
2108 
2109 	if (*path == '/') {
2110 		base = rootvp;
2111 		path++;
2112 		plen--;
2113 	} else {
2114 		base = startvp;
2115 	}
2116 
2117 	/*
2118 	 * We cannot grab base->v_lock while we hold vp->v_lock because of
2119 	 * the potential for deadlock.
2120 	 */
2121 	mutex_enter(&base->v_lock);
2122 	if (base->v_path == NULL) {
2123 		mutex_exit(&base->v_lock);
2124 		return;
2125 	}
2126 
2127 	rpathlen = strlen(base->v_path);
2128 	rpathalloc = rpathlen + plen + 1;
2129 	/* Avoid adding a slash if there's already one there */
2130 	if (base->v_path[rpathlen-1] == '/')
2131 		doslash = 0;
2132 	else
2133 		rpathalloc++;
2134 
2135 	/*
2136 	 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2137 	 * so we must do this dance.  If, by chance, something changes the path,
2138 	 * just give up since there is no real harm.
2139 	 */
2140 	mutex_exit(&base->v_lock);
2141 
2142 	rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2143 
2144 	mutex_enter(&base->v_lock);
2145 	if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2146 		mutex_exit(&base->v_lock);
2147 		kmem_free(rpath, rpathalloc);
2148 		return;
2149 	}
2150 	bcopy(base->v_path, rpath, rpathlen);
2151 	mutex_exit(&base->v_lock);
2152 
2153 	if (doslash)
2154 		rpath[rpathlen++] = '/';
2155 	bcopy(path, rpath + rpathlen, plen);
2156 	rpath[rpathlen + plen] = '\0';
2157 
2158 	mutex_enter(&vp->v_lock);
2159 	if (vp->v_path != NULL) {
2160 		mutex_exit(&vp->v_lock);
2161 		kmem_free(rpath, rpathalloc);
2162 	} else {
2163 		vp->v_path = rpath;
2164 		mutex_exit(&vp->v_lock);
2165 	}
2166 }
2167 
2168 /*
2169  * Sets the path to the vnode to be the given string, regardless of current
2170  * context.  The string must be a complete path from rootdir.  This is only used
2171  * by fsop_root() for setting the path based on the mountpoint.
2172  */
2173 void
2174 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
2175 {
2176 	char *buf = kmem_alloc(len + 1, KM_SLEEP);
2177 
2178 	mutex_enter(&vp->v_lock);
2179 	if (vp->v_path != NULL) {
2180 		mutex_exit(&vp->v_lock);
2181 		kmem_free(buf, len + 1);
2182 		return;
2183 	}
2184 
2185 	vp->v_path = buf;
2186 	bcopy(str, vp->v_path, len);
2187 	vp->v_path[len] = '\0';
2188 
2189 	mutex_exit(&vp->v_lock);
2190 }
2191 
2192 /*
2193  * Similar to vn_setpath_str(), this function sets the path of the destination
2194  * vnode to the be the same as the source vnode.
2195  */
2196 void
2197 vn_copypath(struct vnode *src, struct vnode *dst)
2198 {
2199 	char *buf;
2200 	int alloc;
2201 
2202 	mutex_enter(&src->v_lock);
2203 	if (src->v_path == NULL) {
2204 		mutex_exit(&src->v_lock);
2205 		return;
2206 	}
2207 	alloc = strlen(src->v_path) + 1;
2208 
2209 	/* avoid kmem_alloc() with lock held */
2210 	mutex_exit(&src->v_lock);
2211 	buf = kmem_alloc(alloc, KM_SLEEP);
2212 	mutex_enter(&src->v_lock);
2213 	if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
2214 		mutex_exit(&src->v_lock);
2215 		kmem_free(buf, alloc);
2216 		return;
2217 	}
2218 	bcopy(src->v_path, buf, alloc);
2219 	mutex_exit(&src->v_lock);
2220 
2221 	mutex_enter(&dst->v_lock);
2222 	if (dst->v_path != NULL) {
2223 		mutex_exit(&dst->v_lock);
2224 		kmem_free(buf, alloc);
2225 		return;
2226 	}
2227 	dst->v_path = buf;
2228 	mutex_exit(&dst->v_lock);
2229 }
2230 
2231 /*
2232  * XXX Private interface for segvn routines that handle vnode
2233  * large page segments.
2234  *
2235  * return 1 if vp's file system VOP_PAGEIO() implementation
2236  * can be safely used instead of VOP_GETPAGE() for handling
2237  * pagefaults against regular non swap files. VOP_PAGEIO()
2238  * interface is considered safe here if its implementation
2239  * is very close to VOP_GETPAGE() implementation.
2240  * e.g. It zero's out the part of the page beyond EOF. Doesn't
2241  * panic if there're file holes but instead returns an error.
2242  * Doesn't assume file won't be changed by user writes, etc.
2243  *
2244  * return 0 otherwise.
2245  *
2246  * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
2247  */
2248 int
2249 vn_vmpss_usepageio(vnode_t *vp)
2250 {
2251 	vfs_t   *vfsp = vp->v_vfsp;
2252 	char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
2253 	char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
2254 	char **fsok = pageio_ok_fss;
2255 
2256 	if (fsname == NULL) {
2257 		return (0);
2258 	}
2259 
2260 	for (; *fsok; fsok++) {
2261 		if (strcmp(*fsok, fsname) == 0) {
2262 			return (1);
2263 		}
2264 	}
2265 	return (0);
2266 }
2267 
2268 /* VOP_XXX() macros call the corresponding fop_xxx() function */
2269 
/*
 * Open a vnode by calling the vop_open entry of its operations vector.
 *
 *	vpp	- in/out; the open routine may replace *vpp with a
 *		  different vnode
 *	mode	- open mode; FREAD and FWRITE drive the v_rdcnt/v_wrcnt
 *		  accounting done here
 *	cr	- credentials passed to the open routine
 *
 * Returns the value returned by the vop_open routine.  A hold is kept on
 * the vnode originally passed in for the duration of the call.
 */
int
fop_open(
	vnode_t **vpp,
	int mode,
	cred_t *cr)
{
	int ret;
	vnode_t *vp = *vpp;	/* the vnode as passed in by the caller */

	VN_HOLD(vp);
	/*
	 * Adding to the vnode counts before calling open
	 * avoids the need for a mutex. It circumvents a race
	 * condition where a query made on the vnode counts results in a
	 * false negative. The inquirer goes away believing the file is
	 * not open when there is an open on the file already under way.
	 *
	 * The counts are meant to prevent NFS from granting a delegation
	 * when it would be dangerous to do so.
	 *
	 * The vnode counts are only kept on regular files
	 */
	if ((*vpp)->v_type == VREG) {
		if (mode & FREAD)
			atomic_add_32(&((*vpp)->v_rdcnt), 1);
		if (mode & FWRITE)
			atomic_add_32(&((*vpp)->v_wrcnt), 1);
	}

	ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr);

	if (ret) {
		/*
		 * Use the saved vp just in case the vnode ptr got trashed
		 * by the error.
		 *
		 * The open failed, so back out the counts added above.
		 */
		if ((vp->v_type == VREG) && (mode & FREAD))
			atomic_add_32(&(vp->v_rdcnt), -1);
		if ((vp->v_type == VREG) && (mode & FWRITE))
			atomic_add_32(&(vp->v_wrcnt), -1);
	} else {
		/*
		 * Some filesystems will return a different vnode,
		 * but the same path was still used to open it.
		 * So if we do change the vnode and need to
		 * copy over the path, do so here, rather than special
		 * casing each filesystem. Adjust the vnode counts to
		 * reflect the vnode switch.
		 */

		if (*vpp != vp && *vpp != NULL) {
			vn_copypath(vp, *vpp);
			/*
			 * Move each count from the old vnode to the new
			 * one: the new vnode is incremented before the old
			 * one is decremented.
			 */
			if (((*vpp)->v_type == VREG) && (mode & FREAD))
				atomic_add_32(&((*vpp)->v_rdcnt), 1);
			if ((vp->v_type == VREG) && (mode & FREAD))
				atomic_add_32(&(vp->v_rdcnt), -1);
			if (((*vpp)->v_type == VREG) && (mode & FWRITE))
				atomic_add_32(&((*vpp)->v_wrcnt), 1);
			if ((vp->v_type == VREG) && (mode & FWRITE))
				atomic_add_32(&(vp->v_wrcnt), -1);
		}
	}
	VN_RELE(vp);
	return (ret);
}
2335 
2336 int
2337 fop_close(
2338 	vnode_t *vp,
2339 	int flag,
2340 	int count,
2341 	offset_t offset,
2342 	cred_t *cr)
2343 {
2344 	int error;
2345 	error = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr);
2346 	/*
2347 	 * Check passed in count to handle possible dups. Vnode counts are only
2348 	 * kept on regular files
2349 	 */
2350 	if ((vp->v_type == VREG) && (count == 1))  {
2351 		if (flag & FREAD) {
2352 			ASSERT(vp->v_rdcnt > 0);
2353 			atomic_add_32(&(vp->v_rdcnt), -1);
2354 		}
2355 		if (flag & FWRITE) {
2356 			ASSERT(vp->v_wrcnt > 0);
2357 			atomic_add_32(&(vp->v_wrcnt), -1);
2358 		}
2359 	}
2360 	return (error);
2361 }
2362 
2363 int
2364 fop_read(
2365 	vnode_t *vp,
2366 	uio_t *uiop,
2367 	int ioflag,
2368 	cred_t *cr,
2369 	struct caller_context *ct)
2370 {
2371 	return (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
2372 }
2373 
2374 int
2375 fop_write(
2376 	vnode_t *vp,
2377 	uio_t *uiop,
2378 	int ioflag,
2379 	cred_t *cr,
2380 	struct caller_context *ct)
2381 {
2382 	return (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
2383 }
2384 
2385 int
2386 fop_ioctl(
2387 	vnode_t *vp,
2388 	int cmd,
2389 	intptr_t arg,
2390 	int flag,
2391 	cred_t *cr,
2392 	int *rvalp)
2393 {
2394 	return (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp);
2395 }
2396 
2397 int
2398 fop_setfl(
2399 	vnode_t *vp,
2400 	int oflags,
2401 	int nflags,
2402 	cred_t *cr)
2403 {
2404 	return (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr);
2405 }
2406 
2407 int
2408 fop_getattr(
2409 	vnode_t *vp,
2410 	vattr_t *vap,
2411 	int flags,
2412 	cred_t *cr)
2413 {
2414 	return (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr);
2415 }
2416 
2417 int
2418 fop_setattr(
2419 	vnode_t *vp,
2420 	vattr_t *vap,
2421 	int flags,
2422 	cred_t *cr,
2423 	caller_context_t *ct)
2424 {
2425 	return (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
2426 }
2427 
2428 int
2429 fop_access(
2430 	vnode_t *vp,
2431 	int mode,
2432 	int flags,
2433 	cred_t *cr)
2434 {
2435 	return (*(vp)->v_op->vop_access)(vp, mode, flags, cr);
2436 }
2437 
2438 int
2439 fop_lookup(
2440 	vnode_t *dvp,
2441 	char *nm,
2442 	vnode_t **vpp,
2443 	pathname_t *pnp,
2444 	int flags,
2445 	vnode_t *rdir,
2446 	cred_t *cr)
2447 {
2448 	int ret;
2449 
2450 	ret = (*(dvp)->v_op->vop_lookup)(dvp, nm, vpp, pnp, flags, rdir, cr);
2451 	if (ret == 0 && *vpp && (*vpp)->v_path == NULL)
2452 		vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
2453 
2454 	return (ret);
2455 }
2456 
2457 int
2458 fop_create(
2459 	vnode_t *dvp,
2460 	char *name,
2461 	vattr_t *vap,
2462 	vcexcl_t excl,
2463 	int mode,
2464 	vnode_t **vpp,
2465 	cred_t *cr,
2466 	int flag)
2467 {
2468 	int ret;
2469 
2470 	ret = (*(dvp)->v_op->vop_create)
2471 				(dvp, name, vap, excl, mode, vpp, cr, flag);
2472 	if (ret == 0 && *vpp && (*vpp)->v_path == NULL)
2473 		vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
2474 
2475 	return (ret);
2476 }
2477 
2478 int
2479 fop_remove(
2480 	vnode_t *dvp,
2481 	char *nm,
2482 	cred_t *cr)
2483 {
2484 	return (*(dvp)->v_op->vop_remove)(dvp, nm, cr);
2485 }
2486 
2487 int
2488 fop_link(
2489 	vnode_t *tdvp,
2490 	vnode_t *svp,
2491 	char *tnm,
2492 	cred_t *cr)
2493 {
2494 	return (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr);
2495 }
2496 
2497 int
2498 fop_rename(
2499 	vnode_t *sdvp,
2500 	char *snm,
2501 	vnode_t *tdvp,
2502 	char *tnm,
2503 	cred_t *cr)
2504 {
2505 	return (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr);
2506 }
2507 
2508 int
2509 fop_mkdir(
2510 	vnode_t *dvp,
2511 	char *dirname,
2512 	vattr_t *vap,
2513 	vnode_t **vpp,
2514 	cred_t *cr)
2515 {
2516 	int ret;
2517 
2518 	ret = (*(dvp)->v_op->vop_mkdir)(dvp, dirname, vap, vpp, cr);
2519 	if (ret == 0 && *vpp && (*vpp)->v_path == NULL)
2520 		vn_setpath(rootdir, dvp, *vpp, dirname, strlen(dirname));
2521 
2522 	return (ret);
2523 }
2524 
2525 int
2526 fop_rmdir(
2527 	vnode_t *dvp,
2528 	char *nm,
2529 	vnode_t *cdir,
2530 	cred_t *cr)
2531 {
2532 	return (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr);
2533 }
2534 
2535 int
2536 fop_readdir(
2537 	vnode_t *vp,
2538 	uio_t *uiop,
2539 	cred_t *cr,
2540 	int *eofp)
2541 {
2542 	return (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp);
2543 }
2544 
2545 int
2546 fop_symlink(
2547 	vnode_t *dvp,
2548 	char *linkname,
2549 	vattr_t *vap,
2550 	char *target,
2551 	cred_t *cr)
2552 {
2553 	return (*(dvp)->v_op->vop_symlink) (dvp, linkname, vap, target, cr);
2554 }
2555 
2556 int
2557 fop_readlink(
2558 	vnode_t *vp,
2559 	uio_t *uiop,
2560 	cred_t *cr)
2561 {
2562 	return (*(vp)->v_op->vop_readlink)(vp, uiop, cr);
2563 }
2564 
2565 int
2566 fop_fsync(
2567 	vnode_t *vp,
2568 	int syncflag,
2569 	cred_t *cr)
2570 {
2571 	return (*(vp)->v_op->vop_fsync)(vp, syncflag, cr);
2572 }
2573 
2574 void
2575 fop_inactive(
2576 	vnode_t *vp,
2577 	cred_t *cr)
2578 {
2579 	(*(vp)->v_op->vop_inactive)(vp, cr);
2580 }
2581 
2582 int
2583 fop_fid(
2584 	vnode_t *vp,
2585 	fid_t *fidp)
2586 {
2587 	return (*(vp)->v_op->vop_fid)(vp, fidp);
2588 }
2589 
2590 int
2591 fop_rwlock(
2592 	vnode_t *vp,
2593 	int write_lock,
2594 	caller_context_t *ct)
2595 {
2596 	return ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
2597 }
2598 
2599 void
2600 fop_rwunlock(
2601 	vnode_t *vp,
2602 	int write_lock,
2603 	caller_context_t *ct)
2604 {
2605 	(*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
2606 }
2607 
2608 int
2609 fop_seek(
2610 	vnode_t *vp,
2611 	offset_t ooff,
2612 	offset_t *noffp)
2613 {
2614 	return (*(vp)->v_op->vop_seek)(vp, ooff, noffp);
2615 }
2616 
2617 int
2618 fop_cmp(
2619 	vnode_t *vp1,
2620 	vnode_t *vp2)
2621 {
2622 	return (*(vp1)->v_op->vop_cmp)(vp1, vp2);
2623 }
2624 
2625 int
2626 fop_frlock(
2627 	vnode_t *vp,
2628 	int cmd,
2629 	flock64_t *bfp,
2630 	int flag,
2631 	offset_t offset,
2632 	struct flk_callback *flk_cbp,
2633 	cred_t *cr)
2634 {
2635 	return (*(vp)->v_op->vop_frlock)
2636 				(vp, cmd, bfp, flag, offset, flk_cbp, cr);
2637 }
2638 
2639 int
2640 fop_space(
2641 	vnode_t *vp,
2642 	int cmd,
2643 	flock64_t *bfp,
2644 	int flag,
2645 	offset_t offset,
2646 	cred_t *cr,
2647 	caller_context_t *ct)
2648 {
2649 	return (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
2650 }
2651 
2652 int
2653 fop_realvp(
2654 	vnode_t *vp,
2655 	vnode_t **vpp)
2656 {
2657 	return (*(vp)->v_op->vop_realvp)(vp, vpp);
2658 }
2659 
2660 int
2661 fop_getpage(
2662 	vnode_t *vp,
2663 	offset_t off,
2664 	size_t len,
2665 	uint_t *protp,
2666 	page_t **plarr,
2667 	size_t plsz,
2668 	struct seg *seg,
2669 	caddr_t addr,
2670 	enum seg_rw rw,
2671 	cred_t *cr)
2672 {
2673 	return (*(vp)->v_op->vop_getpage)
2674 			(vp, off, len, protp, plarr, plsz, seg, addr, rw, cr);
2675 }
2676 
2677 int
2678 fop_putpage(
2679 	vnode_t *vp,
2680 	offset_t off,
2681 	size_t len,
2682 	int flags,
2683 	cred_t *cr)
2684 {
2685 	return (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr);
2686 }
2687 
2688 int
2689 fop_map(
2690 	vnode_t *vp,
2691 	offset_t off,
2692 	struct as *as,
2693 	caddr_t *addrp,
2694 	size_t len,
2695 	uchar_t prot,
2696 	uchar_t maxprot,
2697 	uint_t flags,
2698 	cred_t *cr)
2699 {
2700 	return (*(vp)->v_op->vop_map)
2701 			(vp, off, as, addrp, len, prot, maxprot, flags, cr);
2702 }
2703 
2704 int
2705 fop_addmap(
2706 	vnode_t *vp,
2707 	offset_t off,
2708 	struct as *as,
2709 	caddr_t addr,
2710 	size_t len,
2711 	uchar_t prot,
2712 	uchar_t maxprot,
2713 	uint_t flags,
2714 	cred_t *cr)
2715 {
2716 	int error;
2717 	u_longlong_t delta;
2718 
2719 	error = (*(vp)->v_op->vop_addmap)
2720 			(vp, off, as, addr, len, prot, maxprot, flags, cr);
2721 
2722 	if ((!error) && (vp->v_type == VREG)) {
2723 		delta = (u_longlong_t)btopr(len);
2724 		/*
2725 		 * If file is declared MAP_PRIVATE, it can't be written back
2726 		 * even if open for write. Handle as read.
2727 		 */
2728 		if (flags & MAP_PRIVATE) {
2729 			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2730 				(int64_t)delta);
2731 		} else {
2732 			/*
2733 			 * atomic_add_64 forces the fetch of a 64 bit value to
2734 			 * be atomic on 32 bit machines
2735 			 */
2736 			if (maxprot & PROT_WRITE)
2737 				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
2738 					(int64_t)delta);
2739 			if (maxprot & PROT_READ)
2740 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2741 					(int64_t)delta);
2742 			if (maxprot & PROT_EXEC)
2743 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2744 					(int64_t)delta);
2745 		}
2746 	}
2747 	return (error);
2748 }
2749 
2750 int
2751 fop_delmap(
2752 	vnode_t *vp,
2753 	offset_t off,
2754 	struct as *as,
2755 	caddr_t addr,
2756 	size_t len,
2757 	uint_t prot,
2758 	uint_t maxprot,
2759 	uint_t flags,
2760 	cred_t *cr)
2761 {
2762 	int error;
2763 	u_longlong_t delta;
2764 	error = (*(vp)->v_op->vop_delmap)
2765 		(vp, off, as, addr, len, prot, maxprot, flags, cr);
2766 
2767 	/*
2768 	 * NFS calls into delmap twice, the first time
2769 	 * it simply establishes a callback mechanism and returns EAGAIN
2770 	 * while the real work is being done upon the second invocation.
2771 	 * We have to detect this here and only decrement the counts upon
2772 	 * the second delmap request.
2773 	 */
2774 	if ((error != EAGAIN) && (vp->v_type == VREG)) {
2775 
2776 		delta = (u_longlong_t)btopr(len);
2777 
2778 		if (flags & MAP_PRIVATE) {
2779 			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2780 				(int64_t)(-delta));
2781 		} else {
2782 			/*
2783 			 * atomic_add_64 forces the fetch of a 64 bit value
2784 			 * to be atomic on 32 bit machines
2785 			 */
2786 			if (maxprot & PROT_WRITE)
2787 				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
2788 					(int64_t)(-delta));
2789 			if (maxprot & PROT_READ)
2790 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2791 					(int64_t)(-delta));
2792 			if (maxprot & PROT_EXEC)
2793 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2794 					(int64_t)(-delta));
2795 		}
2796 	}
2797 	return (error);
2798 }
2799 
2800 
2801 int
2802 fop_poll(
2803 	vnode_t *vp,
2804 	short events,
2805 	int anyyet,
2806 	short *reventsp,
2807 	struct pollhead **phpp)
2808 {
2809 	return (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp);
2810 }
2811 
2812 int
2813 fop_dump(
2814 	vnode_t *vp,
2815 	caddr_t addr,
2816 	int lbdn,
2817 	int dblks)
2818 {
2819 	return (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks);
2820 }
2821 
2822 int
2823 fop_pathconf(
2824 	vnode_t *vp,
2825 	int cmd,
2826 	ulong_t *valp,
2827 	cred_t *cr)
2828 {
2829 	return (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr);
2830 }
2831 
2832 int
2833 fop_pageio(
2834 	vnode_t *vp,
2835 	struct page *pp,
2836 	u_offset_t io_off,
2837 	size_t io_len,
2838 	int flags,
2839 	cred_t *cr)
2840 {
2841 	return (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr);
2842 }
2843 
2844 int
2845 fop_dumpctl(
2846 	vnode_t *vp,
2847 	int action,
2848 	int *blkp)
2849 {
2850 	return (*(vp)->v_op->vop_dumpctl)(vp, action, blkp);
2851 }
2852 
2853 void
2854 fop_dispose(
2855 	vnode_t *vp,
2856 	page_t *pp,
2857 	int flag,
2858 	int dn,
2859 	cred_t *cr)
2860 {
2861 	(*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr);
2862 }
2863 
2864 int
2865 fop_setsecattr(
2866 	vnode_t *vp,
2867 	vsecattr_t *vsap,
2868 	int flag,
2869 	cred_t *cr)
2870 {
2871 	return (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr);
2872 }
2873 
2874 int
2875 fop_getsecattr(
2876 	vnode_t *vp,
2877 	vsecattr_t *vsap,
2878 	int flag,
2879 	cred_t *cr)
2880 {
2881 	return (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr);
2882 }
2883 
2884 int
2885 fop_shrlock(
2886 	vnode_t *vp,
2887 	int cmd,
2888 	struct shrlock *shr,
2889 	int flag,
2890 	cred_t *cr)
2891 {
2892 	return (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr);
2893 }
2894 
2895 int
2896 fop_vnevent(vnode_t *vp, vnevent_t vnevent)
2897 {
2898 	return (*(vp)->v_op->vop_vnevent)(vp, vnevent);
2899 }
2900