xref: /titanic_50/usr/src/uts/common/fs/vnode.c (revision bdcaf82257ab2deb6b46efaaa4bc93a1a44b3885)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/t_lock.h>
46 #include <sys/errno.h>
47 #include <sys/cred.h>
48 #include <sys/user.h>
49 #include <sys/uio.h>
50 #include <sys/file.h>
51 #include <sys/pathname.h>
52 #include <sys/vfs.h>
53 #include <sys/vnode.h>
54 #include <sys/rwstlock.h>
55 #include <sys/fem.h>
56 #include <sys/stat.h>
57 #include <sys/mode.h>
58 #include <sys/conf.h>
59 #include <sys/sysmacros.h>
60 #include <sys/cmn_err.h>
61 #include <sys/systm.h>
62 #include <sys/kmem.h>
63 #include <sys/debug.h>
64 #include <c2/audit.h>
65 #include <sys/acl.h>
66 #include <sys/nbmlock.h>
67 #include <sys/fcntl.h>
68 #include <fs/fs_subr.h>
69 
/*
 * ISROFILE(vp) is non-zero when vp is none of a character device, a
 * block device or a FIFO, and vn_is_readonly() reports it read-only.
 * The macro argument is evaluated more than once.
 */
#define	ISROFILE(vp)	\
	(!((vp)->v_type == VCHR || (vp)->v_type == VBLK || \
	    (vp)->v_type == VFIFO) && vn_is_readonly(vp))
74 
75 /*
76  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
77  * numerical order of S_IFMT and vnode types.)
78  */
79 enum vtype iftovt_tab[] = {
80 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
81 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
82 };
83 
84 ushort_t vttoif_tab[] = {
85 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
86 	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
87 };
88 
89 /*
90  * The system vnode cache.
91  */
92 
93 kmem_cache_t *vn_cache;
94 
95 
96 /*
97  * Vnode operations vector.
98  */
99 
100 static const fs_operation_trans_def_t vn_ops_table[] = {
101 	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
102 	    fs_nosys, fs_nosys,
103 
104 	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
105 	    fs_nosys, fs_nosys,
106 
107 	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
108 	    fs_nosys, fs_nosys,
109 
110 	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
111 	    fs_nosys, fs_nosys,
112 
113 	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
114 	    fs_nosys, fs_nosys,
115 
116 	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
117 	    fs_setfl, fs_nosys,
118 
119 	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
120 	    fs_nosys, fs_nosys,
121 
122 	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
123 	    fs_nosys, fs_nosys,
124 
125 	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
126 	    fs_nosys, fs_nosys,
127 
128 	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
129 	    fs_nosys, fs_nosys,
130 
131 	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
132 	    fs_nosys, fs_nosys,
133 
134 	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
135 	    fs_nosys, fs_nosys,
136 
137 	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
138 	    fs_nosys, fs_nosys,
139 
140 	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
141 	    fs_nosys, fs_nosys,
142 
143 	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
144 	    fs_nosys, fs_nosys,
145 
146 	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
147 	    fs_nosys, fs_nosys,
148 
149 	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
150 	    fs_nosys, fs_nosys,
151 
152 	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
153 	    fs_nosys, fs_nosys,
154 
155 	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
156 	    fs_nosys, fs_nosys,
157 
158 	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
159 	    fs_nosys, fs_nosys,
160 
161 	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
162 	    fs_nosys, fs_nosys,
163 
164 	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
165 	    fs_nosys, fs_nosys,
166 
167 	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
168 	    fs_rwlock, fs_rwlock,
169 
170 	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
171 	    (fs_generic_func_p) fs_rwunlock,
172 	    (fs_generic_func_p) fs_rwunlock,	/* no errors allowed */
173 
174 	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
175 	    fs_nosys, fs_nosys,
176 
177 	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
178 	    fs_cmp, fs_cmp,		/* no errors allowed */
179 
180 	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
181 	    fs_frlock, fs_nosys,
182 
183 	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
184 	    fs_nosys, fs_nosys,
185 
186 	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
187 	    fs_nosys, fs_nosys,
188 
189 	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
190 	    fs_nosys, fs_nosys,
191 
192 	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
193 	    fs_nosys, fs_nosys,
194 
195 	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
196 	    (fs_generic_func_p) fs_nosys_map,
197 	    (fs_generic_func_p) fs_nosys_map,
198 
199 	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
200 	    (fs_generic_func_p) fs_nosys_addmap,
201 	    (fs_generic_func_p) fs_nosys_addmap,
202 
203 	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
204 	    fs_nosys, fs_nosys,
205 
206 	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
207 	    (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
208 
209 	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
210 	    fs_nosys, fs_nosys,
211 
212 	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
213 	    fs_pathconf, fs_nosys,
214 
215 	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
216 	    fs_nosys, fs_nosys,
217 
218 	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
219 	    fs_nosys, fs_nosys,
220 
221 	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
222 	    (fs_generic_func_p) fs_dispose,
223 	    (fs_generic_func_p) fs_nodispose,
224 
225 	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
226 	    fs_nosys, fs_nosys,
227 
228 	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
229 	    fs_fab_acl, fs_nosys,
230 
231 	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
232 	    fs_shrlock, fs_nosys,
233 
234 	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
235 	    (fs_generic_func_p) fs_vnevent_nosupport,
236 	    (fs_generic_func_p) fs_vnevent_nosupport,
237 
238 	NULL, 0, NULL, NULL
239 };
240 
241 
242 /*
243  * Read or write a vnode.  Called from kernel code.
244  */
245 int
246 vn_rdwr(
247 	enum uio_rw rw,
248 	struct vnode *vp,
249 	caddr_t base,
250 	ssize_t len,
251 	offset_t offset,
252 	enum uio_seg seg,
253 	int ioflag,
254 	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
255 	cred_t *cr,
256 	ssize_t *residp)
257 {
258 	struct uio uio;
259 	struct iovec iov;
260 	int error;
261 	int in_crit = 0;
262 
263 	if (rw == UIO_WRITE && ISROFILE(vp))
264 		return (EROFS);
265 
266 	if (len < 0)
267 		return (EIO);
268 
269 	iov.iov_base = base;
270 	iov.iov_len = len;
271 	uio.uio_iov = &iov;
272 	uio.uio_iovcnt = 1;
273 	uio.uio_loffset = offset;
274 	uio.uio_segflg = (short)seg;
275 	uio.uio_resid = len;
276 	uio.uio_llimit = ulimit;
277 
278 	/*
279 	 * We have to enter the critical region before calling VOP_RWLOCK
280 	 * to avoid a deadlock with ufs.
281 	 */
282 	if (nbl_need_check(vp)) {
283 		int svmand;
284 
285 		nbl_start_crit(vp, RW_READER);
286 		in_crit = 1;
287 		error = nbl_svmand(vp, cr, &svmand);
288 		if (error != 0)
289 			goto done;
290 		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
291 		    uio.uio_offset, uio.uio_resid, svmand)) {
292 			error = EACCES;
293 			goto done;
294 		}
295 	}
296 
297 	(void) VOP_RWLOCK(vp,
298 		rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
299 	if (rw == UIO_WRITE) {
300 		uio.uio_fmode = FWRITE;
301 		uio.uio_extflg = UIO_COPY_DEFAULT;
302 		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
303 	} else {
304 		uio.uio_fmode = FREAD;
305 		uio.uio_extflg = UIO_COPY_CACHED;
306 		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
307 	}
308 	VOP_RWUNLOCK(vp, rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE,
309 									NULL);
310 	if (residp)
311 		*residp = uio.uio_resid;
312 	else if (uio.uio_resid)
313 		error = EIO;
314 
315 done:
316 	if (in_crit)
317 		nbl_end_crit(vp);
318 	return (error);
319 }
320 
321 /*
322  * Release a vnode.  Call VOP_INACTIVE on last reference or
323  * decrement reference count.
324  *
325  * To avoid race conditions, the v_count is left at 1 for
326  * the call to VOP_INACTIVE. This prevents another thread
327  * from reclaiming and releasing the vnode *before* the
328  * VOP_INACTIVE routine has a chance to destroy the vnode.
329  * We can't have more than 1 thread calling VOP_INACTIVE
330  * on a vnode.
331  */
332 void
333 vn_rele(vnode_t *vp)
334 {
335 	if (vp->v_count == 0)
336 		cmn_err(CE_PANIC, "vn_rele: vnode ref count 0");
337 	mutex_enter(&vp->v_lock);
338 	if (vp->v_count == 1) {
339 		mutex_exit(&vp->v_lock);
340 		VOP_INACTIVE(vp, CRED());
341 	} else {
342 		vp->v_count--;
343 		mutex_exit(&vp->v_lock);
344 	}
345 }
346 
347 /*
348  * Like vn_rele() except that it clears v_stream under v_lock.
349  * This is used by sockfs when it dismantels the association between
350  * the sockfs node and the vnode in the underlaying file system.
351  * v_lock has to be held to prevent a thread coming through the lookupname
352  * path from accessing a stream head that is going away.
353  */
354 void
355 vn_rele_stream(vnode_t *vp)
356 {
357 	if (vp->v_count == 0)
358 		cmn_err(CE_PANIC, "vn_rele: vnode ref count 0");
359 	mutex_enter(&vp->v_lock);
360 	vp->v_stream = NULL;
361 	if (vp->v_count == 1) {
362 		mutex_exit(&vp->v_lock);
363 		VOP_INACTIVE(vp, CRED());
364 	} else {
365 		vp->v_count--;
366 		mutex_exit(&vp->v_lock);
367 	}
368 }
369 
370 int
371 vn_open(
372 	char *pnamep,
373 	enum uio_seg seg,
374 	int filemode,
375 	int createmode,
376 	struct vnode **vpp,
377 	enum create crwhy,
378 	mode_t umask)
379 {
380 	return (vn_openat(pnamep, seg, filemode,
381 			createmode, vpp, crwhy, umask, NULL));
382 }
383 
384 
385 /*
386  * Open/create a vnode.
387  * This may be callable by the kernel, the only known use
388  * of user context being that the current user credentials
389  * are used for permissions.  crwhy is defined iff filemode & FCREAT.
390  */
391 int
392 vn_openat(
393 	char *pnamep,
394 	enum uio_seg seg,
395 	int filemode,
396 	int createmode,
397 	struct vnode **vpp,
398 	enum create crwhy,
399 	mode_t umask,
400 	struct vnode *startvp)
401 {
402 	struct vnode *vp;
403 	int mode;
404 	int error;
405 	int in_crit = 0;
406 	struct vattr vattr;
407 	enum symfollow follow;
408 
409 	mode = 0;
410 	if (filemode & FREAD)
411 		mode |= VREAD;
412 	if (filemode & (FWRITE|FTRUNC))
413 		mode |= VWRITE;
414 
415 	/* symlink interpretation */
416 	if (filemode & FNOFOLLOW)
417 		follow = NO_FOLLOW;
418 	else
419 		follow = FOLLOW;
420 
421 top:
422 	if (filemode & FCREAT) {
423 		enum vcexcl excl;
424 
425 		/*
426 		 * Wish to create a file.
427 		 */
428 		vattr.va_type = VREG;
429 		vattr.va_mode = createmode;
430 		vattr.va_mask = AT_TYPE|AT_MODE;
431 		if (filemode & FTRUNC) {
432 			vattr.va_size = 0;
433 			vattr.va_mask |= AT_SIZE;
434 		}
435 		if (filemode & FEXCL)
436 			excl = EXCL;
437 		else
438 			excl = NONEXCL;
439 
440 		if (error =
441 		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
442 					(filemode & ~(FTRUNC|FEXCL)),
443 						umask, startvp))
444 			return (error);
445 	} else {
446 		/*
447 		 * Wish to open a file.  Just look it up.
448 		 */
449 		if (error = lookupnameat(pnamep, seg, follow,
450 		    NULLVPP, &vp, startvp)) {
451 			if (error == ESTALE)
452 				goto top;
453 			return (error);
454 		}
455 
456 		/*
457 		 * Get the attributes to check whether file is large.
458 		 * We do this only if the FOFFMAX flag is not set and
459 		 * only for regular files.
460 		 */
461 
462 		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
463 			vattr.va_mask = AT_SIZE;
464 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED()))) {
465 				goto out;
466 			}
467 			if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
468 				/*
469 				 * Large File API - regular open fails
470 				 * if FOFFMAX flag is set in file mode
471 				 */
472 				error = EOVERFLOW;
473 				goto out;
474 			}
475 		}
476 		/*
477 		 * Can't write directories, active texts, or
478 		 * read-only filesystems.  Can't truncate files
479 		 * on which mandatory locking is in effect.
480 		 */
481 		if (filemode & (FWRITE|FTRUNC)) {
482 			/*
483 			 * Allow writable directory if VDIROPEN flag is set.
484 			 */
485 			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
486 				error = EISDIR;
487 				goto out;
488 			}
489 			if (ISROFILE(vp)) {
490 				error = EROFS;
491 				goto out;
492 			}
493 			/*
494 			 * Can't truncate files on which mandatory locking
495 			 * or non-blocking mandatory locking is in effect.
496 			 */
497 			if (filemode & FTRUNC) {
498 				vnode_t *rvp;
499 
500 				if (VOP_REALVP(vp, &rvp) != 0)
501 					rvp = vp;
502 				if (nbl_need_check(vp)) {
503 					nbl_start_crit(vp, RW_READER);
504 					in_crit = 1;
505 					vattr.va_mask = AT_MODE|AT_SIZE;
506 					if ((error = VOP_GETATTR(vp, &vattr, 0,
507 					    CRED())) == 0) {
508 						if (rvp->v_filocks != NULL)
509 							if (MANDLOCK(vp,
510 							    vattr.va_mode))
511 								error = EAGAIN;
512 						if (!error) {
513 							if (nbl_conflict(vp,
514 							    NBL_WRITE, 0,
515 							    vattr.va_size, 0))
516 								error = EACCES;
517 						}
518 					}
519 				} else if (rvp->v_filocks != NULL) {
520 					vattr.va_mask = AT_MODE;
521 					if ((error = VOP_GETATTR(vp, &vattr,
522 					    0, CRED())) == 0 && MANDLOCK(vp,
523 					    vattr.va_mode))
524 						error = EAGAIN;
525 				}
526 			}
527 			if (error)
528 				goto out;
529 		}
530 		/*
531 		 * Check permissions.
532 		 */
533 		if (error = VOP_ACCESS(vp, mode, 0, CRED()))
534 			goto out;
535 	}
536 
537 	/*
538 	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
539 	 */
540 	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
541 		error = EINVAL;
542 		goto out;
543 	}
544 	if (filemode & FNOLINKS) {
545 		vattr.va_mask = AT_NLINK;
546 		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED()))) {
547 			goto out;
548 		}
549 		if (vattr.va_nlink != 1) {
550 			error = EMLINK;
551 			goto out;
552 		}
553 	}
554 
555 	/*
556 	 * Opening a socket corresponding to the AF_UNIX pathname
557 	 * in the filesystem name space is not supported.
558 	 * However, VSOCK nodes in namefs are supported in order
559 	 * to make fattach work for sockets.
560 	 *
561 	 * XXX This uses VOP_REALVP to distinguish between
562 	 * an unopened namefs node (where VOP_REALVP returns a
563 	 * different VSOCK vnode) and a VSOCK created by vn_create
564 	 * in some file system (where VOP_REALVP would never return
565 	 * a different vnode).
566 	 */
567 	if (vp->v_type == VSOCK) {
568 		struct vnode *nvp;
569 
570 		error = VOP_REALVP(vp, &nvp);
571 		if (error != 0 || nvp == NULL || nvp == vp ||
572 		    nvp->v_type != VSOCK) {
573 			error = EOPNOTSUPP;
574 			goto out;
575 		}
576 	}
577 	/*
578 	 * Do opening protocol.
579 	 */
580 	error = VOP_OPEN(&vp, filemode, CRED());
581 	/*
582 	 * Truncate if required.
583 	 */
584 	if (error == 0 && (filemode & FTRUNC) && !(filemode & FCREAT)) {
585 		vattr.va_size = 0;
586 		vattr.va_mask = AT_SIZE;
587 		if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
588 			(void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED());
589 	}
590 out:
591 	ASSERT(vp->v_count > 0);
592 
593 	if (in_crit) {
594 		nbl_end_crit(vp);
595 		in_crit = 0;
596 	}
597 	if (error) {
598 		/*
599 		 * The following clause was added to handle a problem
600 		 * with NFS consistency.  It is possible that a lookup
601 		 * of the file to be opened succeeded, but the file
602 		 * itself doesn't actually exist on the server.  This
603 		 * is chiefly due to the DNLC containing an entry for
604 		 * the file which has been removed on the server.  In
605 		 * this case, we just start over.  If there was some
606 		 * other cause for the ESTALE error, then the lookup
607 		 * of the file will fail and the error will be returned
608 		 * above instead of looping around from here.
609 		 */
610 		VN_RELE(vp);
611 		if (error == ESTALE)
612 			goto top;
613 	} else
614 		*vpp = vp;
615 	return (error);
616 }
617 
618 int
619 vn_create(
620 	char *pnamep,
621 	enum uio_seg seg,
622 	struct vattr *vap,
623 	enum vcexcl excl,
624 	int mode,
625 	struct vnode **vpp,
626 	enum create why,
627 	int flag,
628 	mode_t umask)
629 {
630 	return (vn_createat(pnamep, seg, vap, excl, mode, vpp,
631 			why, flag, umask, NULL));
632 }
633 
634 /*
635  * Create a vnode (makenode).
636  */
637 int
638 vn_createat(
639 	char *pnamep,
640 	enum uio_seg seg,
641 	struct vattr *vap,
642 	enum vcexcl excl,
643 	int mode,
644 	struct vnode **vpp,
645 	enum create why,
646 	int flag,
647 	mode_t umask,
648 	struct vnode *startvp)
649 {
650 	struct vnode *dvp;	/* ptr to parent dir vnode */
651 	struct vnode *vp = NULL;
652 	struct pathname pn;
653 	int error;
654 	int in_crit = 0;
655 	struct vattr vattr;
656 	enum symfollow follow;
657 
658 	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
659 
660 	/* symlink interpretation */
661 	if ((flag & FNOFOLLOW) || excl == EXCL)
662 		follow = NO_FOLLOW;
663 	else
664 		follow = FOLLOW;
665 	flag &= ~(FNOFOLLOW|FNOLINKS);
666 
667 top:
668 	/*
669 	 * Lookup directory.
670 	 * If new object is a file, call lower level to create it.
671 	 * Note that it is up to the lower level to enforce exclusive
672 	 * creation, if the file is already there.
673 	 * This allows the lower level to do whatever
674 	 * locking or protocol that is needed to prevent races.
675 	 * If the new object is directory call lower level to make
676 	 * the new directory, with "." and "..".
677 	 */
678 	if (error = pn_get(pnamep, seg, &pn))
679 		return (error);
680 #ifdef  C2_AUDIT
681 	if (audit_active)
682 		audit_vncreate_start();
683 #endif /* C2_AUDIT */
684 	dvp = NULL;
685 	*vpp = NULL;
686 	/*
687 	 * lookup will find the parent directory for the vnode.
688 	 * When it is done the pn holds the name of the entry
689 	 * in the directory.
690 	 * If this is a non-exclusive create we also find the node itself.
691 	 */
692 	error = lookuppnat(&pn, NULL, follow, &dvp,
693 	    (excl == EXCL) ? NULLVPP : vpp, startvp);
694 	if (error) {
695 		pn_free(&pn);
696 		if (error == ESTALE)
697 			goto top;
698 		if (why == CRMKDIR && error == EINVAL)
699 			error = EEXIST;		/* SVID */
700 		return (error);
701 	}
702 
703 	if (why != CRMKNOD)
704 		vap->va_mode &= ~VSVTX;
705 
706 	/*
707 	 * If default ACLs are defined for the directory don't apply the
708 	 * umask if umask is passed.
709 	 */
710 
711 	if (umask) {
712 
713 		vsecattr_t vsec;
714 
715 		vsec.vsa_aclcnt = 0;
716 		vsec.vsa_aclentp = NULL;
717 		vsec.vsa_dfaclcnt = 0;
718 		vsec.vsa_dfaclentp = NULL;
719 		vsec.vsa_mask = VSA_DFACLCNT;
720 		if (error = VOP_GETSECATTR(dvp, &vsec, 0, CRED())) {
721 			if (*vpp != NULL)
722 				VN_RELE(*vpp);
723 			goto out;
724 		}
725 
726 		/*
727 		 * Apply the umask if no default ACLs.
728 		 */
729 		if (vsec.vsa_dfaclcnt == 0)
730 			vap->va_mode &= ~umask;
731 
732 		/*
733 		 * VOP_GETSECATTR() may have allocated memory for ACLs we
734 		 * didn't request, so double-check and free it if necessary.
735 		 */
736 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
737 			kmem_free((caddr_t)vsec.vsa_aclentp,
738 				vsec.vsa_aclcnt * sizeof (aclent_t));
739 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
740 			kmem_free((caddr_t)vsec.vsa_dfaclentp,
741 				vsec.vsa_dfaclcnt * sizeof (aclent_t));
742 	}
743 
744 	/*
745 	 * In general we want to generate EROFS if the file system is
746 	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
747 	 * documents the open system call, and it says that O_CREAT has no
748 	 * effect if the file already exists.  Bug 1119649 states
749 	 * that open(path, O_CREAT, ...) fails when attempting to open an
750 	 * existing file on a read only file system.  Thus, the first part
751 	 * of the following if statement has 3 checks:
752 	 *	if the file exists &&
753 	 *		it is being open with write access &&
754 	 *		the file system is read only
755 	 *	then generate EROFS
756 	 */
757 	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
758 	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
759 		if (*vpp)
760 			VN_RELE(*vpp);
761 		error = EROFS;
762 	} else if (excl == NONEXCL && *vpp != NULL) {
763 		vnode_t *rvp;
764 
765 		/*
766 		 * File already exists.  If a mandatory lock has been
767 		 * applied, return error.
768 		 */
769 		vp = *vpp;
770 		if (VOP_REALVP(vp, &rvp) != 0)
771 			rvp = vp;
772 		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
773 			nbl_start_crit(vp, RW_READER);
774 			in_crit = 1;
775 		}
776 		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
777 			vattr.va_mask = AT_MODE|AT_SIZE;
778 			if (error = VOP_GETATTR(vp, &vattr, 0, CRED())) {
779 				goto out;
780 			}
781 			if (MANDLOCK(vp, vattr.va_mode)) {
782 				error = EAGAIN;
783 				goto out;
784 			}
785 			/*
786 			 * File cannot be truncated if non-blocking mandatory
787 			 * locks are currently on the file.
788 			 */
789 			if ((vap->va_mask & AT_SIZE) && in_crit) {
790 				u_offset_t offset;
791 				ssize_t length;
792 
793 				offset = vap->va_size > vattr.va_size ?
794 						vattr.va_size : vap->va_size;
795 				length = vap->va_size > vattr.va_size ?
796 						vap->va_size - vattr.va_size :
797 						vattr.va_size - vap->va_size;
798 				if (nbl_conflict(vp, NBL_WRITE, offset,
799 						length, 0)) {
800 					error = EACCES;
801 					goto out;
802 				}
803 			}
804 		}
805 
806 		/*
807 		 * If the file is the root of a VFS, we've crossed a
808 		 * mount point and the "containing" directory that we
809 		 * acquired above (dvp) is irrelevant because it's in
810 		 * a different file system.  We apply VOP_CREATE to the
811 		 * target itself instead of to the containing directory
812 		 * and supply a null path name to indicate (conventionally)
813 		 * the node itself as the "component" of interest.
814 		 *
815 		 * The intercession of the file system is necessary to
816 		 * ensure that the appropriate permission checks are
817 		 * done.
818 		 */
819 		if (vp->v_flag & VROOT) {
820 			ASSERT(why != CRMKDIR);
821 			error =
822 			    VOP_CREATE(vp, "", vap, excl, mode, vpp, CRED(),
823 				    flag);
824 			/*
825 			 * If the create succeeded, it will have created
826 			 * a new reference to the vnode.  Give up the
827 			 * original reference.  The assertion should not
828 			 * get triggered because NBMAND locks only apply to
829 			 * VREG files.  And if in_crit is non-zero for some
830 			 * reason, detect that here, rather than when we
831 			 * deference a null vp.
832 			 */
833 			ASSERT(in_crit == 0);
834 			VN_RELE(vp);
835 			vp = NULL;
836 			goto out;
837 		}
838 
839 		/*
840 		 * Large File API - non-large open (FOFFMAX flag not set)
841 		 * of regular file fails if the file size exceeds MAXOFF32_T.
842 		 */
843 		if (why != CRMKDIR &&
844 		    !(flag & FOFFMAX) &&
845 		    (vp->v_type == VREG)) {
846 			vattr.va_mask = AT_SIZE;
847 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED()))) {
848 				goto out;
849 			}
850 			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
851 				error = EOVERFLOW;
852 				goto out;
853 			}
854 		}
855 	}
856 
857 	if (error == 0) {
858 		/*
859 		 * Call mkdir() if specified, otherwise create().
860 		 */
861 		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */
862 
863 		if (why == CRMKDIR)
864 			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED());
865 		else if (!must_be_dir)
866 			error = VOP_CREATE(dvp, pn.pn_path, vap,
867 			    excl, mode, vpp, CRED(), flag);
868 		else
869 			error = ENOTDIR;
870 	}
871 
872 out:
873 
874 #ifdef C2_AUDIT
875 	if (audit_active)
876 		audit_vncreate_finish(*vpp, error);
877 #endif  /* C2_AUDIT */
878 	if (in_crit) {
879 		nbl_end_crit(vp);
880 		in_crit = 0;
881 	}
882 	if (vp != NULL) {
883 		VN_RELE(vp);
884 		vp = NULL;
885 	}
886 	pn_free(&pn);
887 	VN_RELE(dvp);
888 	/*
889 	 * The following clause was added to handle a problem
890 	 * with NFS consistency.  It is possible that a lookup
891 	 * of the file to be created succeeded, but the file
892 	 * itself doesn't actually exist on the server.  This
893 	 * is chiefly due to the DNLC containing an entry for
894 	 * the file which has been removed on the server.  In
895 	 * this case, we just start over.  If there was some
896 	 * other cause for the ESTALE error, then the lookup
897 	 * of the file will fail and the error will be returned
898 	 * above instead of looping around from here.
899 	 */
900 	if (error == ESTALE)
901 		goto top;
902 	return (error);
903 }
904 
905 int
906 vn_link(char *from, char *to, enum uio_seg seg)
907 {
908 	struct vnode *fvp;		/* from vnode ptr */
909 	struct vnode *tdvp;		/* to directory vnode ptr */
910 	struct pathname pn;
911 	int error;
912 	struct vattr vattr;
913 	dev_t fsid;
914 
915 top:
916 	fvp = tdvp = NULL;
917 	if (error = pn_get(to, seg, &pn))
918 		return (error);
919 	if (error = lookupname(from, seg, NO_FOLLOW, NULLVPP, &fvp))
920 		goto out;
921 	if (error = lookuppn(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP))
922 		goto out;
923 	/*
924 	 * Make sure both source vnode and target directory vnode are
925 	 * in the same vfs and that it is writeable.
926 	 */
927 	vattr.va_mask = AT_FSID;
928 	if (error = VOP_GETATTR(fvp, &vattr, 0, CRED()))
929 		goto out;
930 	fsid = vattr.va_fsid;
931 	vattr.va_mask = AT_FSID;
932 	if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED()))
933 		goto out;
934 	if (fsid != vattr.va_fsid) {
935 		error = EXDEV;
936 		goto out;
937 	}
938 	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
939 		error = EROFS;
940 		goto out;
941 	}
942 	/*
943 	 * Do the link.
944 	 */
945 	(void) pn_fixslash(&pn);
946 	error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED());
947 out:
948 	pn_free(&pn);
949 	if (fvp)
950 		VN_RELE(fvp);
951 	if (tdvp)
952 		VN_RELE(tdvp);
953 	if (error == ESTALE)
954 		goto top;
955 	return (error);
956 }
957 
958 int
959 vn_rename(char *from, char *to, enum uio_seg seg)
960 {
961 	return (vn_renameat(NULL, from, NULL, to, seg));
962 }
963 
964 int
965 vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
966 		char *tname, enum uio_seg seg)
967 {
968 	int error;
969 	struct vattr vattr;
970 	struct pathname fpn;		/* from pathname */
971 	struct pathname tpn;		/* to pathname */
972 	dev_t fsid;
973 	int in_crit = 0;
974 	vnode_t *fromvp, *fvp;
975 	vnode_t *tovp;
976 
977 top:
978 	fvp = fromvp = tovp = NULL;
979 	/*
980 	 * Get to and from pathnames.
981 	 */
982 	if (error = pn_get(fname, seg, &fpn))
983 		return (error);
984 	if (error = pn_get(tname, seg, &tpn)) {
985 		pn_free(&fpn);
986 		return (error);
987 	}
988 
989 	/*
990 	 * First we need to resolve the correct directories
991 	 * The passed in directories may only be a starting point,
992 	 * but we need the real directories the file(s) live in.
993 	 * For example the fname may be something like usr/lib/sparc
994 	 * and we were passed in the / directory, but we need to
995 	 * use the lib directory for the rename.
996 	 */
997 
998 #ifdef  C2_AUDIT
999 	if (audit_active)
1000 		audit_setfsat_path(1);
1001 #endif /* C2_AUDIT */
1002 	/*
1003 	 * Lookup to and from directories.
1004 	 */
1005 	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1006 		goto out;
1007 	}
1008 
1009 	/*
1010 	 * Make sure there is an entry.
1011 	 */
1012 	if (fvp == NULL) {
1013 		error = ENOENT;
1014 		goto out;
1015 	}
1016 
1017 #ifdef  C2_AUDIT
1018 	if (audit_active)
1019 		audit_setfsat_path(3);
1020 #endif /* C2_AUDIT */
1021 	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, NULLVPP, tdvp)) {
1022 		goto out;
1023 	}
1024 
1025 	/*
1026 	 * Make sure both the from vnode directory and the to directory
1027 	 * are in the same vfs and the to directory is writable.
1028 	 * We check fsid's, not vfs pointers, so loopback fs works.
1029 	 */
1030 	if (fromvp != tovp) {
1031 		vattr.va_mask = AT_FSID;
1032 		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED()))
1033 			goto out;
1034 		fsid = vattr.va_fsid;
1035 		vattr.va_mask = AT_FSID;
1036 		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED()))
1037 			goto out;
1038 		if (fsid != vattr.va_fsid) {
1039 			error = EXDEV;
1040 			goto out;
1041 		}
1042 	}
1043 
1044 	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1045 		error = EROFS;
1046 		goto out;
1047 	}
1048 
1049 	if (nbl_need_check(fvp)) {
1050 		nbl_start_crit(fvp, RW_READER);
1051 		in_crit = 1;
1052 		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0)) {
1053 			error = EACCES;
1054 			goto out;
1055 		}
1056 	}
1057 
1058 	/*
1059 	 * Do the rename.
1060 	 */
1061 	(void) pn_fixslash(&tpn);
1062 	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED());
1063 
1064 out:
1065 	pn_free(&fpn);
1066 	pn_free(&tpn);
1067 	if (in_crit) {
1068 		nbl_end_crit(fvp);
1069 		in_crit = 0;
1070 	}
1071 	if (fromvp)
1072 		VN_RELE(fromvp);
1073 	if (tovp)
1074 		VN_RELE(tovp);
1075 	if (fvp)
1076 		VN_RELE(fvp);
1077 	if (error == ESTALE)
1078 		goto top;
1079 	return (error);
1080 }
1081 
1082 /*
1083  * Remove a file or directory.
1084  */
1085 int
1086 vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1087 {
1088 	return (vn_removeat(NULL, fnamep, seg, dirflag));
1089 }
1090 
1091 int
1092 vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
1093 {
1094 	struct vnode *vp;		/* entry vnode */
1095 	struct vnode *dvp;		/* ptr to parent dir vnode */
1096 	struct vnode *coveredvp;
1097 	struct pathname pn;		/* name of entry */
1098 	enum vtype vtype;
1099 	int error;
1100 	struct vfs *vfsp;
1101 	struct vfs *dvfsp;	/* ptr to parent dir vfs */
1102 	int in_crit = 0;
1103 
1104 top:
1105 	if (error = pn_get(fnamep, seg, &pn))
1106 		return (error);
1107 	dvp = vp = NULL;
1108 	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
1109 		pn_free(&pn);
1110 		if (error == ESTALE)
1111 			goto top;
1112 		return (error);
1113 	}
1114 
1115 	/*
1116 	 * Make sure there is an entry.
1117 	 */
1118 	if (vp == NULL) {
1119 		error = ENOENT;
1120 		goto out;
1121 	}
1122 
1123 	vfsp = vp->v_vfsp;
1124 	dvfsp = dvp->v_vfsp;
1125 
1126 	/*
1127 	 * If the named file is the root of a mounted filesystem, fail,
1128 	 * unless it's marked unlinkable.  In that case, unmount the
1129 	 * filesystem and proceed to unlink the covered vnode.  (If the
1130 	 * covered vnode is a directory, use rmdir instead of unlink,
1131 	 * to avoid file system corruption.)
1132 	 */
1133 	if (vp->v_flag & VROOT) {
1134 		if (vfsp->vfs_flag & VFS_UNLINKABLE) {
1135 			if (dirflag == RMDIRECTORY) {
1136 				/*
1137 				 * User called rmdir(2) on a file that has
1138 				 * been namefs mounted on top of.  Since
1139 				 * namefs doesn't allow directories to
1140 				 * be mounted on other files we know
1141 				 * vp is not of type VDIR so fail to operation.
1142 				 */
1143 				error = ENOTDIR;
1144 				goto out;
1145 			}
1146 			coveredvp = vfsp->vfs_vnodecovered;
1147 			VN_HOLD(coveredvp);
1148 			VN_RELE(vp);
1149 			vp = NULL;
1150 			if ((error = vn_vfswlock(coveredvp)) == 0)
1151 				error = dounmount(vfsp, 0, CRED());
1152 			/*
1153 			 * Unmounted the namefs file system; now get
1154 			 * the object it was mounted over.
1155 			 */
1156 			vp = coveredvp;
1157 			/*
1158 			 * If namefs was mounted over a directory, then
1159 			 * we want to use rmdir() instead of unlink().
1160 			 */
1161 			if (vp->v_type == VDIR)
1162 				dirflag = RMDIRECTORY;
1163 		} else
1164 			error = EBUSY;
1165 
1166 		if (error)
1167 			goto out;
1168 	}
1169 
1170 	/*
1171 	 * Make sure filesystem is writeable.
1172 	 * We check the parent directory's vfs in case this is an lofs vnode.
1173 	 */
1174 	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
1175 		error = EROFS;
1176 		goto out;
1177 	}
1178 
1179 	vtype = vp->v_type;
1180 
1181 	/*
1182 	 * If there is the possibility of an nbmand share reservation, make
1183 	 * sure it's okay to remove the file.  Keep a reference to the
1184 	 * vnode, so that we can exit the nbl critical region after
1185 	 * calling VOP_REMOVE.
1186 	 * If there is no possibility of an nbmand share reservation,
1187 	 * release the vnode reference now.  Filesystems like NFS may
1188 	 * behave differently if there is an extra reference, so get rid of
1189 	 * this one.  Fortunately, we can't have nbmand mounts on NFS
1190 	 * filesystems.
1191 	 */
1192 	if (nbl_need_check(vp)) {
1193 		nbl_start_crit(vp, RW_READER);
1194 		in_crit = 1;
1195 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
1196 			error = EACCES;
1197 			goto out;
1198 		}
1199 	} else {
1200 		VN_RELE(vp);
1201 		vp = NULL;
1202 	}
1203 
1204 	if (dirflag == RMDIRECTORY) {
1205 		/*
1206 		 * Caller is using rmdir(2), which can only be applied to
1207 		 * directories.
1208 		 */
1209 		if (vtype != VDIR) {
1210 			error = ENOTDIR;
1211 		} else {
1212 			vnode_t *cwd;
1213 			proc_t *pp = curproc;
1214 
1215 			mutex_enter(&pp->p_lock);
1216 			cwd = PTOU(pp)->u_cdir;
1217 			VN_HOLD(cwd);
1218 			mutex_exit(&pp->p_lock);
1219 			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED());
1220 			VN_RELE(cwd);
1221 		}
1222 	} else {
1223 		/*
1224 		 * Unlink(2) can be applied to anything.
1225 		 */
1226 		error = VOP_REMOVE(dvp, pn.pn_path, CRED());
1227 	}
1228 
1229 out:
1230 	pn_free(&pn);
1231 	if (in_crit) {
1232 		nbl_end_crit(vp);
1233 		in_crit = 0;
1234 	}
1235 	if (vp != NULL)
1236 		VN_RELE(vp);
1237 	if (dvp != NULL)
1238 		VN_RELE(dvp);
1239 	if (error == ESTALE)
1240 		goto top;
1241 	return (error);
1242 }
1243 
1244 /*
1245  * Utility function to compare equality of vnodes.
1246  * Compare the underlying real vnodes, if there are underlying vnodes.
1247  * This is a more thorough comparison than the VN_CMP() macro provides.
1248  */
1249 int
1250 vn_compare(vnode_t *vp1, vnode_t *vp2)
1251 {
1252 	vnode_t *realvp;
1253 
1254 	if (vp1 != NULL && VOP_REALVP(vp1, &realvp) == 0)
1255 		vp1 = realvp;
1256 	if (vp2 != NULL && VOP_REALVP(vp2, &realvp) == 0)
1257 		vp2 = realvp;
1258 	return (VN_CMP(vp1, vp2));
1259 }
1260 
/*
 * The number of locks to hash into, expressed as the highest bucket
 * index.  VN_VFSLOCKS_HASH() uses this value as a bit mask, so it must
 * be a power of 2 minus 1.  (The original note adds that it "should
 * probably also be prime"; note that 1023 is not.)
 */
#define	NUM_BUCKETS	1023

/*
 * One hash bucket: vb_list heads a singly linked list of entries and
 * vb_lock is the mutex held while that list is walked or modified.
 * The pad member sizes the structure to 64 bytes (assuming the compiler
 * inserts no padding of its own), matching the 64-byte alignment
 * requested for the bucket array below -- presumably one cache line per
 * bucket; TODO confirm.
 */
struct  vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1 .
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];

/* Number of low-order pointer bits discarded before masking. */
#define	VN_VFSLOCKS_SHIFT	9

/* Map a vfs/vnode pointer to a bucket index in the range 0..NUM_BUCKETS. */
#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
1284 
1285 /*
1286  * vn_vfslocks_getlock() uses an HASH scheme to generate
1287  * rwstlock using vfs/vnode pointer passed to it.
1288  *
1289  * vn_vfslocks_rele() releases a reference in the
1290  * HASH table which allows the entry allocated by
1291  * vn_vfslocks_getlock() to be freed at a later
1292  * stage when the refcount drops to zero.
1293  */
1294 
1295 vn_vfslocks_entry_t *
1296 vn_vfslocks_getlock(void *vfsvpptr)
1297 {
1298 	struct vn_vfslocks_bucket *bp;
1299 	vn_vfslocks_entry_t *vep;
1300 	vn_vfslocks_entry_t *tvep;
1301 
1302 	ASSERT(vfsvpptr != NULL);
1303 	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
1304 
1305 	mutex_enter(&bp->vb_lock);
1306 	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
1307 		if (vep->ve_vpvfs == vfsvpptr) {
1308 			vep->ve_refcnt++;
1309 			mutex_exit(&bp->vb_lock);
1310 			return (vep);
1311 		}
1312 	}
1313 	mutex_exit(&bp->vb_lock);
1314 	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
1315 	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
1316 	vep->ve_vpvfs = (char *)vfsvpptr;
1317 	vep->ve_refcnt = 1;
1318 	mutex_enter(&bp->vb_lock);
1319 	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
1320 		if (tvep->ve_vpvfs == vfsvpptr) {
1321 			tvep->ve_refcnt++;
1322 			mutex_exit(&bp->vb_lock);
1323 
1324 			/*
1325 			 * There is already an entry in the hash
1326 			 * destroy what we just allocated.
1327 			 */
1328 			rwst_destroy(&vep->ve_lock);
1329 			kmem_free(vep, sizeof (*vep));
1330 			return (tvep);
1331 		}
1332 	}
1333 	vep->ve_next = bp->vb_list;
1334 	bp->vb_list = vep;
1335 	mutex_exit(&bp->vb_lock);
1336 	return (vep);
1337 }
1338 
1339 void
1340 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
1341 {
1342 	struct vn_vfslocks_bucket *bp;
1343 	vn_vfslocks_entry_t *vep;
1344 	vn_vfslocks_entry_t *pvep;
1345 
1346 	ASSERT(vepent != NULL);
1347 	ASSERT(vepent->ve_vpvfs != NULL);
1348 
1349 	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
1350 
1351 	mutex_enter(&bp->vb_lock);
1352 	vepent->ve_refcnt--;
1353 
1354 	if ((int32_t)vepent->ve_refcnt < 0)
1355 		cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
1356 
1357 	if (vepent->ve_refcnt == 0) {
1358 		for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
1359 			if (vep->ve_vpvfs == vepent->ve_vpvfs) {
1360 				if (bp->vb_list == vep)
1361 					bp->vb_list = vep->ve_next;
1362 				else {
1363 					/* LINTED */
1364 					pvep->ve_next = vep->ve_next;
1365 				}
1366 				mutex_exit(&bp->vb_lock);
1367 				rwst_destroy(&vep->ve_lock);
1368 				kmem_free(vep, sizeof (*vep));
1369 				return;
1370 			}
1371 			pvep = vep;
1372 		}
1373 		cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
1374 	}
1375 	mutex_exit(&bp->vb_lock);
1376 }
1377 
1378 /*
1379  * vn_vfswlock_wait is used to implement a lock which is logically a writers
1380  * lock protecting the v_vfsmountedhere field.
1381  * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
1382  * except that it blocks to acquire the lock VVFSLOCK.
1383  *
1384  * traverse() and routines re-implementing part of traverse (e.g. autofs)
1385  * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
1386  * need the non-blocking version of the writers lock i.e. vn_vfswlock
1387  */
1388 int
1389 vn_vfswlock_wait(vnode_t *vp)
1390 {
1391 	int retval;
1392 	vn_vfslocks_entry_t *vpvfsentry;
1393 	ASSERT(vp != NULL);
1394 
1395 	vpvfsentry = vn_vfslocks_getlock(vp);
1396 	retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
1397 
1398 	if (retval == EINTR) {
1399 		vn_vfslocks_rele(vpvfsentry);
1400 		return (EINTR);
1401 	}
1402 	return (retval);
1403 }
1404 
1405 int
1406 vn_vfsrlock_wait(vnode_t *vp)
1407 {
1408 	int retval;
1409 	vn_vfslocks_entry_t *vpvfsentry;
1410 	ASSERT(vp != NULL);
1411 
1412 	vpvfsentry = vn_vfslocks_getlock(vp);
1413 	retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
1414 
1415 	if (retval == EINTR) {
1416 		vn_vfslocks_rele(vpvfsentry);
1417 		return (EINTR);
1418 	}
1419 
1420 	return (retval);
1421 }
1422 
1423 
1424 /*
1425  * vn_vfswlock is used to implement a lock which is logically a writers lock
1426  * protecting the v_vfsmountedhere field.
1427  */
1428 int
1429 vn_vfswlock(vnode_t *vp)
1430 {
1431 	vn_vfslocks_entry_t *vpvfsentry;
1432 
1433 	/*
1434 	 * If vp is NULL then somebody is trying to lock the covered vnode
1435 	 * of /.  (vfs_vnodecovered is NULL for /).  This situation will
1436 	 * only happen when unmounting /.  Since that operation will fail
1437 	 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
1438 	 */
1439 	if (vp == NULL)
1440 		return (EBUSY);
1441 
1442 	vpvfsentry = vn_vfslocks_getlock(vp);
1443 
1444 	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
1445 		return (0);
1446 
1447 	vn_vfslocks_rele(vpvfsentry);
1448 	return (EBUSY);
1449 }
1450 
1451 int
1452 vn_vfsrlock(vnode_t *vp)
1453 {
1454 	vn_vfslocks_entry_t *vpvfsentry;
1455 
1456 	/*
1457 	 * If vp is NULL then somebody is trying to lock the covered vnode
1458 	 * of /.  (vfs_vnodecovered is NULL for /).  This situation will
1459 	 * only happen when unmounting /.  Since that operation will fail
1460 	 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
1461 	 */
1462 	if (vp == NULL)
1463 		return (EBUSY);
1464 
1465 	vpvfsentry = vn_vfslocks_getlock(vp);
1466 
1467 	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
1468 		return (0);
1469 
1470 	vn_vfslocks_rele(vpvfsentry);
1471 	return (EBUSY);
1472 }
1473 
1474 void
1475 vn_vfsunlock(vnode_t *vp)
1476 {
1477 	vn_vfslocks_entry_t *vpvfsentry;
1478 
1479 	/*
1480 	 * ve_refcnt needs to be decremented twice.
1481 	 * 1. To release refernce after a call to vn_vfslocks_getlock()
1482 	 * 2. To release the reference from the locking routines like
1483 	 *    vn_vfsrlock/vn_vfswlock etc,.
1484 	 */
1485 	vpvfsentry = vn_vfslocks_getlock(vp);
1486 	vn_vfslocks_rele(vpvfsentry);
1487 
1488 	rwst_exit(&vpvfsentry->ve_lock);
1489 	vn_vfslocks_rele(vpvfsentry);
1490 }
1491 
1492 int
1493 vn_vfswlock_held(vnode_t *vp)
1494 {
1495 	int held;
1496 	vn_vfslocks_entry_t *vpvfsentry;
1497 
1498 	ASSERT(vp != NULL);
1499 
1500 	vpvfsentry = vn_vfslocks_getlock(vp);
1501 	held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
1502 
1503 	vn_vfslocks_rele(vpvfsentry);
1504 	return (held);
1505 }
1506 
1507 
1508 int
1509 vn_make_ops(
1510 	const char *name,			/* Name of file system */
1511 	const fs_operation_def_t *templ,	/* Operation specification */
1512 	vnodeops_t **actual)			/* Return the vnodeops */
1513 {
1514 	int unused_ops;
1515 	int error;
1516 
1517 	*actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
1518 
1519 	(*actual)->vnop_name = name;
1520 
1521 	error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
1522 	if (error) {
1523 		kmem_free(*actual, sizeof (vnodeops_t));
1524 	}
1525 
1526 #if DEBUG
1527 	if (unused_ops != 0)
1528 		cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
1529 		    "but not used", name, unused_ops);
1530 #endif
1531 
1532 	return (error);
1533 }
1534 
1535 /*
1536  * Free the vnodeops created as a result of vn_make_ops()
1537  */
1538 void
1539 vn_freevnodeops(vnodeops_t *vnops)
1540 {
1541 	kmem_free(vnops, sizeof (vnodeops_t));
1542 }
1543 
1544 /*
1545  * Vnode cache.
1546  */
1547 
1548 /* ARGSUSED */
1549 static int
1550 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
1551 {
1552 	struct vnode *vp;
1553 
1554 	vp = buf;
1555 
1556 	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
1557 	cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
1558 	rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
1559 	rw_init(&vp->v_mslock, NULL, RW_DEFAULT, NULL);
1560 
1561 	vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
1562 	vp->v_path = NULL;
1563 	vp->v_mpssdata = NULL;
1564 
1565 	return (0);
1566 }
1567 
1568 /* ARGSUSED */
1569 static void
1570 vn_cache_destructor(void *buf, void *cdrarg)
1571 {
1572 	struct vnode *vp;
1573 
1574 	vp = buf;
1575 
1576 	rw_destroy(&vp->v_mslock);
1577 	rw_destroy(&vp->v_nbllock);
1578 	cv_destroy(&vp->v_cv);
1579 	mutex_destroy(&vp->v_lock);
1580 }
1581 
1582 void
1583 vn_create_cache(void)
1584 {
1585 	vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), 64,
1586 	    vn_cache_constructor, vn_cache_destructor, NULL, NULL,
1587 	    NULL, 0);
1588 }
1589 
1590 void
1591 vn_destroy_cache(void)
1592 {
1593 	kmem_cache_destroy(vn_cache);
1594 }
1595 
1596 /*
1597  * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
1598  * cached by the file system and vnodes remain associated.
1599  */
1600 void
1601 vn_recycle(vnode_t *vp)
1602 {
1603 	ASSERT(vp->v_pages == NULL);
1604 
1605 	/*
1606 	 * XXX - This really belongs in vn_reinit(), but we have some issues
1607 	 * with the counts.  Best to have it here for clean initialization.
1608 	 */
1609 	vp->v_rdcnt = 0;
1610 	vp->v_wrcnt = 0;
1611 	vp->v_mmap_read = 0;
1612 	vp->v_mmap_write = 0;
1613 
1614 	/*
1615 	 * If FEM was in use, make sure everything gets cleaned up
1616 	 * NOTE: vp->v_femhead is initialized to NULL in the vnode
1617 	 * constructor.
1618 	 */
1619 	if (vp->v_femhead) {
1620 		/* XXX - There should be a free_femhead() that does all this */
1621 		ASSERT(vp->v_femhead->femh_list == NULL);
1622 		mutex_destroy(&vp->v_femhead->femh_lock);
1623 		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
1624 		vp->v_femhead = NULL;
1625 	}
1626 	if (vp->v_path) {
1627 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
1628 		vp->v_path = NULL;
1629 	}
1630 	vp->v_mpssdata = NULL;
1631 }
1632 
1633 /*
1634  * Used to reset the vnode fields including those that are directly accessible
1635  * as well as those which require an accessor function.
1636  *
1637  * Does not initialize:
1638  *	synchronization objects: v_lock, v_nbllock, v_cv
1639  *	v_data (since FS-nodes and vnodes point to each other and should
1640  *		be updated simultaneously)
1641  *	v_op (in case someone needs to make a VOP call on this object)
1642  */
1643 void
1644 vn_reinit(vnode_t *vp)
1645 {
1646 	vp->v_count = 1;
1647 	vp->v_vfsp = NULL;
1648 	vp->v_stream = NULL;
1649 	vp->v_vfsmountedhere = NULL;
1650 	vp->v_flag = 0;
1651 	vp->v_type = VNON;
1652 	vp->v_rdev = NODEV;
1653 
1654 	vp->v_filocks = NULL;
1655 	vp->v_shrlocks = NULL;
1656 	vp->v_pages = NULL;
1657 	vp->v_npages = 0;
1658 	vp->v_msnpages = 0;
1659 	vp->v_scanfront = NULL;
1660 	vp->v_scanback = NULL;
1661 
1662 	vp->v_locality = NULL;
1663 	vp->v_scantime = 0;
1664 	vp->v_mset = 0;
1665 	vp->v_msflags = 0;
1666 	vp->v_msnext = NULL;
1667 	vp->v_msprev = NULL;
1668 
1669 	/* Handles v_femhead, v_path, and the r/w/map counts */
1670 	vn_recycle(vp);
1671 }
1672 
1673 vnode_t *
1674 vn_alloc(int kmflag)
1675 {
1676 	vnode_t *vp;
1677 
1678 	vp = kmem_cache_alloc(vn_cache, kmflag);
1679 
1680 	if (vp != NULL) {
1681 		vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
1682 		vn_reinit(vp);
1683 	}
1684 
1685 	return (vp);
1686 }
1687 
1688 void
1689 vn_free(vnode_t *vp)
1690 {
1691 	/*
1692 	 * Some file systems call vn_free() with v_count of zero,
1693 	 * some with v_count of 1.  In any case, the value should
1694 	 * never be anything else.
1695 	 */
1696 	ASSERT((vp->v_count == 0) || (vp->v_count == 1));
1697 	if (vp->v_path != NULL) {
1698 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
1699 		vp->v_path = NULL;
1700 	}
1701 
1702 	/* If FEM was in use, make sure everything gets cleaned up */
1703 	if (vp->v_femhead) {
1704 		/* XXX - There should be a free_femhead() that does all this */
1705 		ASSERT(vp->v_femhead->femh_list == NULL);
1706 		mutex_destroy(&vp->v_femhead->femh_lock);
1707 		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
1708 		vp->v_femhead = NULL;
1709 	}
1710 	vp->v_mpssdata = NULL;
1711 	kmem_cache_free(vn_cache, vp);
1712 }
1713 
1714 /*
1715  * vnode status changes, should define better states than 1, 0.
1716  */
1717 void
1718 vn_reclaim(vnode_t *vp)
1719 {
1720 	vfs_t   *vfsp = vp->v_vfsp;
1721 
1722 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1723 		return;
1724 	}
1725 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
1726 }
1727 
1728 void
1729 vn_idle(vnode_t *vp)
1730 {
1731 	vfs_t   *vfsp = vp->v_vfsp;
1732 
1733 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1734 		return;
1735 	}
1736 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
1737 }
1738 void
1739 vn_exists(vnode_t *vp)
1740 {
1741 	vfs_t   *vfsp = vp->v_vfsp;
1742 
1743 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1744 		return;
1745 	}
1746 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
1747 }
1748 
1749 void
1750 vn_invalid(vnode_t *vp)
1751 {
1752 	vfs_t   *vfsp = vp->v_vfsp;
1753 
1754 	if (vfsp == NULL || vfsp->vfs_femhead == NULL) {
1755 		return;
1756 	}
1757 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
1758 }
1759 
1760 /* Vnode event notification */
1761 
1762 int
1763 vnevent_support(vnode_t *vp)
1764 {
1765 	if (vp == NULL)
1766 		return (EINVAL);
1767 
1768 	return (VOP_VNEVENT(vp, VE_SUPPORT));
1769 }
1770 
1771 void
1772 vnevent_rename_src(vnode_t *vp)
1773 {
1774 	if (vp == NULL || vp->v_femhead == NULL) {
1775 		return;
1776 	}
1777 	(void) VOP_VNEVENT(vp, VE_RENAME_SRC);
1778 }
1779 
1780 void
1781 vnevent_rename_dest(vnode_t *vp)
1782 {
1783 	if (vp == NULL || vp->v_femhead == NULL) {
1784 		return;
1785 	}
1786 	(void) VOP_VNEVENT(vp, VE_RENAME_DEST);
1787 }
1788 
1789 void
1790 vnevent_remove(vnode_t *vp)
1791 {
1792 	if (vp == NULL || vp->v_femhead == NULL) {
1793 		return;
1794 	}
1795 	(void) VOP_VNEVENT(vp, VE_REMOVE);
1796 }
1797 
1798 void
1799 vnevent_rmdir(vnode_t *vp)
1800 {
1801 	if (vp == NULL || vp->v_femhead == NULL) {
1802 		return;
1803 	}
1804 	(void) VOP_VNEVENT(vp, VE_RMDIR);
1805 }
1806 
1807 /*
1808  * Vnode accessors.
1809  */
1810 
1811 int
1812 vn_is_readonly(vnode_t *vp)
1813 {
1814 	return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
1815 }
1816 
1817 int
1818 vn_has_flocks(vnode_t *vp)
1819 {
1820 	return (vp->v_filocks != NULL);
1821 }
1822 
1823 int
1824 vn_has_mandatory_locks(vnode_t *vp, int mode)
1825 {
1826 	return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
1827 }
1828 
1829 int
1830 vn_has_cached_data(vnode_t *vp)
1831 {
1832 	return (vp->v_pages != NULL);
1833 }
1834 
1835 /*
1836  * Return 0 if the vnode in question shouldn't be permitted into a zone via
1837  * zone_enter(2).
1838  */
1839 int
1840 vn_can_change_zones(vnode_t *vp)
1841 {
1842 	struct vfssw *vswp;
1843 	int allow = 1;
1844 	vnode_t *rvp;
1845 
1846 	/*
1847 	 * We always want to look at the underlying vnode if there is one.
1848 	 */
1849 	if (VOP_REALVP(vp, &rvp) != 0)
1850 		rvp = vp;
1851 	/*
1852 	 * Some pseudo filesystems (including doorfs) don't actually register
1853 	 * their vfsops_t, so the following may return NULL; we happily let
1854 	 * such vnodes switch zones.
1855 	 */
1856 	vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
1857 	if (vswp != NULL) {
1858 		if (vswp->vsw_flag & VSW_NOTZONESAFE)
1859 			allow = 0;
1860 		vfs_unrefvfssw(vswp);
1861 	}
1862 	return (allow);
1863 }
1864 
1865 /*
1866  * Return nonzero if the vnode is a mount point, zero if not.
1867  */
1868 int
1869 vn_ismntpt(vnode_t *vp)
1870 {
1871 	return (vp->v_vfsmountedhere != NULL);
1872 }
1873 
1874 /* Retrieve the vfs (if any) mounted on this vnode */
1875 vfs_t *
1876 vn_mountedvfs(vnode_t *vp)
1877 {
1878 	return (vp->v_vfsmountedhere);
1879 }
1880 
1881 /*
1882  * vn_is_opened() checks whether a particular file is opened and
1883  * whether the open is for read and/or write.
1884  *
1885  * Vnode counts are only kept on regular files (v_type=VREG).
1886  */
1887 int
1888 vn_is_opened(
1889 	vnode_t *vp,
1890 	v_mode_t mode)
1891 {
1892 
1893 	ASSERT(vp != NULL);
1894 
1895 	switch (mode) {
1896 	case V_WRITE:
1897 		if (vp->v_wrcnt)
1898 			return (V_TRUE);
1899 		break;
1900 	case V_RDANDWR:
1901 		if (vp->v_rdcnt && vp->v_wrcnt)
1902 			return (V_TRUE);
1903 		break;
1904 	case V_RDORWR:
1905 		if (vp->v_rdcnt || vp->v_wrcnt)
1906 			return (V_TRUE);
1907 		break;
1908 	case V_READ:
1909 		if (vp->v_rdcnt)
1910 			return (V_TRUE);
1911 		break;
1912 	}
1913 
1914 	return (V_FALSE);
1915 }
1916 
1917 /*
1918  * vn_is_mapped() checks whether a particular file is mapped and whether
1919  * the file is mapped read and/or write.
1920  */
1921 int
1922 vn_is_mapped(
1923 	vnode_t *vp,
1924 	v_mode_t mode)
1925 {
1926 
1927 	ASSERT(vp != NULL);
1928 
1929 #if !defined(_LP64)
1930 	switch (mode) {
1931 	/*
1932 	 * The atomic_add_64_nv functions force atomicity in the
1933 	 * case of 32 bit architectures. Otherwise the 64 bit values
1934 	 * require two fetches. The value of the fields may be
1935 	 * (potentially) changed between the first fetch and the
1936 	 * second
1937 	 */
1938 	case V_WRITE:
1939 		if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
1940 			return (V_TRUE);
1941 		break;
1942 	case V_RDANDWR:
1943 		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
1944 		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
1945 			return (V_TRUE);
1946 		break;
1947 	case V_RDORWR:
1948 		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
1949 		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
1950 			return (V_TRUE);
1951 		break;
1952 	case V_READ:
1953 		if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
1954 			return (V_TRUE);
1955 		break;
1956 	}
1957 #else
1958 	switch (mode) {
1959 	case V_WRITE:
1960 		if (vp->v_mmap_write)
1961 			return (V_TRUE);
1962 		break;
1963 	case V_RDANDWR:
1964 		if (vp->v_mmap_read && vp->v_mmap_write)
1965 			return (V_TRUE);
1966 		break;
1967 	case V_RDORWR:
1968 		if (vp->v_mmap_read || vp->v_mmap_write)
1969 			return (V_TRUE);
1970 		break;
1971 	case V_READ:
1972 		if (vp->v_mmap_read)
1973 			return (V_TRUE);
1974 		break;
1975 	}
1976 #endif
1977 
1978 	return (V_FALSE);
1979 }
1980 
1981 /*
1982  * Set the operations vector for a vnode.
1983  *
1984  * FEM ensures that the v_femhead pointer is filled in before the
1985  * v_op pointer is changed.  This means that if the v_femhead pointer
1986  * is NULL, and the v_op field hasn't changed since before which checked
1987  * the v_femhead pointer; then our update is ok - we are not racing with
1988  * FEM.
1989  */
1990 void
1991 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
1992 {
1993 	vnodeops_t	*op;
1994 
1995 	ASSERT(vp != NULL);
1996 	ASSERT(vnodeops != NULL);
1997 
1998 	op = vp->v_op;
1999 	membar_consumer();
2000 	/*
2001 	 * If vp->v_femhead == NULL, then we'll call casptr() to do the
2002 	 * compare-and-swap on vp->v_op.  If either fails, then FEM is
2003 	 * in effect on the vnode and we need to have FEM deal with it.
2004 	 */
2005 	if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
2006 		fem_setvnops(vp, vnodeops);
2007 	}
2008 }
2009 
2010 /*
2011  * Retrieve the operations vector for a vnode
2012  * As with vn_setops(above); make sure we aren't racing with FEM.
2013  * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2014  * make sense to the callers of this routine.
2015  */
2016 vnodeops_t *
2017 vn_getops(vnode_t *vp)
2018 {
2019 	vnodeops_t	*op;
2020 
2021 	ASSERT(vp != NULL);
2022 
2023 	op = vp->v_op;
2024 	membar_consumer();
2025 	if (vp->v_femhead == NULL && op == vp->v_op) {
2026 		return (op);
2027 	} else {
2028 		return (fem_getvnops(vp));
2029 	}
2030 }
2031 
2032 /*
2033  * Returns non-zero (1) if the vnodeops matches that of the vnode.
2034  * Returns zero (0) if not.
2035  */
2036 int
2037 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2038 {
2039 	return (vn_getops(vp) == vnodeops);
2040 }
2041 
2042 /*
2043  * Returns non-zero (1) if the specified operation matches the
2044  * corresponding operation for that the vnode.
2045  * Returns zero (0) if not.
2046  */
2047 
2048 #define	MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2049 
2050 int
2051 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2052 {
2053 	const fs_operation_trans_def_t *otdp;
2054 	fs_generic_func_p *loc = NULL;
2055 	vnodeops_t	*vop = vn_getops(vp);
2056 
2057 	ASSERT(vopname != NULL);
2058 
2059 	for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2060 		if (MATCHNAME(otdp->name, vopname)) {
2061 			loc = (fs_generic_func_p *)((char *)(vop)
2062 							+ otdp->offset);
2063 			break;
2064 		}
2065 	}
2066 
2067 	return ((loc != NULL) && (*loc == funcp));
2068 }
2069 
2070 /*
2071  * fs_new_caller_id() needs to return a unique ID on a given local system.
2072  * The IDs do not need to survive across reboots.  These are primarily
2073  * used so that (FEM) monitors can detect particular callers (such as
2074  * the NFS server) to a given vnode/vfs operation.
2075  */
2076 u_longlong_t
2077 fs_new_caller_id()
2078 {
2079 	static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2080 
2081 	return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
2082 }
2083 
2084 /*
2085  * Given a starting vnode and a path, updates the path in the target vnode in
2086  * a safe manner.  If the vnode already has path information embedded, then the
2087  * cached path is left untouched.
2088  */
2089 void
2090 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2091     const char *path, size_t plen)
2092 {
2093 	char	*rpath;
2094 	vnode_t	*base;
2095 	size_t	rpathlen, rpathalloc;
2096 	int	doslash = 1;
2097 
2098 	if (*path == '/') {
2099 		base = rootvp;
2100 		path++;
2101 		plen--;
2102 	} else {
2103 		base = startvp;
2104 	}
2105 
2106 	/*
2107 	 * We cannot grab base->v_lock while we hold vp->v_lock because of
2108 	 * the potential for deadlock.
2109 	 */
2110 	mutex_enter(&base->v_lock);
2111 	if (base->v_path == NULL) {
2112 		mutex_exit(&base->v_lock);
2113 		return;
2114 	}
2115 
2116 	rpathlen = strlen(base->v_path);
2117 	rpathalloc = rpathlen + plen + 1;
2118 	/* Avoid adding a slash if there's already one there */
2119 	if (base->v_path[rpathlen-1] == '/')
2120 		doslash = 0;
2121 	else
2122 		rpathalloc++;
2123 
2124 	/*
2125 	 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2126 	 * so we must do this dance.  If, by chance, something changes the path,
2127 	 * just give up since there is no real harm.
2128 	 */
2129 	mutex_exit(&base->v_lock);
2130 
2131 	rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2132 
2133 	mutex_enter(&base->v_lock);
2134 	if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2135 		mutex_exit(&base->v_lock);
2136 		kmem_free(rpath, rpathalloc);
2137 		return;
2138 	}
2139 	bcopy(base->v_path, rpath, rpathlen);
2140 	mutex_exit(&base->v_lock);
2141 
2142 	if (doslash)
2143 		rpath[rpathlen++] = '/';
2144 	bcopy(path, rpath + rpathlen, plen);
2145 	rpath[rpathlen + plen] = '\0';
2146 
2147 	mutex_enter(&vp->v_lock);
2148 	if (vp->v_path != NULL) {
2149 		mutex_exit(&vp->v_lock);
2150 		kmem_free(rpath, rpathalloc);
2151 	} else {
2152 		vp->v_path = rpath;
2153 		mutex_exit(&vp->v_lock);
2154 	}
2155 }
2156 
2157 /*
2158  * Sets the path to the vnode to be the given string, regardless of current
2159  * context.  The string must be a complete path from rootdir.  This is only used
2160  * by fsop_root() for setting the path based on the mountpoint.
2161  */
2162 void
2163 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
2164 {
2165 	char *buf = kmem_alloc(len + 1, KM_SLEEP);
2166 
2167 	mutex_enter(&vp->v_lock);
2168 	if (vp->v_path != NULL) {
2169 		mutex_exit(&vp->v_lock);
2170 		kmem_free(buf, len + 1);
2171 		return;
2172 	}
2173 
2174 	vp->v_path = buf;
2175 	bcopy(str, vp->v_path, len);
2176 	vp->v_path[len] = '\0';
2177 
2178 	mutex_exit(&vp->v_lock);
2179 }
2180 
2181 /*
2182  * Similar to vn_setpath_str(), this function sets the path of the destination
2183  * vnode to the be the same as the source vnode.
2184  */
2185 void
2186 vn_copypath(struct vnode *src, struct vnode *dst)
2187 {
2188 	char *buf;
2189 	int alloc;
2190 
2191 	mutex_enter(&src->v_lock);
2192 	if (src->v_path == NULL) {
2193 		mutex_exit(&src->v_lock);
2194 		return;
2195 	}
2196 	alloc = strlen(src->v_path) + 1;
2197 
2198 	/* avoid kmem_alloc() with lock held */
2199 	mutex_exit(&src->v_lock);
2200 	buf = kmem_alloc(alloc, KM_SLEEP);
2201 	mutex_enter(&src->v_lock);
2202 	if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
2203 		mutex_exit(&src->v_lock);
2204 		kmem_free(buf, alloc);
2205 		return;
2206 	}
2207 	bcopy(src->v_path, buf, alloc);
2208 	mutex_exit(&src->v_lock);
2209 
2210 	mutex_enter(&dst->v_lock);
2211 	if (dst->v_path != NULL) {
2212 		mutex_exit(&dst->v_lock);
2213 		kmem_free(buf, alloc);
2214 		return;
2215 	}
2216 	dst->v_path = buf;
2217 	mutex_exit(&dst->v_lock);
2218 }
2219 
2220 /*
2221  * XXX Private interface for segvn routines that handle vnode
2222  * large page segments.
2223  *
2224  * return 1 if vp's file system VOP_PAGEIO() implementation
2225  * can be safely used instead of VOP_GETPAGE() for handling
2226  * pagefaults against regular non swap files. VOP_PAGEIO()
2227  * interface is considered safe here if its implementation
2228  * is very close to VOP_GETPAGE() implementation.
2229  * e.g. It zero's out the part of the page beyond EOF. Doesn't
2230  * panic if there're file holes but instead returns an error.
2231  * Doesn't assume file won't be changed by user writes, etc.
2232  *
2233  * return 0 otherwise.
2234  *
2235  * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
2236  */
2237 int
2238 vn_vmpss_usepageio(vnode_t *vp)
2239 {
2240 	vfs_t   *vfsp = vp->v_vfsp;
2241 	char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
2242 	char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
2243 	char **fsok = pageio_ok_fss;
2244 
2245 	if (fsname == NULL) {
2246 		return (0);
2247 	}
2248 
2249 	for (; *fsok; fsok++) {
2250 		if (strcmp(*fsok, fsname) == 0) {
2251 			return (1);
2252 		}
2253 	}
2254 	return (0);
2255 }
2256 
2257 /* VOP_XXX() macros call the corresponding fop_xxx() function */
2258 
2259 int
2260 fop_open(
2261 	vnode_t **vpp,
2262 	int mode,
2263 	cred_t *cr)
2264 {
2265 	int ret;
2266 	vnode_t *vp = *vpp;
2267 
2268 	VN_HOLD(vp);
2269 	/*
2270 	 * Adding to the vnode counts before calling open
2271 	 * avoids the need for a mutex. It circumvents a race
2272 	 * condition where a query made on the vnode counts results in a
2273 	 * false negative. The inquirer goes away believing the file is
2274 	 * not open when there is an open on the file already under way.
2275 	 *
2276 	 * The counts are meant to prevent NFS from granting a delegation
2277 	 * when it would be dangerous to do so.
2278 	 *
2279 	 * The vnode counts are only kept on regular files
2280 	 */
2281 	if ((*vpp)->v_type == VREG) {
2282 		if (mode & FREAD)
2283 			atomic_add_32(&((*vpp)->v_rdcnt), 1);
2284 		if (mode & FWRITE)
2285 			atomic_add_32(&((*vpp)->v_wrcnt), 1);
2286 	}
2287 
2288 	ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr);
2289 
2290 	if (ret) {
2291 		/*
2292 		 * Use the saved vp just in case the vnode ptr got trashed
2293 		 * by the error.
2294 		 */
2295 		if ((vp->v_type == VREG) && (mode & FREAD))
2296 			atomic_add_32(&(vp->v_rdcnt), -1);
2297 		if ((vp->v_type == VREG) && (mode & FWRITE))
2298 			atomic_add_32(&(vp->v_wrcnt), -1);
2299 	} else {
2300 		/*
2301 		 * Some filesystems will return a different vnode,
2302 		 * but the same path was still used to open it.
2303 		 * So if we do change the vnode and need to
2304 		 * copy over the path, do so here, rather than special
2305 		 * casing each filesystem. Adjust the vnode counts to
2306 		 * reflect the vnode switch.
2307 		 */
2308 
2309 		if (*vpp != vp && *vpp != NULL) {
2310 			vn_copypath(vp, *vpp);
2311 			if (((*vpp)->v_type == VREG) && (mode & FREAD))
2312 				atomic_add_32(&((*vpp)->v_rdcnt), 1);
2313 			if ((vp->v_type == VREG) && (mode & FREAD))
2314 				atomic_add_32(&(vp->v_rdcnt), -1);
2315 			if (((*vpp)->v_type == VREG) && (mode & FWRITE))
2316 				atomic_add_32(&((*vpp)->v_wrcnt), 1);
2317 			if ((vp->v_type == VREG) && (mode & FWRITE))
2318 				atomic_add_32(&(vp->v_wrcnt), -1);
2319 		}
2320 	}
2321 	VN_RELE(vp);
2322 	return (ret);
2323 }
2324 
2325 int
2326 fop_close(
2327 	vnode_t *vp,
2328 	int flag,
2329 	int count,
2330 	offset_t offset,
2331 	cred_t *cr)
2332 {
2333 	int error;
2334 	error = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr);
2335 	/*
2336 	 * Check passed in count to handle possible dups. Vnode counts are only
2337 	 * kept on regular files
2338 	 */
2339 	if ((vp->v_type == VREG) && (count == 1))  {
2340 		if (flag & FREAD) {
2341 			ASSERT(vp->v_rdcnt > 0);
2342 			atomic_add_32(&(vp->v_rdcnt), -1);
2343 		}
2344 		if (flag & FWRITE) {
2345 			ASSERT(vp->v_wrcnt > 0);
2346 			atomic_add_32(&(vp->v_wrcnt), -1);
2347 		}
2348 	}
2349 	return (error);
2350 }
2351 
2352 int
2353 fop_read(
2354 	vnode_t *vp,
2355 	uio_t *uiop,
2356 	int ioflag,
2357 	cred_t *cr,
2358 	struct caller_context *ct)
2359 {
2360 	return (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
2361 }
2362 
2363 int
2364 fop_write(
2365 	vnode_t *vp,
2366 	uio_t *uiop,
2367 	int ioflag,
2368 	cred_t *cr,
2369 	struct caller_context *ct)
2370 {
2371 	return (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
2372 }
2373 
2374 int
2375 fop_ioctl(
2376 	vnode_t *vp,
2377 	int cmd,
2378 	intptr_t arg,
2379 	int flag,
2380 	cred_t *cr,
2381 	int *rvalp)
2382 {
2383 	return (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp);
2384 }
2385 
2386 int
2387 fop_setfl(
2388 	vnode_t *vp,
2389 	int oflags,
2390 	int nflags,
2391 	cred_t *cr)
2392 {
2393 	return (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr);
2394 }
2395 
2396 int
2397 fop_getattr(
2398 	vnode_t *vp,
2399 	vattr_t *vap,
2400 	int flags,
2401 	cred_t *cr)
2402 {
2403 	return (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr);
2404 }
2405 
2406 int
2407 fop_setattr(
2408 	vnode_t *vp,
2409 	vattr_t *vap,
2410 	int flags,
2411 	cred_t *cr,
2412 	caller_context_t *ct)
2413 {
2414 	return (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
2415 }
2416 
2417 int
2418 fop_access(
2419 	vnode_t *vp,
2420 	int mode,
2421 	int flags,
2422 	cred_t *cr)
2423 {
2424 	return (*(vp)->v_op->vop_access)(vp, mode, flags, cr);
2425 }
2426 
2427 int
2428 fop_lookup(
2429 	vnode_t *dvp,
2430 	char *nm,
2431 	vnode_t **vpp,
2432 	pathname_t *pnp,
2433 	int flags,
2434 	vnode_t *rdir,
2435 	cred_t *cr)
2436 {
2437 	int ret;
2438 
2439 	ret = (*(dvp)->v_op->vop_lookup)(dvp, nm, vpp, pnp, flags, rdir, cr);
2440 	if (ret == 0 && *vpp && (*vpp)->v_path == NULL)
2441 		vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
2442 
2443 	return (ret);
2444 }
2445 
2446 int
2447 fop_create(
2448 	vnode_t *dvp,
2449 	char *name,
2450 	vattr_t *vap,
2451 	vcexcl_t excl,
2452 	int mode,
2453 	vnode_t **vpp,
2454 	cred_t *cr,
2455 	int flag)
2456 {
2457 	int ret;
2458 
2459 	ret = (*(dvp)->v_op->vop_create)
2460 				(dvp, name, vap, excl, mode, vpp, cr, flag);
2461 	if (ret == 0 && *vpp && (*vpp)->v_path == NULL)
2462 		vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
2463 
2464 	return (ret);
2465 }
2466 
2467 int
2468 fop_remove(
2469 	vnode_t *dvp,
2470 	char *nm,
2471 	cred_t *cr)
2472 {
2473 	return (*(dvp)->v_op->vop_remove)(dvp, nm, cr);
2474 }
2475 
2476 int
2477 fop_link(
2478 	vnode_t *tdvp,
2479 	vnode_t *svp,
2480 	char *tnm,
2481 	cred_t *cr)
2482 {
2483 	return (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr);
2484 }
2485 
2486 int
2487 fop_rename(
2488 	vnode_t *sdvp,
2489 	char *snm,
2490 	vnode_t *tdvp,
2491 	char *tnm,
2492 	cred_t *cr)
2493 {
2494 	return (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr);
2495 }
2496 
2497 int
2498 fop_mkdir(
2499 	vnode_t *dvp,
2500 	char *dirname,
2501 	vattr_t *vap,
2502 	vnode_t **vpp,
2503 	cred_t *cr)
2504 {
2505 	int ret;
2506 
2507 	ret = (*(dvp)->v_op->vop_mkdir)(dvp, dirname, vap, vpp, cr);
2508 	if (ret == 0 && *vpp && (*vpp)->v_path == NULL)
2509 		vn_setpath(rootdir, dvp, *vpp, dirname, strlen(dirname));
2510 
2511 	return (ret);
2512 }
2513 
2514 int
2515 fop_rmdir(
2516 	vnode_t *dvp,
2517 	char *nm,
2518 	vnode_t *cdir,
2519 	cred_t *cr)
2520 {
2521 	return (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr);
2522 }
2523 
2524 int
2525 fop_readdir(
2526 	vnode_t *vp,
2527 	uio_t *uiop,
2528 	cred_t *cr,
2529 	int *eofp)
2530 {
2531 	return (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp);
2532 }
2533 
2534 int
2535 fop_symlink(
2536 	vnode_t *dvp,
2537 	char *linkname,
2538 	vattr_t *vap,
2539 	char *target,
2540 	cred_t *cr)
2541 {
2542 	return (*(dvp)->v_op->vop_symlink) (dvp, linkname, vap, target, cr);
2543 }
2544 
2545 int
2546 fop_readlink(
2547 	vnode_t *vp,
2548 	uio_t *uiop,
2549 	cred_t *cr)
2550 {
2551 	return (*(vp)->v_op->vop_readlink)(vp, uiop, cr);
2552 }
2553 
2554 int
2555 fop_fsync(
2556 	vnode_t *vp,
2557 	int syncflag,
2558 	cred_t *cr)
2559 {
2560 	return (*(vp)->v_op->vop_fsync)(vp, syncflag, cr);
2561 }
2562 
2563 void
2564 fop_inactive(
2565 	vnode_t *vp,
2566 	cred_t *cr)
2567 {
2568 	(*(vp)->v_op->vop_inactive)(vp, cr);
2569 }
2570 
2571 int
2572 fop_fid(
2573 	vnode_t *vp,
2574 	fid_t *fidp)
2575 {
2576 	return (*(vp)->v_op->vop_fid)(vp, fidp);
2577 }
2578 
2579 int
2580 fop_rwlock(
2581 	vnode_t *vp,
2582 	int write_lock,
2583 	caller_context_t *ct)
2584 {
2585 	return ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
2586 }
2587 
2588 void
2589 fop_rwunlock(
2590 	vnode_t *vp,
2591 	int write_lock,
2592 	caller_context_t *ct)
2593 {
2594 	(*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
2595 }
2596 
2597 int
2598 fop_seek(
2599 	vnode_t *vp,
2600 	offset_t ooff,
2601 	offset_t *noffp)
2602 {
2603 	return (*(vp)->v_op->vop_seek)(vp, ooff, noffp);
2604 }
2605 
2606 int
2607 fop_cmp(
2608 	vnode_t *vp1,
2609 	vnode_t *vp2)
2610 {
2611 	return (*(vp1)->v_op->vop_cmp)(vp1, vp2);
2612 }
2613 
2614 int
2615 fop_frlock(
2616 	vnode_t *vp,
2617 	int cmd,
2618 	flock64_t *bfp,
2619 	int flag,
2620 	offset_t offset,
2621 	struct flk_callback *flk_cbp,
2622 	cred_t *cr)
2623 {
2624 	return (*(vp)->v_op->vop_frlock)
2625 				(vp, cmd, bfp, flag, offset, flk_cbp, cr);
2626 }
2627 
2628 int
2629 fop_space(
2630 	vnode_t *vp,
2631 	int cmd,
2632 	flock64_t *bfp,
2633 	int flag,
2634 	offset_t offset,
2635 	cred_t *cr,
2636 	caller_context_t *ct)
2637 {
2638 	return (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
2639 }
2640 
2641 int
2642 fop_realvp(
2643 	vnode_t *vp,
2644 	vnode_t **vpp)
2645 {
2646 	return (*(vp)->v_op->vop_realvp)(vp, vpp);
2647 }
2648 
2649 int
2650 fop_getpage(
2651 	vnode_t *vp,
2652 	offset_t off,
2653 	size_t len,
2654 	uint_t *protp,
2655 	page_t **plarr,
2656 	size_t plsz,
2657 	struct seg *seg,
2658 	caddr_t addr,
2659 	enum seg_rw rw,
2660 	cred_t *cr)
2661 {
2662 	return (*(vp)->v_op->vop_getpage)
2663 			(vp, off, len, protp, plarr, plsz, seg, addr, rw, cr);
2664 }
2665 
2666 int
2667 fop_putpage(
2668 	vnode_t *vp,
2669 	offset_t off,
2670 	size_t len,
2671 	int flags,
2672 	cred_t *cr)
2673 {
2674 	return (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr);
2675 }
2676 
2677 int
2678 fop_map(
2679 	vnode_t *vp,
2680 	offset_t off,
2681 	struct as *as,
2682 	caddr_t *addrp,
2683 	size_t len,
2684 	uchar_t prot,
2685 	uchar_t maxprot,
2686 	uint_t flags,
2687 	cred_t *cr)
2688 {
2689 	return (*(vp)->v_op->vop_map)
2690 			(vp, off, as, addrp, len, prot, maxprot, flags, cr);
2691 }
2692 
2693 int
2694 fop_addmap(
2695 	vnode_t *vp,
2696 	offset_t off,
2697 	struct as *as,
2698 	caddr_t addr,
2699 	size_t len,
2700 	uchar_t prot,
2701 	uchar_t maxprot,
2702 	uint_t flags,
2703 	cred_t *cr)
2704 {
2705 	int error;
2706 	u_longlong_t delta;
2707 
2708 	error = (*(vp)->v_op->vop_addmap)
2709 			(vp, off, as, addr, len, prot, maxprot, flags, cr);
2710 
2711 	if ((!error) && (vp->v_type == VREG)) {
2712 		delta = (u_longlong_t)btopr(len);
2713 		/*
2714 		 * If file is declared MAP_PRIVATE, it can't be written back
2715 		 * even if open for write. Handle as read.
2716 		 */
2717 		if (flags & MAP_PRIVATE) {
2718 			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2719 				(int64_t)delta);
2720 		} else {
2721 			/*
2722 			 * atomic_add_64 forces the fetch of a 64 bit value to
2723 			 * be atomic on 32 bit machines
2724 			 */
2725 			if (maxprot & PROT_WRITE)
2726 				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
2727 					(int64_t)delta);
2728 			if (maxprot & PROT_READ)
2729 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2730 					(int64_t)delta);
2731 			if (maxprot & PROT_EXEC)
2732 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2733 					(int64_t)delta);
2734 		}
2735 	}
2736 	return (error);
2737 }
2738 
2739 int
2740 fop_delmap(
2741 	vnode_t *vp,
2742 	offset_t off,
2743 	struct as *as,
2744 	caddr_t addr,
2745 	size_t len,
2746 	uint_t prot,
2747 	uint_t maxprot,
2748 	uint_t flags,
2749 	cred_t *cr)
2750 {
2751 	int error;
2752 	u_longlong_t delta;
2753 	error = (*(vp)->v_op->vop_delmap)
2754 		(vp, off, as, addr, len, prot, maxprot, flags, cr);
2755 
2756 	/*
2757 	 * NFS calls into delmap twice, the first time
2758 	 * it simply establishes a callback mechanism and returns EAGAIN
2759 	 * while the real work is being done upon the second invocation.
2760 	 * We have to detect this here and only decrement the counts upon
2761 	 * the second delmap request.
2762 	 */
2763 	if ((error != EAGAIN) && (vp->v_type == VREG)) {
2764 
2765 		delta = (u_longlong_t)btopr(len);
2766 
2767 		if (flags & MAP_PRIVATE) {
2768 			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2769 				(int64_t)(-delta));
2770 		} else {
2771 			/*
2772 			 * atomic_add_64 forces the fetch of a 64 bit value
2773 			 * to be atomic on 32 bit machines
2774 			 */
2775 			if (maxprot & PROT_WRITE)
2776 				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
2777 					(int64_t)(-delta));
2778 			if (maxprot & PROT_READ)
2779 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2780 					(int64_t)(-delta));
2781 			if (maxprot & PROT_EXEC)
2782 				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
2783 					(int64_t)(-delta));
2784 		}
2785 	}
2786 	return (error);
2787 }
2788 
2789 
2790 int
2791 fop_poll(
2792 	vnode_t *vp,
2793 	short events,
2794 	int anyyet,
2795 	short *reventsp,
2796 	struct pollhead **phpp)
2797 {
2798 	return (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp);
2799 }
2800 
2801 int
2802 fop_dump(
2803 	vnode_t *vp,
2804 	caddr_t addr,
2805 	int lbdn,
2806 	int dblks)
2807 {
2808 	return (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks);
2809 }
2810 
2811 int
2812 fop_pathconf(
2813 	vnode_t *vp,
2814 	int cmd,
2815 	ulong_t *valp,
2816 	cred_t *cr)
2817 {
2818 	return (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr);
2819 }
2820 
2821 int
2822 fop_pageio(
2823 	vnode_t *vp,
2824 	struct page *pp,
2825 	u_offset_t io_off,
2826 	size_t io_len,
2827 	int flags,
2828 	cred_t *cr)
2829 {
2830 	return (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr);
2831 }
2832 
2833 int
2834 fop_dumpctl(
2835 	vnode_t *vp,
2836 	int action,
2837 	int *blkp)
2838 {
2839 	return (*(vp)->v_op->vop_dumpctl)(vp, action, blkp);
2840 }
2841 
2842 void
2843 fop_dispose(
2844 	vnode_t *vp,
2845 	page_t *pp,
2846 	int flag,
2847 	int dn,
2848 	cred_t *cr)
2849 {
2850 	(*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr);
2851 }
2852 
2853 int
2854 fop_setsecattr(
2855 	vnode_t *vp,
2856 	vsecattr_t *vsap,
2857 	int flag,
2858 	cred_t *cr)
2859 {
2860 	return (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr);
2861 }
2862 
2863 int
2864 fop_getsecattr(
2865 	vnode_t *vp,
2866 	vsecattr_t *vsap,
2867 	int flag,
2868 	cred_t *cr)
2869 {
2870 	return (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr);
2871 }
2872 
2873 int
2874 fop_shrlock(
2875 	vnode_t *vp,
2876 	int cmd,
2877 	struct shrlock *shr,
2878 	int flag,
2879 	cred_t *cr)
2880 {
2881 	return (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr);
2882 }
2883 
2884 int
2885 fop_vnevent(vnode_t *vp, vnevent_t vnevent)
2886 {
2887 	return (*(vp)->v_op->vop_vnevent)(vp, vnevent);
2888 }
2889