xref: /titanic_41/usr/src/uts/common/fs/portfs/port_fop.c (revision a4dd1f3517267c5e5aa5b2bb53cb388002bc688f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
28  */
29 
30 /*
31  * File Events Notification
32  * ------------------------
33  *
34  * The File Events Notification facility provides file and directory change
35  * notification. It is implemented as an event source(PORT_SOURCE_FILE)
36  * under the Event Ports framework. Therefore the API is an extension to
37  * the Event Ports API.
38  *
39  * It uses the FEM (File Events Monitoring) framework to intercept
40  * operations on the files & directories and generate appropriate events.
41  *
42  * It provides event notification in accordance with what an application
43  * can find out by stat`ing the file and comparing time stamps. The various
44  * system calls that update the file's access, modification, and change
45  * time stamps are documented in the man page section 2.
46  *
47  * It is non intrusive. That is, having an active file event watch on a file
48  * or directory will not prevent it from being removed or renamed or block an
49  * unmount operation of the file system where the watched file or directory
50  * resides.
51  *
52  *
53  * Interface:
54  * ----------
55  *
56  *   The object for this event source is of type 'struct file_obj *'
57  *
58  *   The file that needs to be monitored is specified in 'fo_name'.
59  *   The time stamps collected by a stat(2) call are passed in fo_atime,
60  *   fo_mtime, fo_ctime. At the time a file events watch is registered, the
61  *   time stamps passed in are compared with the current time stamps of the
62  *   file. If it has changed, relevant events are sent immediately. If the time
63  *   stamps are all '0', they will not be compared.
64  *
65  *
66  * The events are delivered to an event port. A port is created using
67  * port_create().
68  *
69  * To register a file events watch on a file or directory.
70  *
71  *   port_associate(int port, PORT_SOURCE_FILE, (uintptr_t)&fobj, events, user)
72  *
73  *   'user' is the user pointer to be returned with the event.
74  *
75  * To de-register a file events watch,
76  *
77  *   port_dissociate(int port, PORT_SOURCE_FILE, (uintptr_t)&fobj)
78  *
79  * The events are collected using the port_get()/port_getn() interface. The
80  * event source will be PORT_SOURCE_FILE.
81  *
82  * After an event is delivered, the file events watch gets de-activated. To
83  * receive the next event, the process will have to re-register the watch and
84  * activate it by calling port_associate() again. This behavior is intentional
85  * and supports proper multi threaded programming when using file events
86  * notification API.
87  *
88  *
89  * Implementation overview:
90  * ------------------------
91  *
92  * Each file events watch is represented by 'portfop_t' in the kernel. A
93  * cache(in portfop_cache_t) of these portfop_t's is maintained per event
94  * port by this source. The object here is the pointer to the file_obj
95  * structure. The portfop_t's are hashed in using the object pointer. Therefore
96  * it is possible to have multiple file events watches on a file by the same
97  * process by using different object structure(file_obj_t) and hence can
98  * receive multiple event notification for a file. These watches can be for
99  * different event types.
100  *
101  * The cached entries of these file objects are retained, even after delivering
102  * an event, marking them inactive for performance reasons. The assumption
103  * is that the process would come back and re-register the file to receive
104  * further events. When there are more than 'port_fop_maxpfps' watches per file
105  * it will attempt to free the oldest inactive watches.
106  *
107  * In case the event that is being delivered is an exception event, the cached
108  * entries get removed. An exception event on a file or directory means its
109  * identity got changed(rename to/from, delete, mounted over, file system
110  * unmount).
111  *
112  * If the event port gets closed, all the associated file event watches will be
113  * removed and discarded.
114  *
115  *
116  * Data structures:
117  * ----------------
118  *
119  * The list of file event watches per file are managed by the data structure
120  * portfop_vp_t. The first time a file events watch is registered for a file,
121  * a portfop_vp_t is installed on the vnode_t's member v_fopdata. This gets
122  * removed and freed only when the vnode becomes inactive. The FEM hooks are
123  * also installed when the first watch is registered on a file. The FEM hooks
124  * get un-installed when all the watches are removed.
125  *
126  * Each file events watch is represented by the structure portfop_t. They
127  * get added to a list of portfop_t's on the vnode(portfop_vp_t). After
128  * delivering an event, the portfop_t is marked inactive but retained. It is
129  * moved to the end of the list. All the active portfop_t's are maintained at
130  * the beginning. In case of exception events, the portfop_t will be removed
131  * and discarded.
132  *
133  * To intercept unmount operations, FSEM hooks are added to the file system
134  * under which files are being watched. A hash table('portfop_vfs_hash_t') of
135  * active file systems is maintained. Each file system that has active watches
136  * is represented by 'portfop_vfs_t' and is added to the hash table.
137  * The vnode's 'portfop_vp_t' structure is added to the list of files(vnodes)
138  * being watched on the portfop_vfs_t structure.
139  *
140  *
141  * File system support:
142  * -------------------
143  *
144  * The file system implementation has to provide vnode event notifications
145  * (vnevents) in order to support watching any files on that file system.
146  * The vnode events(vnevents) are notifications provided by the file system
147  * for name based file operations like rename, remove etc, which do not go
148  * thru the VOP_** interfaces. If the file system does not implement vnode
149  * notifications, watching for file events on such file systems is not
150  * supported. The vnode event notifications support is determined by the call
151  * vnevent_support(vp) (VOP_VNEVENT(vp, VE_SUPPORT)), which the file system
152  * has to implement.
153  *
154  *
155  * Locking order:
156  * --------------
157  *
158  * A file(vnode) can have file event watches registered by different processes.
159  * There is one portfop_t per watch registered. These are on the vnode's list
160  * protected by the mutex 'pvp_mutex' in 'portfop_vp_t'. The portfop_t's are
161  * also on the per port cache. The cache is protected by the pfc_lock of
162  * portfop_cache_t. The lock order here is 'pfc_lock' -> 'pvp_mutex'.
163  *
164  */
165 
166 #include <sys/types.h>
167 #include <sys/systm.h>
168 #include <sys/stat.h>
169 #include <sys/errno.h>
170 #include <sys/kmem.h>
171 #include <sys/sysmacros.h>
172 #include <sys/debug.h>
173 #include <sys/vnode.h>
174 #include <sys/poll_impl.h>
175 #include <sys/port_impl.h>
176 #include <sys/fem.h>
177 #include <sys/vfs_opreg.h>
178 #include <sys/atomic.h>
179 #include <sys/mount.h>
180 #include <sys/mntent.h>
181 
/*
 * For special case support of mnttab (/etc/mnttab).
 */
extern struct vnode *vfs_mntdummyvp;
extern int mntfstype;

/*
 * Hash table of file systems that have watched files (see the
 * "Data structures" section above). PORTFOP_PVFSH() yields the address of
 * the bucket selected by PORTFOP_PVFSHASH() for the given vfs.
 */
#define	PORTFOP_PVFSH(vfsp)	(&portvfs_hash[PORTFOP_PVFSHASH(vfsp)])
portfop_vfs_hash_t	 portvfs_hash[PORTFOP_PVFSHASH_SZ];
190 
/* NOTE(review): no use of PORTFOP_NVP is visible in this part of the file. */
#define	PORTFOP_NVP	20
/*
 * Inactive file event watches(portfop_t) are retained on the vnode's list
 * for performance reasons. If the application re-registers the file, the
 * inactive entry is made active and moved up the list.
 *
 * If there are greater than the following number of watches on a vnode,
 * it will attempt to discard the oldest inactive watch(pfp) at the time
 * a new watch is being registered and when events get delivered. We
 * do this to avoid accumulating inactive watches on a file.
 */
int	port_fop_maxpfps = 20;
203 
/* local functions */
static int	port_fop_callback(void *, int *, pid_t, int, void *);

static void	port_pcache_insert(portfop_cache_t *, portfop_t *);
static void	port_pcache_delete(portfop_cache_t *, portfop_t *);
static void	port_close_fop(void *arg, int port, pid_t pid, int lastclose);

/*
 * port fop functions that will be the fem hooks. Each one is wired to the
 * matching VOPNAME_* entry in port_vnodesrc_template below.
 */
static int port_fop_open(femarg_t *vf, int mode, cred_t *cr,
    caller_context_t *);
static int port_fop_read(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr,
    struct caller_context *ct);
static int port_fop_write(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct);
/*
 * NOTE(review): 'maxport' below looks like a misspelling of 'maxprot';
 * it is only a prototype parameter name, so it has no effect on the code.
 */
static int port_fop_map(femarg_t *vf, offset_t off, struct as *as,
    caddr_t *addrp, size_t len, uchar_t prot, uchar_t maxport,
    uint_t flags, cred_t *cr, caller_context_t *ct);
static int port_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct);
static int port_fop_create(femarg_t *vf, char *name, vattr_t *vap,
    vcexcl_t excl, int mode, vnode_t **vpp, cred_t *cr, int flag,
    caller_context_t *ct, vsecattr_t *vsecp);
static int port_fop_remove(femarg_t *vf, char *nm, cred_t *cr,
    caller_context_t *ct, int flags);
static int port_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags);
static int port_fop_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags);
static int port_fop_mkdir(femarg_t *vf, char *dirname, vattr_t *vap,
    vnode_t **vpp, cred_t *cr, caller_context_t *ct, int flags,
    vsecattr_t *vsecp);
static int port_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags);
static int port_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags);
static int port_fop_symlink(femarg_t *vf, char *linkname, vattr_t *vap,
    char *target, cred_t *cr, caller_context_t *ct, int flags);
static int port_fop_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flag,
    cred_t *cr, caller_context_t *ct);

/* Hook for vnode events (vnevents), see "File system support" above. */
static int port_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp,
    char *cname, caller_context_t *ct);

/* Hook for the vfs unmount operation (wired in port_vfssrc_template). */
static int port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr);
250 
251 
/*
 * Fem hooks.
 *
 * Each entry pairs a vnode operation name (VOPNAME_*) with the port_fop_*
 * routine that intercepts it. The table is terminated by a NULL, NULL
 * entry and is handed to fem_create() by port_fop_femop().
 */
const fs_operation_def_t	port_vnodesrc_template[] = {
	VOPNAME_OPEN,		{ .femop_open = port_fop_open },
	VOPNAME_READ,		{ .femop_read = port_fop_read },
	VOPNAME_WRITE,		{ .femop_write = port_fop_write },
	VOPNAME_MAP,		{ .femop_map = port_fop_map },
	VOPNAME_SETATTR, 	{ .femop_setattr = port_fop_setattr },
	VOPNAME_CREATE,		{ .femop_create = port_fop_create },
	VOPNAME_REMOVE,		{ .femop_remove = port_fop_remove },
	VOPNAME_LINK,		{ .femop_link = port_fop_link },
	VOPNAME_RENAME,		{ .femop_rename = port_fop_rename },
	VOPNAME_MKDIR,		{ .femop_mkdir = port_fop_mkdir },
	VOPNAME_RMDIR,		{ .femop_rmdir = port_fop_rmdir },
	VOPNAME_READDIR,	{ .femop_readdir = port_fop_readdir },
	VOPNAME_SYMLINK,	{ .femop_symlink = port_fop_symlink },
	VOPNAME_SETSECATTR, 	{ .femop_setsecattr = port_fop_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = port_fop_vnevent },
	NULL,	NULL
};
273 
/*
 * Fsem - vfs ops hooks
 *
 * Only the unmount operation is intercepted. The table is terminated by a
 * NULL, NULL entry and is handed to fsem_create() by port_fop_fsemop().
 */
const fs_operation_def_t	port_vfssrc_template[] = {
	VFSNAME_UNMOUNT, 	{ .fsemop_unmount = port_fop_unmount },
	NULL,	NULL
};
281 
/*
 * Monitor templates built from the tables above. They start out NULL and
 * are published, at most once each, by port_fop_femop() and
 * port_fop_fsemop() using casptr().
 */
fem_t *fop_femop;
fsem_t *fop_fsemop;
284 
285 static fem_t *
286 port_fop_femop()
287 {
288 	fem_t *femp;
289 	if (fop_femop != NULL)
290 		return (fop_femop);
291 	if (fem_create("portfop_fem",
292 	    (const struct fs_operation_def *)port_vnodesrc_template,
293 	    (fem_t **)&femp)) {
294 		return (NULL);
295 	}
296 	if (casptr(&fop_femop, NULL, femp) != NULL) {
297 		/*
298 		 * some other thread beat us to it.
299 		 */
300 		fem_free(femp);
301 	}
302 	return (fop_femop);
303 }
304 
305 static fsem_t *
306 port_fop_fsemop()
307 {
308 	fsem_t *fsemp;
309 	if (fop_fsemop != NULL)
310 		return (fop_fsemop);
311 	if (fsem_create("portfop_fsem", port_vfssrc_template, &fsemp)) {
312 		return (NULL);
313 	}
314 	if (casptr(&fop_fsemop, NULL, fsemp) != NULL) {
315 		/*
316 		 * some other thread beat us to it.
317 		 */
318 		fsem_free(fsemp);
319 	}
320 	return (fop_fsemop);
321 }
322 
323 /*
324  * port_fop_callback()
325  * - PORT_CALLBACK_DEFAULT
326  *	The file event will be delivered to the application.
327  * - PORT_CALLBACK_DISSOCIATE
328  *	The object will be dissociated from  the port.
329  * - PORT_CALLBACK_CLOSE
330  *	The object will be dissociated from the port because the port
331  *	is being closed.
332  */
333 /* ARGSUSED */
334 static int
335 port_fop_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
336 {
337 	portfop_t	*pfp = (portfop_t *)arg;
338 	port_kevent_t	*pkevp = (port_kevent_t *)evp;
339 	int		error = 0;
340 
341 	ASSERT((events != NULL));
342 	if (flag == PORT_CALLBACK_DEFAULT) {
343 		if (curproc->p_pid != pid) {
344 				return (EACCES); /* deny delivery of events */
345 		}
346 
347 		*events = pkevp->portkev_events;
348 		pkevp->portkev_events = 0;
349 		if (pfp != NULL) {
350 			pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
351 		}
352 	}
353 	return (error);
354 }
355 
356 /*
357  * Inserts a portfop_t into the port sources cache's.
358  */
359 static void
360 port_pcache_insert(portfop_cache_t *pfcp, portfop_t *pfp)
361 {
362 	portfop_t	**bucket;
363 
364 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
365 	bucket = PORT_FOP_BUCKET(pfcp, pfp->pfop_object);
366 	pfp->pfop_hashnext = *bucket;
367 	*bucket = pfp;
368 	pfcp->pfc_objcount++;
369 }
370 
371 /*
372  * Remove the pfp from the port source cache.
373  */
374 static void
375 port_pcache_delete(portfop_cache_t *pfcp, portfop_t *pfp)
376 {
377 	portfop_t	*lpdp;
378 	portfop_t	*cpdp;
379 	portfop_t	**bucket;
380 
381 	bucket = PORT_FOP_BUCKET(pfcp, pfp->pfop_object);
382 	cpdp = *bucket;
383 	if (pfp == cpdp) {
384 		*bucket = pfp->pfop_hashnext;
385 	} else {
386 		while (cpdp != NULL) {
387 			lpdp = cpdp;
388 			cpdp = cpdp->pfop_hashnext;
389 			if (cpdp == pfp) {
390 				/* portfop struct found */
391 				lpdp->pfop_hashnext = pfp->pfop_hashnext;
392 				break;
393 			}
394 		}
395 	}
396 	pfcp->pfc_objcount--;
397 }
398 
399 /*
400  * The vnode's(portfop_vp_t) pfp list management. The 'pvp_mutex' is held
401  * when these routines are called.
402  *
403  * The 'pvp_lpfop' member points to the oldest inactive entry on the list.
404  * It is used to discard the oldtest inactive pfp if the number of entries
405  * exceed the limit.
406  */
407 static void
408 port_fop_listinsert(portfop_vp_t *pvp, portfop_t *pfp, int where)
409 {
410 	if (where == 1) {
411 		list_insert_head(&pvp->pvp_pfoplist, (void *)pfp);
412 	} else {
413 		list_insert_tail(&pvp->pvp_pfoplist, (void *)pfp);
414 	}
415 	if (pvp->pvp_lpfop == NULL) {
416 		pvp->pvp_lpfop = pfp;
417 	}
418 	pvp->pvp_cnt++;
419 }
420 
421 static void
422 port_fop_listinsert_head(portfop_vp_t *pvp, portfop_t *pfp)
423 {
424 	port_fop_listinsert(pvp, pfp, 1);
425 }
426 
427 static void
428 port_fop_listinsert_tail(portfop_vp_t *pvp, portfop_t *pfp)
429 {
430 	/*
431 	 * We point lpfop to an inactive one, if it was initially pointing
432 	 * to an active one. Insert to the tail is done only when a pfp goes
433 	 * inactive.
434 	 */
435 	if (pvp->pvp_lpfop && pvp->pvp_lpfop->pfop_flags & PORT_FOP_ACTIVE) {
436 		pvp->pvp_lpfop = pfp;
437 	}
438 	port_fop_listinsert(pvp, pfp, 0);
439 }
440 
441 static void
442 port_fop_listremove(portfop_vp_t *pvp, portfop_t *pfp)
443 {
444 	if (pvp->pvp_lpfop == pfp) {
445 		pvp->pvp_lpfop = list_next(&pvp->pvp_pfoplist, (void *)pfp);
446 	}
447 
448 	list_remove(&pvp->pvp_pfoplist, (void *)pfp);
449 
450 	pvp->pvp_cnt--;
451 	if (pvp->pvp_cnt && pvp->pvp_lpfop == NULL) {
452 		pvp->pvp_lpfop = list_head(&pvp->pvp_pfoplist);
453 	}
454 }
455 
456 static void
457 port_fop_listmove(portfop_vp_t *pvp, list_t *tlist)
458 {
459 	list_move_tail(tlist, &pvp->pvp_pfoplist);
460 	pvp->pvp_lpfop = NULL;
461 	pvp->pvp_cnt = 0;
462 }
463 
464 /*
465  * Remove a portfop_t from the port cache hash table and discard it.
466  * It is called only when pfp is not on the vnode's list. Otherwise,
467  * port_remove_fop() is called.
468  */
469 void
470 port_pcache_remove_fop(portfop_cache_t *pfcp, portfop_t *pfp)
471 {
472 	port_kevent_t	*pkevp;
473 
474 
475 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
476 
477 	pkevp = pfp->pfop_pev;
478 	pfp->pfop_pev = NULL;
479 
480 	if (pkevp != NULL) {
481 		(void) port_remove_done_event(pkevp);
482 		port_free_event_local(pkevp, 0);
483 	}
484 
485 	port_pcache_delete(pfcp, pfp);
486 
487 	if (pfp->pfop_cname != NULL)
488 		kmem_free(pfp->pfop_cname, pfp->pfop_clen + 1);
489 	kmem_free(pfp, sizeof (portfop_t));
490 	if (pfcp->pfc_objcount == 0)
491 		cv_signal(&pfcp->pfc_lclosecv);
492 }
493 
494 /*
495  * if we have too many watches on the vnode, attempt to discard an
496  * inactive one.
497  */
498 static void
499 port_fop_trimpfplist(vnode_t *vp)
500 {
501 	portfop_vp_t *pvp;
502 	portfop_t *pfp = NULL;
503 	portfop_cache_t *pfcp;
504 	vnode_t	*tdvp;
505 
506 	/*
507 	 * Due to a reference the vnode cannot disappear, v_fopdata should
508 	 * not change.
509 	 */
510 	if ((pvp = vp->v_fopdata) != NULL &&
511 	    pvp->pvp_cnt > port_fop_maxpfps) {
512 		mutex_enter(&pvp->pvp_mutex);
513 		pfp = pvp->pvp_lpfop;
514 		pfcp = pfp->pfop_pcache;
515 		/*
516 		 * only if we can get the cache lock, we need to
517 		 * do this due to reverse lock order and some thread
518 		 * that may be trying to reactivate this entry.
519 		 */
520 		if (mutex_tryenter(&pfcp->pfc_lock)) {
521 			if (pfp && !(pfp->pfop_flags & PORT_FOP_ACTIVE) &&
522 			    !(pfp->pfop_flags & PORT_FOP_KEV_ONQ)) {
523 				port_fop_listremove(pvp, pfp);
524 				pfp->pfop_flags |= PORT_FOP_REMOVING;
525 			} else {
526 				mutex_exit(&pfcp->pfc_lock);
527 				pfp = NULL;
528 			}
529 		} else {
530 			pfp = NULL;
531 		}
532 		mutex_exit(&pvp->pvp_mutex);
533 
534 		/*
535 		 * discard pfp if any.
536 		 */
537 		if (pfp != NULL) {
538 			tdvp = pfp->pfop_dvp;
539 			port_pcache_remove_fop(pfcp, pfp);
540 			mutex_exit(&pfcp->pfc_lock);
541 			if (tdvp != NULL)
542 				VN_RELE(tdvp);
543 		}
544 	}
545 }
546 
547 /*
548  * This routine returns 1, if the vnode can be rele'ed by the caller.
549  * The caller has to VN_RELE the vnode with out holding any
550  * locks.
551  */
552 int
553 port_fop_femuninstall(vnode_t *vp)
554 {
555 	portfop_vp_t	*pvp;
556 	vfs_t		*vfsp;
557 	portfop_vfs_t *pvfsp;
558 	portfop_vfs_hash_t	*pvfsh;
559 	kmutex_t	*mtx;
560 	int	ret = 0;
561 
562 	/*
563 	 * if list is empty, uninstall fem.
564 	 */
565 	pvp = vp->v_fopdata;
566 	ASSERT(MUTEX_HELD(&pvp->pvp_mutex));
567 
568 	/*
569 	 * make sure the list is empty.
570 	 */
571 	if (!list_head(&pvp->pvp_pfoplist)) {
572 
573 		/*
574 		 * we could possibly uninstall the fem hooks when
575 		 * the vnode becomes inactive and the v_fopdata is
576 		 * free. But the hooks get triggered unnecessarily
577 		 * even though there are no active watches. So, we
578 		 * uninstall it here.
579 		 */
580 		(void) fem_uninstall(vp, (fem_t *)pvp->pvp_femp, vp);
581 		pvp->pvp_femp = NULL;
582 		mutex_exit(&pvp->pvp_mutex);
583 
584 
585 		/*
586 		 * If we successfully uninstalled fem, no process is watching
587 		 * this vnode, Remove it from the vfs's list of watched vnodes.
588 		 */
589 		pvfsp = pvp->pvp_pvfsp;
590 		vfsp = vp->v_vfsp;
591 		pvfsh = PORTFOP_PVFSH(vfsp);
592 		mtx = &pvfsh->pvfshash_mutex;
593 		mutex_enter(mtx);
594 		/*
595 		 * If unmount is in progress, that thread will remove and
596 		 * release the vnode from the vfs's list, just leave.
597 		 */
598 		if (!pvfsp->pvfs_unmount) {
599 			list_remove(&pvfsp->pvfs_pvplist, pvp);
600 			mutex_exit(mtx);
601 			ret = 1;
602 		} else {
603 			mutex_exit(mtx);
604 		}
605 	} else {
606 		mutex_exit(&pvp->pvp_mutex);
607 	}
608 	return (ret);
609 }
610 
/*
 * Remove pfp from the vnode's watch list and the cache and discard it.
 * If it is the last pfp on the vnode's list, the fem hooks get uninstalled.
 * Returns 1 if pfp removed successfully.
 *
 * The caller must hold pfc_lock. pvp_mutex is taken and released here.
 *
 * If 'cleanup' is zero, the pfp is removed only if it is still active and
 * is not already being removed by another thread; otherwise 0 is returned
 * and nothing is changed.
 *
 * The *active is set to indicate if the pfp was still active(no events had
 * been posted, or the posted event had not been collected yet and it was
 * able to remove it from the port's queue). 'active' may be NULL.
 *
 * vpp and dvpp will point to the vnode and directory vnode which the caller
 * is required to VN_RELE without holding any locks. *vpp is written only
 * when the fem hooks were uninstalled and the vnode may be released;
 * neither *vpp nor *dvpp is written on the early return paths.
 * NOTE(review): callers presumably initialize both to NULL - confirm.
 */
int
port_remove_fop(portfop_t *pfp, portfop_cache_t *pfcp, int cleanup,
    int *active, vnode_t **vpp, vnode_t **dvpp)
{
	vnode_t		*vp;
	portfop_vp_t	*pvp;
	int	tactive = 0;	/* value reported through *active */

	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
	vp = pfp->pfop_vp;
	pvp = vp->v_fopdata;
	mutex_enter(&pvp->pvp_mutex);

	/*
	 * if not cleanup, remove it only if the pfp is still active and
	 * is not being removed by some other thread.
	 */
	if (!cleanup && (!(pfp->pfop_flags & PORT_FOP_ACTIVE) ||
	    pfp->pfop_flags & PORT_FOP_REMOVING)) {
		mutex_exit(&pvp->pvp_mutex);
		return (0);
	}

	/*
	 * mark it inactive.
	 */
	if (pfp->pfop_flags & PORT_FOP_ACTIVE) {
		pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
		tactive = 1;
	}

	/*
	 * Check if the pfp is still on the vnode's list. It is not, if
	 * some other thread (e.g. port_fop_excep()) is in the process of
	 * removing it and has set PORT_FOP_REMOVING.
	 * In case of cleanup, just mark this pfp as inactive so that no
	 * new events (VNEVENT) will be delivered, and remove it from the
	 * event queue if it was already queued. Since the cache lock is
	 * held, the pfp will not disappear, even though it is being
	 * removed.
	 */
	if (pfp->pfop_flags & PORT_FOP_REMOVING) {
		mutex_exit(&pvp->pvp_mutex);
		if (!tactive && port_remove_done_event(pfp->pfop_pev)) {
			pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
			tactive = 1;
		}
		if (active) {
			*active = tactive;
		}
		/* the pfp itself is left for the removing thread to discard */
		return (1);
	}

	/*
	 * if we find an event on the queue and removed it, then this
	 * association is considered active.
	 */
	if (!tactive && port_remove_done_event(pfp->pfop_pev)) {
		pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
		tactive = 1;
	}

	if (active) {
		*active = tactive;
	}
	pvp = (portfop_vp_t *)vp->v_fopdata;

	/*
	 * remove pfp from the vnode's list
	 */
	port_fop_listremove(pvp, pfp);

	/*
	 * If no more associations on the vnode, uninstall fem hooks.
	 * The pvp mutex will be released in this routine.
	 */
	if (port_fop_femuninstall(vp))
		*vpp = vp;
	/* read the directory vnode before the pfp is freed below */
	*dvpp = pfp->pfop_dvp;
	port_pcache_remove_fop(pfcp, pfp);
	return (1);
}
704 
705 /*
706  * This routine returns a pointer to a cached portfop entry, or NULL if it
707  * does not find it in the hash table. The object pointer is used as index.
708  * The entries are hashed by the object's address. We need to match the pid
709  * as the evet port can be shared between processes. The file events
710  * watches are per process only.
711  */
712 portfop_t *
713 port_cache_lookup_fop(portfop_cache_t *pfcp, pid_t pid, uintptr_t obj)
714 {
715 	portfop_t	*pfp = NULL;
716 	portfop_t	**bucket;
717 
718 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
719 	bucket = PORT_FOP_BUCKET(pfcp, obj);
720 	pfp = *bucket;
721 	while (pfp != NULL) {
722 		if (pfp->pfop_object == obj && pfp->pfop_pid == pid)
723 			break;
724 		pfp = pfp->pfop_hashnext;
725 	}
726 	return (pfp);
727 }
728 
729 /*
730  * Given the file name, get the vnode and also the directory vnode
731  * On return, the vnodes are held (VN_HOLD). The caller has to VN_RELE
732  * the vnode(s).
733  */
734 int
735 port_fop_getdvp(void *objptr, vnode_t **vp, vnode_t **dvp,
736 	char **cname, int *len, int follow)
737 {
738 	int error = 0;
739 	struct pathname pn;
740 	char *fname;
741 
742 	if (get_udatamodel() == DATAMODEL_NATIVE) {
743 		fname = ((file_obj_t *)objptr)->fo_name;
744 #ifdef  _SYSCALL32_IMPL
745 	} else {
746 		fname = (caddr_t)(uintptr_t)((file_obj32_t *)objptr)->fo_name;
747 #endif	/* _SYSCALL32_IMPL */
748 	}
749 
750 	/*
751 	 * lookuppn may fail with EINVAL, if dvp is  non-null(like when
752 	 * looking for "."). So call again with dvp = NULL.
753 	 */
754 	if ((error = pn_get(fname, UIO_USERSPACE, &pn)) != 0) {
755 		return (error);
756 	}
757 
758 	error = lookuppn(&pn, NULL, follow, dvp, vp);
759 	if (error == EINVAL) {
760 		pn_free(&pn);
761 		if ((error = pn_get(fname, UIO_USERSPACE, &pn)) != 0) {
762 			return (error);
763 		}
764 		error = lookuppn(&pn, NULL, follow, NULL, vp);
765 		if (dvp != NULL) {
766 			*dvp = NULL;
767 		}
768 	}
769 
770 	if (error == 0 && cname != NULL && len != NULL) {
771 		pn_setlast(&pn);
772 		*len = pn.pn_pathlen;
773 		*cname = kmem_alloc(*len + 1, KM_SLEEP);
774 		(void) strcpy(*cname, pn.pn_path);
775 	} else {
776 		if (cname != NULL && len != NULL) {
777 			*cname = NULL;
778 			*len = 0;
779 		}
780 	}
781 
782 	pn_free(&pn);
783 	return (error);
784 }
785 
786 port_source_t *
787 port_getsrc(port_t *pp, int source)
788 {
789 	port_source_t *pse;
790 	int	lock = 0;
791 	/*
792 	 * get the port source structure.
793 	 */
794 	if (!MUTEX_HELD(&pp->port_queue.portq_source_mutex)) {
795 		mutex_enter(&pp->port_queue.portq_source_mutex);
796 		lock = 1;
797 	}
798 
799 	pse = pp->port_queue.portq_scache[PORT_SHASH(source)];
800 	for (; pse != NULL; pse = pse->portsrc_next) {
801 		if (pse->portsrc_source == source)
802 			break;
803 	}
804 
805 	if (lock) {
806 		mutex_exit(&pp->port_queue.portq_source_mutex);
807 	}
808 	return (pse);
809 }
810 
811 
/*
 * Compare time stamps and generate an event if it has changed.
 * Note that the port cache pointer will be valid due to a reference
 * to the port. We need to grab the port cache lock and verify that
 * the pfp is still the same before proceeding to deliver an event.
 *
 *	pfcp	- port cache the watch belongs to.
 *	vp, dvp	- vnode and directory vnode the watch is expected to refer
 *		  to; if the pfp no longer matches them, nothing is done.
 *	pfp	- the watch being checked.
 *	objptr	- file_obj_t, or file_obj32_t for a non native data model,
 *		  holding the time stamps supplied by the application. It
 *		  is dereferenced directly here.
 *		  NOTE(review): presumably a kernel copy - confirm in caller.
 *	object	- key under which the watch is hashed in the port cache.
 *
 * A supplied time stamp takes part in the comparison only if it is non
 * zero and the watch asked for the corresponding event (atime ->
 * FILE_ACCESS, mtime -> FILE_MODIFIED, ctime -> FILE_ATTRIB).
 *
 * Locks are taken in the order pfc_lock -> pvp_mutex.
 */
static void
port_check_timestamp(portfop_cache_t *pfcp, vnode_t *vp, vnode_t *dvp,
	portfop_t *pfp, void *objptr, uintptr_t object)
{
	vattr_t		vatt;
	portfop_vp_t	*pvp = vp->v_fopdata;
	int		events = 0;
	port_kevent_t	*pkevp;
	file_obj_t	*fobj;
	portfop_t	*tpfp;

	/*
	 * If time stamps are specified, get attributes and compare.
	 * The attributes are fetched before any lock is taken. If they
	 * cannot be obtained, no event is generated.
	 */
	vatt.va_mask = AT_ATIME|AT_MTIME|AT_CTIME;
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		fobj = (file_obj_t *)objptr;
		if (fobj->fo_atime.tv_sec || fobj->fo_atime.tv_nsec ||
		    fobj->fo_mtime.tv_sec || fobj->fo_mtime.tv_nsec ||
		    fobj->fo_ctime.tv_sec || fobj->fo_ctime.tv_nsec) {
			if (VOP_GETATTR(vp, &vatt, 0, CRED(), NULL)) {
				return;
			}
		} else {
			/*
			 * timestamp not specified, all 0's,
			 */
			return;
		}
#ifdef  _SYSCALL32_IMPL
	} else {
		file_obj32_t	*fobj32;
		fobj32 = (file_obj32_t *)objptr;
		if (fobj32->fo_atime.tv_sec || fobj32->fo_atime.tv_nsec ||
		    fobj32->fo_mtime.tv_sec || fobj32->fo_mtime.tv_nsec ||
		    fobj32->fo_ctime.tv_sec || fobj32->fo_ctime.tv_nsec) {
			if (VOP_GETATTR(vp, &vatt, 0, CRED(), NULL)) {
				return;
			}
		} else {
			/*
			 * timestamp not specified, all 0.
			 */
			return;
		}
#endif /* _SYSCALL32_IMPL */
	}

	/*
	 * Now grab the cache lock and verify that we are still
	 * dealing with the same pfp and curthread is the one
	 * which registered it. We need to do this to avoid
	 * delivering redundant events.
	 */
	mutex_enter(&pfcp->pfc_lock);
	tpfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);

	if (tpfp == NULL || tpfp != pfp ||
	    pfp->pfop_vp != vp || pfp->pfop_dvp != dvp ||
	    pfp->pfop_callrid != curthread ||
	    !(pfp->pfop_flags & PORT_FOP_ACTIVE)) {
		/*
		 * Some other event was delivered, the file
		 * watch was removed or reassociated. Just
		 * ignore it and leave
		 */
		mutex_exit(&pfcp->pfc_lock);
		return;
	}

	mutex_enter(&pvp->pvp_mutex);
	/*
	 * The pfp cannot disappear as the port cache lock is held.
	 * While the pvp_mutex is held, no events will get delivered.
	 * The flags are tested again now that the pvp_mutex is held.
	 */
	if (pfp->pfop_flags & PORT_FOP_ACTIVE &&
	    !(pfp->pfop_flags & PORT_FOP_REMOVING)) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			fobj = (file_obj_t *)objptr;
			if (pfp->pfop_events & FILE_ACCESS &&
			    (fobj->fo_atime.tv_sec || fobj->fo_atime.tv_nsec) &&
			    (vatt.va_atime.tv_sec != fobj->fo_atime.tv_sec ||
			    vatt.va_atime.tv_nsec != fobj->fo_atime.tv_nsec))
				events |= FILE_ACCESS;

			if (pfp->pfop_events & FILE_MODIFIED &&
			    (fobj->fo_mtime.tv_sec || fobj->fo_mtime.tv_nsec) &&
			    (vatt.va_mtime.tv_sec != fobj->fo_mtime.tv_sec ||
			    vatt.va_mtime.tv_nsec != fobj->fo_mtime.tv_nsec))
				events |= FILE_MODIFIED;

			if (pfp->pfop_events & FILE_ATTRIB &&
			    (fobj->fo_ctime.tv_sec || fobj->fo_ctime.tv_nsec) &&
			    (vatt.va_ctime.tv_sec != fobj->fo_ctime.tv_sec ||
			    vatt.va_ctime.tv_nsec != fobj->fo_ctime.tv_nsec))
				events |= FILE_ATTRIB;
#ifdef  _SYSCALL32_IMPL
		} else {
			file_obj32_t	*fobj32;
			fobj32 = (file_obj32_t *)objptr;
			if (pfp->pfop_events & FILE_ACCESS &&
			    (fobj32->fo_atime.tv_sec ||
			    fobj32->fo_atime.tv_nsec) &&
			    (vatt.va_atime.tv_sec != fobj32->fo_atime.tv_sec ||
			    vatt.va_atime.tv_nsec != fobj32->fo_atime.tv_nsec))
				events |= FILE_ACCESS;

			if (pfp->pfop_events & FILE_MODIFIED &&
			    (fobj32->fo_mtime.tv_sec ||
			    fobj32->fo_mtime.tv_nsec) &&
			    (vatt.va_mtime.tv_sec != fobj32->fo_mtime.tv_sec ||
			    vatt.va_mtime.tv_nsec != fobj32->fo_mtime.tv_nsec))
				events |= FILE_MODIFIED;

			if (pfp->pfop_events & FILE_ATTRIB &&
			    (fobj32->fo_ctime.tv_sec ||
			    fobj32->fo_ctime.tv_nsec) &&
			    (vatt.va_ctime.tv_sec != fobj32->fo_ctime.tv_sec ||
			    vatt.va_ctime.tv_nsec != fobj32->fo_ctime.tv_nsec))
				events |= FILE_ATTRIB;
#endif /* _SYSCALL32_IMPL */
		}

		/*
		 * No events to deliver
		 */
		if (events == 0) {
			mutex_exit(&pvp->pvp_mutex);
			mutex_exit(&pfcp->pfc_lock);
			return;
		}

		/*
		 * Deliver the event now. The watch goes inactive, as
		 * after any other event delivery.
		 */
		pkevp = pfp->pfop_pev;
		pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
		pkevp->portkev_events |= events;
		/*
		 * Move it to the tail as active ones are in the
		 * beginning of the list.
		 */
		port_fop_listremove(pvp, pfp);
		port_fop_listinsert_tail(pvp, pfp);
		port_send_event(pkevp);
		pfp->pfop_flags |= PORT_FOP_KEV_ONQ;
	}
	mutex_exit(&pvp->pvp_mutex);
	mutex_exit(&pfcp->pfc_lock);
}
968 
969 /*
970  * Add the event source to the port and return the port source cache pointer.
971  */
972 int
973 port_fop_associate_source(portfop_cache_t **pfcpp, port_t *pp, int source)
974 {
975 	portfop_cache_t *pfcp;
976 	port_source_t	*pse;
977 	int		error;
978 
979 	/*
980 	 * associate PORT_SOURCE_FILE source with the port, if it is
981 	 * not associated yet. Note the PORT_SOURCE_FILE source is
982 	 * associated once and will not be dissociated.
983 	 */
984 	if ((pse = port_getsrc(pp, PORT_SOURCE_FILE)) == NULL) {
985 		if (error = port_associate_ksource(pp->port_fd, source,
986 		    &pse, port_close_fop, pp, NULL)) {
987 			*pfcpp = NULL;
988 			return (error);
989 		}
990 	}
991 
992 	/*
993 	 * Get the portfop cache pointer.
994 	 */
995 	if ((pfcp = pse->portsrc_data) == NULL) {
996 		/*
997 		 * This is the first time that a file is being associated,
998 		 * create the portfop cache.
999 		 */
1000 		pfcp = kmem_zalloc(sizeof (portfop_cache_t), KM_SLEEP);
1001 		mutex_enter(&pp->port_queue.portq_source_mutex);
1002 		if (pse->portsrc_data == NULL) {
1003 			pse->portsrc_data = pfcp;
1004 			mutex_exit(&pp->port_queue.portq_source_mutex);
1005 		} else {
1006 			/*
1007 			 * someone else created the port cache, free
1008 			 * what we just now allocated.
1009 			 */
1010 			mutex_exit(&pp->port_queue.portq_source_mutex);
1011 			kmem_free(pfcp, sizeof (portfop_cache_t));
1012 			pfcp = pse->portsrc_data;
1013 		}
1014 	}
1015 	*pfcpp = pfcp;
1016 	return (0);
1017 }
1018 
1019 /*
1020  * Add the given pvp on the file system's list of vnodes watched.
1021  */
1022 int
1023 port_fop_pvfsadd(portfop_vp_t *pvp)
1024 {
1025 	int error = 0;
1026 	vnode_t	*vp = pvp->pvp_vp;
1027 	portfop_vfs_hash_t *pvfsh;
1028 	portfop_vfs_t	 *pvfsp;
1029 	fsem_t		*fsemp;
1030 
1031 	pvfsh = PORTFOP_PVFSH(vp->v_vfsp);
1032 	mutex_enter(&pvfsh->pvfshash_mutex);
1033 	for (pvfsp = pvfsh->pvfshash_pvfsp; pvfsp &&
1034 	    pvfsp->pvfs != vp->v_vfsp; pvfsp = pvfsp->pvfs_next)
1035 		;
1036 
1037 	if (!pvfsp) {
1038 		if ((fsemp = port_fop_fsemop()) != NULL) {
1039 			if ((error = fsem_install(vp->v_vfsp, fsemp,
1040 			    vp->v_vfsp, OPUNIQ, NULL, NULL))) {
1041 				mutex_exit(&pvfsh->pvfshash_mutex);
1042 				return (error);
1043 			}
1044 		} else {
1045 			mutex_exit(&pvfsh->pvfshash_mutex);
1046 			return (EINVAL);
1047 		}
1048 		pvfsp = kmem_zalloc(sizeof (portfop_vfs_t), KM_SLEEP);
1049 		pvfsp->pvfs = vp->v_vfsp;
1050 		list_create(&(pvfsp->pvfs_pvplist), sizeof (portfop_vp_t),
1051 		    offsetof(portfop_vp_t, pvp_pvfsnode));
1052 		pvfsp->pvfs_fsemp = fsemp;
1053 		pvfsp->pvfs_next = pvfsh->pvfshash_pvfsp;
1054 		pvfsh->pvfshash_pvfsp = pvfsp;
1055 	}
1056 
1057 	/*
1058 	 * check if an unmount is in progress.
1059 	 */
1060 	if (!pvfsp->pvfs_unmount) {
1061 		/*
1062 		 * insert the pvp on list.
1063 		 */
1064 		pvp->pvp_pvfsp = pvfsp;
1065 		list_insert_head(&pvfsp->pvfs_pvplist, (void *)pvp);
1066 	} else {
1067 		error = EINVAL;
1068 	}
1069 	mutex_exit(&pvfsh->pvfshash_mutex);
1070 	return (error);
1071 }
1072 
1073 /*
1074  * Installs the portfop_vp_t data structure on the
1075  * vnode. The 'pvp_femp == NULL' indicates it is not
1076  * active. The fem hooks have to be installed.
1077  * The portfop_vp_t is only freed when the vnode gets freed.
1078  */
1079 void
1080 port_install_fopdata(vnode_t *vp)
1081 {
1082 	portfop_vp_t *npvp;
1083 
1084 	npvp = kmem_zalloc(sizeof (*npvp), KM_SLEEP);
1085 	mutex_init(&npvp->pvp_mutex, NULL, MUTEX_DEFAULT, NULL);
1086 	list_create(&npvp->pvp_pfoplist, sizeof (portfop_t),
1087 	    offsetof(portfop_t, pfop_node));
1088 	npvp->pvp_vp = vp;
1089 	/*
1090 	 * If v_fopdata is not null, some other thread beat us to it.
1091 	 */
1092 	if (casptr(&vp->v_fopdata, NULL, npvp) != NULL) {
1093 		mutex_destroy(&npvp->pvp_mutex);
1094 		list_destroy(&npvp->pvp_pfoplist);
1095 		kmem_free(npvp, sizeof (*npvp));
1096 	}
1097 }
1098 
1099 
1100 /*
1101  * Allocate and add a portfop_t to the per port cache. Also add the portfop_t
1102  * to the vnode's list. The association is identified by the object pointer
1103  * address and pid.
1104  */
1105 int
1106 port_pfp_setup(portfop_t **pfpp, port_t *pp, vnode_t *vp, portfop_cache_t *pfcp,
1107 	uintptr_t object, int events, void *user, char *cname, int clen,
1108 	vnode_t *dvp)
1109 {
1110 	portfop_t	*pfp = NULL;
1111 	port_kevent_t	*pkevp;
1112 	fem_t		*femp;
1113 	int		error = 0;
1114 	portfop_vp_t	*pvp;
1115 
1116 
1117 	/*
1118 	 * The port cache mutex is held.
1119 	 */
1120 	*pfpp  = NULL;
1121 
1122 
1123 	/*
1124 	 * At this point the fem monitor is installed.
1125 	 * Allocate a port event structure per vnode association.
1126 	 */
1127 	if (pfp == NULL) {
1128 		if (error = port_alloc_event_local(pp, PORT_SOURCE_FILE,
1129 		    PORT_ALLOC_CACHED, &pkevp)) {
1130 			return (error);
1131 		}
1132 		pfp = kmem_zalloc(sizeof (portfop_t), KM_SLEEP);
1133 		pfp->pfop_pev = pkevp;
1134 	}
1135 
1136 	pfp->pfop_vp = vp;
1137 	pfp->pfop_pid = curproc->p_pid;
1138 	pfp->pfop_pcache = pfcp;
1139 	pfp->pfop_pp = pp;
1140 	pfp->pfop_flags |= PORT_FOP_ACTIVE;
1141 	pfp->pfop_cname = cname;
1142 	pfp->pfop_clen = clen;
1143 	pfp->pfop_dvp = dvp;
1144 	pfp->pfop_object = object;
1145 
1146 	pkevp->portkev_callback = port_fop_callback;
1147 	pkevp->portkev_arg = pfp;
1148 	pkevp->portkev_object = object;
1149 	pkevp->portkev_user = user;
1150 	pkevp->portkev_events = 0;
1151 
1152 	port_pcache_insert(pfcp, pfp);
1153 
1154 	/*
1155 	 * Register a new file events monitor for this file(vnode), if not
1156 	 * done already.
1157 	 */
1158 	if ((pvp = vp->v_fopdata) == NULL) {
1159 		port_install_fopdata(vp);
1160 		pvp = vp->v_fopdata;
1161 	}
1162 
1163 	mutex_enter(&pvp->pvp_mutex);
1164 	/*
1165 	 * if the vnode does not have the file events hooks, install it.
1166 	 */
1167 	if (pvp->pvp_femp == NULL) {
1168 		if ((femp = port_fop_femop()) != NULL) {
1169 			if (!(error = fem_install(pfp->pfop_vp, femp,
1170 			    (void *)vp, OPUNIQ, NULL, NULL))) {
1171 				pvp->pvp_femp = femp;
1172 				/*
1173 				 * add fsem_t hooks to the vfsp and add pvp to
1174 				 * the list of vnodes for this vfs.
1175 				 */
1176 				if (!(error = port_fop_pvfsadd(pvp))) {
1177 					/*
1178 					 * Hold a reference to the vnode since
1179 					 * we successfully installed the hooks.
1180 					 */
1181 					VN_HOLD(vp);
1182 				} else {
1183 					(void) fem_uninstall(vp, femp, vp);
1184 					pvp->pvp_femp = NULL;
1185 				}
1186 			}
1187 		} else {
1188 			error = EINVAL;
1189 		}
1190 	}
1191 
1192 	if (error) {
1193 		/*
1194 		 * pkevp will get freed here.
1195 		 */
1196 		pfp->pfop_cname = NULL;
1197 		port_pcache_remove_fop(pfcp, pfp);
1198 		mutex_exit(&pvp->pvp_mutex);
1199 		return (error);
1200 	}
1201 
1202 	/*
1203 	 * insert the pfp on the vnode's list. After this
1204 	 * events can get delivered.
1205 	 */
1206 	pfp->pfop_events = events;
1207 	port_fop_listinsert_head(pvp, pfp);
1208 
1209 	mutex_exit(&pvp->pvp_mutex);
1210 	/*
1211 	 * Hold the directory vnode since we have a reference now.
1212 	 */
1213 	if (dvp != NULL)
1214 		VN_HOLD(dvp);
1215 	*pfpp = pfp;
1216 	return (0);
1217 }
1218 
1219 vnode_t *
1220 port_resolve_vp(vnode_t *vp)
1221 {
1222 	vnode_t *rvp;
1223 	/*
1224 	 * special case /etc/mnttab(mntfs type). The mntfstype != 0
1225 	 * if mntfs got mounted.
1226 	 */
1227 	if (vfs_mntdummyvp && mntfstype != 0 &&
1228 	    vp->v_vfsp->vfs_fstype == mntfstype) {
1229 		VN_RELE(vp);
1230 		vp = vfs_mntdummyvp;
1231 		VN_HOLD(vfs_mntdummyvp);
1232 	}
1233 
1234 	/*
1235 	 * This should take care of lofs mounted fs systems and nfs4
1236 	 * hardlinks.
1237 	 */
1238 	if ((VOP_REALVP(vp, &rvp, NULL) == 0) && vp != rvp) {
1239 		VN_HOLD(rvp);
1240 		VN_RELE(vp);
1241 		vp = rvp;
1242 	}
1243 	return (vp);
1244 }
1245 
1246 /*
1247  * Register a file events watch on the given file associated to the port *pp.
1248  *
1249  * The association is identified by the object pointer and the pid.
1250  * The events argument contains the events to be monitored for.
1251  *
1252  * The vnode will have a VN_HOLD once the fem hooks are installed.
1253  *
1254  * Every reference(pfp) to the directory vnode will have a VN_HOLD to ensure
1255  * that the directory vnode pointer does not change.
1256  */
1257 int
1258 port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
1259     void *user)
1260 {
1261 	portfop_cache_t	*pfcp;
1262 	vnode_t		*vp, *dvp, *oldvp = NULL, *olddvp = NULL, *orig;
1263 	portfop_t	*pfp;
1264 	int		error = 0;
1265 	file_obj_t	fobj;
1266 	void		*objptr;
1267 	char		*cname;
1268 	int		clen;
1269 	int		follow;
1270 
1271 	/*
1272 	 * check that events specified are valid.
1273 	 */
1274 	if ((events & ~FILE_EVENTS_MASK) != 0)
1275 		return (EINVAL);
1276 
1277 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1278 		if (copyin((void *)object, &fobj, sizeof (file_obj_t)))
1279 			return (EFAULT);
1280 		objptr = (void *)&fobj;
1281 #ifdef  _SYSCALL32_IMPL
1282 	} else {
1283 		file_obj32_t	fobj32;
1284 		if (copyin((void *)object, &fobj32, sizeof (file_obj32_t)))
1285 			return (EFAULT);
1286 		objptr = (void *)&fobj32;
1287 #endif  /* _SYSCALL32_IMPL */
1288 	}
1289 
1290 	vp = dvp = NULL;
1291 
1292 	/*
1293 	 * find out if we need to follow symbolic links.
1294 	 */
1295 	follow = !(events & FILE_NOFOLLOW);
1296 	events = events & ~FILE_NOFOLLOW;
1297 
1298 	/*
1299 	 * lookup and find the vnode and its directory vnode of the given
1300 	 * file.
1301 	 */
1302 	if ((error = port_fop_getdvp(objptr, &vp, &dvp, &cname, &clen,
1303 	    follow)) != 0) {
1304 		return (error);
1305 	}
1306 
1307 	if (dvp != NULL) {
1308 		dvp = port_resolve_vp(dvp);
1309 	}
1310 
1311 	/*
1312 	 * Not found
1313 	 */
1314 	if (vp == NULL) {
1315 		error = ENOENT;
1316 		goto errout;
1317 	}
1318 
1319 	vp = port_resolve_vp(orig = vp);
1320 
1321 	if (vp != NULL && vnevent_support(vp, NULL)) {
1322 		error = ENOTSUP;
1323 		goto errout;
1324 	}
1325 
1326 	/*
1327 	 * If dvp belongs to a different filesystem just ignore it, as hard
1328 	 * links cannot exist across filesystems.  We make an exception for
1329 	 * procfs, however, the magic of which we treat semantically as a hard
1330 	 * link, allowing one to use /proc/[pid]/fd/[fd] for PORT_SOURCE_FILE
1331 	 * and avoid spurious FILE_RENAME_FROM/FILE_RENAME_TO events.
1332 	 */
1333 	if (dvp != NULL && dvp->v_vfsp != vp->v_vfsp &&
1334 	    !(orig->v_type == VPROC && vp != NULL && vp->v_type != VPROC)) {
1335 		VN_RELE(dvp);
1336 		dvp = NULL;
1337 	}
1338 
1339 	/*
1340 	 * Associate this source to the port and get the per port
1341 	 * fop cache pointer. If the source is already associated, it
1342 	 * will just return the cache pointer.
1343 	 */
1344 	if (error = port_fop_associate_source(&pfcp, pp, source)) {
1345 		goto errout;
1346 	}
1347 
1348 	/*
1349 	 * Check if there is an existing association of this file.
1350 	 */
1351 	mutex_enter(&pfcp->pfc_lock);
1352 	pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);
1353 
1354 	/*
1355 	 * If it is not the same vnode, just discard it. VN_RELE needs to be
1356 	 * called with no locks held, therefore save vnode pointers and
1357 	 * vn_rele them later.
1358 	 */
1359 	if (pfp != NULL && (pfp->pfop_vp != vp || pfp->pfop_dvp != dvp)) {
1360 		(void) port_remove_fop(pfp, pfcp, 1, NULL, &oldvp, &olddvp);
1361 		pfp = NULL;
1362 	}
1363 
1364 	if (pfp == NULL) {
1365 		vnode_t *tvp, *tdvp;
1366 		portfop_t	*tpfp;
1367 		int error;
1368 
1369 		/*
1370 		 * Add a new association, save the file name and the
1371 		 * directory vnode pointer.
1372 		 */
1373 		if (error = port_pfp_setup(&pfp, pp, vp, pfcp, object,
1374 		    events, user, cname, clen, dvp)) {
1375 			mutex_exit(&pfcp->pfc_lock);
1376 			goto errout;
1377 		}
1378 
1379 		pfp->pfop_callrid = curthread;
1380 		/*
1381 		 * File name used, so make sure we don't free it.
1382 		 */
1383 		cname = NULL;
1384 
1385 		/*
1386 		 * We need to check if the file was removed after the
1387 		 * the lookup and before the fem hooks where added. If
1388 		 * so, return error. The vnode will still exist as we have
1389 		 * a hold on it.
1390 		 *
1391 		 * Drop the cache lock before calling port_fop_getdvp().
1392 		 * port_fop_getdvp() may block either in the vfs layer
1393 		 * or some filesystem.  Therefore there is potential
1394 		 * for deadlock if cache lock is held and if some other
1395 		 * thread is attempting to deliver file events which would
1396 		 * require getting the cache lock, while it may be holding
1397 		 * the filesystem or vfs layer locks.
1398 		 */
1399 		mutex_exit(&pfcp->pfc_lock);
1400 		tvp = NULL;
1401 		if ((error = port_fop_getdvp(objptr, &tvp, NULL,
1402 		    NULL, NULL, follow)) == 0) {
1403 			if (tvp != NULL) {
1404 				tvp = port_resolve_vp(tvp);
1405 				/*
1406 				 * This vnode pointer is just used
1407 				 * for comparison, so rele it
1408 				 */
1409 				VN_RELE(tvp);
1410 			}
1411 		}
1412 
1413 		if (error || tvp == NULL || tvp != vp) {
1414 			/*
1415 			 * Since we dropped the cache lock, make sure
1416 			 * we are still dealing with the same pfp and this
1417 			 * is the thread which registered it.
1418 			 */
1419 			mutex_enter(&pfcp->pfc_lock);
1420 			tpfp = port_cache_lookup_fop(pfcp,
1421 			    curproc->p_pid, object);
1422 
1423 			error = 0;
1424 			if (tpfp == NULL || tpfp != pfp ||
1425 			    pfp->pfop_vp != vp ||
1426 			    pfp->pfop_dvp != dvp ||
1427 			    pfp->pfop_callrid != curthread) {
1428 				/*
1429 				 * Some other event was delivered, the file
1430 				 * watch was removed or reassociated, just
1431 				 * ignore it and leave
1432 				 */
1433 				mutex_exit(&pfcp->pfc_lock);
1434 				goto errout;
1435 			}
1436 
1437 			/*
1438 			 * remove the pfp and fem hooks, if pfp still
1439 			 * active and it is not being removed from
1440 			 * the vnode list. This is checked in
1441 			 * port_remove_fop with the vnode lock held.
1442 			 * The vnode returned is VN_RELE'ed after dropping
1443 			 * the locks.
1444 			 */
1445 			tdvp = tvp = NULL;
1446 			if (port_remove_fop(pfp, pfcp, 0, NULL, &tvp, &tdvp)) {
1447 				/*
1448 				 * The pfp was removed, means no
1449 				 * events where queued. Report the
1450 				 * error now.
1451 				 */
1452 				error = EINVAL;
1453 			}
1454 			mutex_exit(&pfcp->pfc_lock);
1455 			if (tvp != NULL)
1456 				VN_RELE(tvp);
1457 			if (tdvp != NULL)
1458 				VN_RELE(tdvp);
1459 			goto errout;
1460 		}
1461 	} else {
1462 		portfop_vp_t	*pvp = vp->v_fopdata;
1463 
1464 		/*
1465 		 * Re-association of the object.
1466 		 */
1467 		mutex_enter(&pvp->pvp_mutex);
1468 
1469 		/*
1470 		 * remove any queued up event.
1471 		 */
1472 		if (port_remove_done_event(pfp->pfop_pev)) {
1473 			pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
1474 		}
1475 
1476 		/*
1477 		 * set new events to watch.
1478 		 */
1479 		pfp->pfop_events = events;
1480 
1481 		/*
1482 		 * If not active, mark it active even if it is being
1483 		 * removed. Then it can send an exception event.
1484 		 *
1485 		 * Move it to the head, as the active ones are only
1486 		 * in the beginning. If removing, the pfp will be on
1487 		 * a temporary list, no need to move it to the front
1488 		 * all the entries will be processed. Some exception
1489 		 * events will be delivered in port_fop_excep();
1490 		 */
1491 		if (!(pfp->pfop_flags & PORT_FOP_ACTIVE)) {
1492 			pfp->pfop_flags |= PORT_FOP_ACTIVE;
1493 			if (!(pfp->pfop_flags & PORT_FOP_REMOVING)) {
1494 				pvp = (portfop_vp_t *)vp->v_fopdata;
1495 				port_fop_listremove(pvp, pfp);
1496 				port_fop_listinsert_head(pvp, pfp);
1497 			}
1498 		}
1499 		pfp->pfop_callrid = curthread;
1500 		mutex_exit(&pvp->pvp_mutex);
1501 		mutex_exit(&pfcp->pfc_lock);
1502 	}
1503 
1504 	/*
1505 	 * Compare time stamps and deliver events.
1506 	 */
1507 	if (vp->v_type != VFIFO) {
1508 		port_check_timestamp(pfcp, vp, dvp, pfp, objptr, object);
1509 	}
1510 
1511 	error = 0;
1512 
1513 	/*
1514 	 *  If we have too many watches on the vnode, discard an
1515 	 *  inactive watch.
1516 	 */
1517 	port_fop_trimpfplist(vp);
1518 
1519 errout:
1520 	/*
1521 	 * Release the hold acquired due to the lookup operation.
1522 	 */
1523 	if (vp != NULL)
1524 		VN_RELE(vp);
1525 	if (dvp != NULL)
1526 		VN_RELE(dvp);
1527 
1528 	if (oldvp != NULL)
1529 		VN_RELE(oldvp);
1530 	if (olddvp != NULL)
1531 		VN_RELE(olddvp);
1532 
1533 	/*
1534 	 * copied file name not used, free it.
1535 	 */
1536 	if (cname != NULL) {
1537 		kmem_free(cname, clen + 1);
1538 	}
1539 	return (error);
1540 }
1541 
1542 
1543 /*
1544  * The port_dissociate_fop() function dissociates the file object
1545  * from the event port and removes any events that are already on the queue.
1546  * Only the owner of the association is allowed to dissociate the file from
1547  * the port. Returns  success (0) if it was found and removed. Otherwise
1548  * ENOENT.
1549  */
1550 int
1551 port_dissociate_fop(port_t *pp, uintptr_t object)
1552 {
1553 	portfop_cache_t	*pfcp;
1554 	portfop_t	*pfp;
1555 	port_source_t	*pse;
1556 	int		active = 0;
1557 	vnode_t		*tvp = NULL, *tdvp = NULL;
1558 
1559 	pse = port_getsrc(pp, PORT_SOURCE_FILE);
1560 
1561 	/*
1562 	 * if this source is not associated or if there is no
1563 	 * cache, nothing to do just return.
1564 	 */
1565 	if (pse == NULL ||
1566 	    (pfcp = (portfop_cache_t *)pse->portsrc_data) == NULL)
1567 		return (EINVAL);
1568 
1569 	/*
1570 	 * Check if this object is on the cache. Only the owner pid
1571 	 * is allowed to dissociate.
1572 	 */
1573 	mutex_enter(&pfcp->pfc_lock);
1574 	pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);
1575 	if (pfp == NULL) {
1576 		mutex_exit(&pfcp->pfc_lock);
1577 		return (ENOENT);
1578 	}
1579 
1580 	/*
1581 	 * If this was the last association, it will release
1582 	 * the hold on the vnode. There is a race condition where
1583 	 * the the pfp is being removed due to an exception event
1584 	 * in port_fop_sendevent()->port_fop_excep() and port_remove_fop().
1585 	 * Since port source cache lock is held, port_fop_excep() cannot
1586 	 * complete. The vnode itself will not disappear as long its pfps
1587 	 * have a reference.
1588 	 */
1589 	(void) port_remove_fop(pfp, pfcp, 1, &active, &tvp, &tdvp);
1590 	mutex_exit(&pfcp->pfc_lock);
1591 	if (tvp != NULL)
1592 		VN_RELE(tvp);
1593 	if (tdvp != NULL)
1594 		VN_RELE(tdvp);
1595 	return (active ? 0 : ENOENT);
1596 }
1597 
1598 
1599 /*
1600  * port_close() calls this function to request the PORT_SOURCE_FILE source
1601  * to remove/free all resources allocated and associated with the port.
1602  */
1603 
1604 /* ARGSUSED */
1605 static void
1606 port_close_fop(void *arg, int port, pid_t pid, int lastclose)
1607 {
1608 	port_t		*pp = arg;
1609 	portfop_cache_t	*pfcp;
1610 	portfop_t	**hashtbl;
1611 	portfop_t	*pfp;
1612 	portfop_t	*pfpnext;
1613 	int		index, i;
1614 	port_source_t	*pse;
1615 	vnode_t 	*tdvp = NULL;
1616 	vnode_t		*vpl[PORTFOP_NVP];
1617 
1618 	pse = port_getsrc(pp, PORT_SOURCE_FILE);
1619 
1620 	/*
1621 	 * No source or no cache, nothing to do.
1622 	 */
1623 	if (pse == NULL ||
1624 	    (pfcp = (portfop_cache_t *)pse->portsrc_data) == NULL)
1625 		return;
1626 	/*
1627 	 * Scan the cache and free all allocated portfop_t and port_kevent_t
1628 	 * structures of this pid. Note, no new association for this pid will
1629 	 * be possible as the port is being closed.
1630 	 *
1631 	 * The common case is that the port is not shared and all the entries
1632 	 * are of this pid and have to be freed. Since VN_RELE has to be
1633 	 * called outside the lock, we do it in batches.
1634 	 */
1635 	hashtbl = (portfop_t **)pfcp->pfc_hash;
1636 	index = i = 0;
1637 	bzero(vpl, sizeof (vpl));
1638 	mutex_enter(&pfcp->pfc_lock);
1639 	while (index < PORTFOP_HASHSIZE) {
1640 		pfp = hashtbl[index];
1641 		while (pfp != NULL && i < (PORTFOP_NVP - 1)) {
1642 			pfpnext = pfp->pfop_hashnext;
1643 			if (pid == pfp->pfop_pid) {
1644 				(void) port_remove_fop(pfp, pfcp, 1, NULL,
1645 				    &vpl[i], &tdvp);
1646 				if (vpl[i] != NULL) {
1647 					i++;
1648 				}
1649 				if (tdvp != NULL) {
1650 					vpl[i++] = tdvp;
1651 					tdvp = NULL;
1652 				}
1653 			}
1654 			pfp = pfpnext;
1655 		}
1656 		if (pfp == NULL)
1657 			index++;
1658 		/*
1659 		 * Now call VN_RELE if we have collected enough vnodes or
1660 		 * we have reached the end of the hash table.
1661 		 */
1662 		if (i >= (PORTFOP_NVP - 1) ||
1663 		    (i > 0 && index == PORTFOP_HASHSIZE)) {
1664 			mutex_exit(&pfcp->pfc_lock);
1665 			while (i > 0) {
1666 				VN_RELE(vpl[--i]);
1667 				vpl[i] = NULL;
1668 			}
1669 			mutex_enter(&pfcp->pfc_lock);
1670 		}
1671 	}
1672 
1673 	/*
1674 	 * Due to a race between port_close_fop() and port_fop()
1675 	 * trying to remove the pfp's from the port's cache, it is
1676 	 * possible that some pfp's are still in the process of being
1677 	 * freed so we wait.
1678 	 */
1679 	while (lastclose && pfcp->pfc_objcount) {
1680 		(void) cv_wait_sig(&pfcp->pfc_lclosecv, &pfcp->pfc_lock);
1681 	}
1682 	mutex_exit(&pfcp->pfc_lock);
1683 	/*
1684 	 * last close, free the cache.
1685 	 */
1686 	if (lastclose) {
1687 		ASSERT(pfcp->pfc_objcount == 0);
1688 		pse->portsrc_data = NULL;
1689 		kmem_free(pfcp, sizeof (portfop_cache_t));
1690 	}
1691 }
1692 
1693 /*
1694  * Given the list of associations(watches), it will send exception events,
1695  * if still active, and discard them. The exception events are handled
1696  * separately because, the pfp needs to be removed from the port cache and
1697  * freed as the vnode's identity is changing or being removed. To remove
1698  * the pfp from the port's cache, we need to hold the cache lock (pfc_lock).
1699  * The lock order is pfc_lock -> pvp_mutex(vnode's) mutex and that is why
1700  * the cache's lock cannot be acquired in port_fop_sendevent().
1701  */
1702 static void
1703 port_fop_excep(list_t *tlist, int op)
1704 {
1705 	portfop_t	*pfp;
1706 	portfop_cache_t *pfcp;
1707 	port_t	*pp;
1708 	port_kevent_t	*pkevp;
1709 	vnode_t		*tdvp;
1710 	int		error = 0;
1711 
1712 	while (pfp = (portfop_t *)list_head(tlist)) {
1713 		int removed = 0;
1714 		/*
1715 		 * remove from the temp list. Since PORT_FOP_REMOVING is
1716 		 * set, no other thread should attempt to perform a
1717 		 * list_remove on this pfp.
1718 		 */
1719 		list_remove(tlist, pfp);
1720 
1721 		pfcp = pfp->pfop_pcache;
1722 		mutex_enter(&pfcp->pfc_lock);
1723 
1724 		/*
1725 		 * Remove the event from the port queue if it was queued up.
1726 		 * No need to clear the PORT_FOP_KEV_ONQ flag as this pfp is
1727 		 * no longer on the vnode's list.
1728 		 */
1729 		if ((pfp->pfop_flags & PORT_FOP_KEV_ONQ)) {
1730 			removed = port_remove_done_event(pfp->pfop_pev);
1731 		}
1732 
1733 		/*
1734 		 * If still active or the event was queued up and
1735 		 * had not been collected yet, send an EXCEPTION event.
1736 		 */
1737 		if (pfp->pfop_flags & (PORT_FOP_ACTIVE) || removed) {
1738 			pp = pfp->pfop_pp;
1739 			/*
1740 			 * Allocate a port_kevent_t non cached to send this
1741 			 * event since we will be de-registering.
1742 			 * The port_kevent_t cannot be pointing back to the
1743 			 * pfp anymore.
1744 			 */
1745 			pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
1746 			error = port_alloc_event_local(pp, PORT_SOURCE_FILE,
1747 			    PORT_ALLOC_DEFAULT, &pkevp);
1748 			if (!error) {
1749 
1750 				pkevp->portkev_callback = port_fop_callback;
1751 				pkevp->portkev_arg = NULL;
1752 				pkevp->portkev_object =
1753 				    pfp->pfop_pev->portkev_object;
1754 				pkevp->portkev_user =
1755 				    pfp->pfop_pev->portkev_user;
1756 				/*
1757 				 * Copy the pid of the watching process.
1758 				 */
1759 				pkevp->portkev_pid =
1760 				    pfp->pfop_pev->portkev_pid;
1761 				pkevp->portkev_events = op;
1762 				port_send_event(pkevp);
1763 			}
1764 		}
1765 		/*
1766 		 * At this point the pfp has been removed from the vnode's
1767 		 * list its cached port_kevent_t is not on the done queue.
1768 		 * Remove the pfp and free it from the cache.
1769 		 */
1770 		tdvp = pfp->pfop_dvp;
1771 		port_pcache_remove_fop(pfcp, pfp);
1772 		mutex_exit(&pfcp->pfc_lock);
1773 		if (tdvp != NULL)
1774 			VN_RELE(tdvp);
1775 	}
1776 }
1777 
1778 /*
1779  * Send the file events to all of the processes watching this
1780  * vnode. In case of hard links, the directory vnode pointer and
1781  * the file name are compared. If the names match, then the specified
1782  * event is sent or else, the FILE_ATTRIB event is sent, This is the
1783  * documented behavior.
1784  */
1785 void
1786 port_fop_sendevent(vnode_t *vp, int events, vnode_t *dvp, char *cname)
1787 {
1788 	port_kevent_t	*pkevp;
1789 	portfop_t	*pfp, *npfp;
1790 	portfop_vp_t	*pvp;
1791 	list_t		tmplist;
1792 	int		removeall = 0;
1793 
1794 	pvp = (portfop_vp_t *)vp->v_fopdata;
1795 	mutex_enter(&pvp->pvp_mutex);
1796 
1797 	/*
1798 	 * Check if the list is empty.
1799 	 *
1800 	 * All entries have been removed by some other thread.
1801 	 * The vnode may be still active and we got called,
1802 	 * but some other thread is in the process of removing the hooks.
1803 	 */
1804 	if (!list_head(&pvp->pvp_pfoplist)) {
1805 		mutex_exit(&pvp->pvp_mutex);
1806 		return;
1807 	}
1808 
1809 	if ((events & (FILE_EXCEPTION))) {
1810 		/*
1811 		 * If it is an event for which we are going to remove
1812 		 * the watches so just move it a temporary list and
1813 		 * release this vnode.
1814 		 */
1815 		list_create(&tmplist, sizeof (portfop_t),
1816 		    offsetof(portfop_t, pfop_node));
1817 
1818 		/*
1819 		 * If it is an UNMOUNT, MOUNTEDOVER or no file name has been
1820 		 * passed for an exception event, all associations need to be
1821 		 * removed.
1822 		 */
1823 		if (dvp == NULL || cname == NULL) {
1824 			removeall = 1;
1825 		}
1826 	}
1827 
1828 	if (!removeall) {
1829 		/*
1830 		 * All the active ones are in the beginning of the list.
1831 		 * Note that we process this list in reverse order to assure
1832 		 * that events are delivered in the order that they were
1833 		 * associated.
1834 		 */
1835 		for (pfp = (portfop_t *)list_tail(&pvp->pvp_pfoplist);
1836 		    pfp && !(pfp->pfop_flags & PORT_FOP_ACTIVE); pfp = npfp) {
1837 			npfp = list_prev(&pvp->pvp_pfoplist, pfp);
1838 		}
1839 
1840 		for (; pfp != NULL; pfp = npfp) {
1841 			int levents = events;
1842 
1843 			npfp = list_prev(&pvp->pvp_pfoplist, pfp);
1844 			/*
1845 			 * Hard links case - If the file is being
1846 			 * removed/renamed, and the name matches
1847 			 * the watched file, then it is an EXCEPTION
1848 			 * event or else it will be just a FILE_ATTRIB.
1849 			 */
1850 			if ((events & (FILE_EXCEPTION))) {
1851 				ASSERT(dvp != NULL && cname != NULL);
1852 				if (pfp->pfop_dvp == NULL ||
1853 				    (pfp->pfop_dvp == dvp &&
1854 				    (strcmp(cname, pfp->pfop_cname) == 0))) {
1855 					/*
1856 					 * It is an exception event, move it
1857 					 * to temp list and process it later.
1858 					 * Note we don't set the pfp->pfop_vp
1859 					 * to NULL even thought it has been
1860 					 * removed from the vnode's list. This
1861 					 * pointer is referenced in
1862 					 * port_remove_fop(). The vnode it
1863 					 * self cannot disappear until this
1864 					 * pfp gets removed and freed.
1865 					 */
1866 					port_fop_listremove(pvp, pfp);
1867 					list_insert_tail(&tmplist, (void *)pfp);
1868 					pfp->pfop_flags  |= PORT_FOP_REMOVING;
1869 					continue;
1870 				} else {
1871 					levents = FILE_ATTRIB;
1872 				}
1873 
1874 			}
1875 
1876 			if (pfp->pfop_events & levents) {
1877 				/*
1878 				 * deactivate and move it to the tail.
1879 				 * If the pfp was active, it cannot be
1880 				 * on the port's done queue.
1881 				 */
1882 				pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
1883 				port_fop_listremove(pvp, pfp);
1884 				port_fop_listinsert_tail(pvp, pfp);
1885 
1886 				pkevp = pfp->pfop_pev;
1887 				pkevp->portkev_events |=
1888 				    (levents & pfp->pfop_events);
1889 				port_send_event(pkevp);
1890 				pfp->pfop_flags |= PORT_FOP_KEV_ONQ;
1891 			}
1892 		}
1893 	}
1894 
1895 
1896 	if ((events & (FILE_EXCEPTION))) {
1897 		if (!removeall) {
1898 			/*
1899 			 * Check the inactive associations and remove them if
1900 			 * the file name matches.
1901 			 */
1902 			for (; pfp; pfp = npfp) {
1903 				npfp = list_next(&pvp->pvp_pfoplist, pfp);
1904 				if (dvp == NULL || cname == NULL ||
1905 				    pfp->pfop_dvp == NULL ||
1906 				    (pfp->pfop_dvp == dvp &&
1907 				    (strcmp(cname, pfp->pfop_cname) == 0))) {
1908 					port_fop_listremove(pvp, pfp);
1909 					list_insert_tail(&tmplist, (void *)pfp);
1910 					pfp->pfop_flags  |= PORT_FOP_REMOVING;
1911 				}
1912 			}
1913 		} else {
1914 			/*
1915 			 * Can be optimized to avoid two pass over this list
1916 			 * by having a flag in the vnode's portfop_vp_t
1917 			 * structure to indicate that it is going away,
1918 			 * Or keep the list short by reusing inactive watches.
1919 			 */
1920 			port_fop_listmove(pvp, &tmplist);
1921 			for (pfp = (portfop_t *)list_head(&tmplist);
1922 			    pfp; pfp = list_next(&tmplist, pfp)) {
1923 				pfp->pfop_flags |= PORT_FOP_REMOVING;
1924 			}
1925 		}
1926 
1927 		/*
1928 		 * Uninstall the fem hooks if there are no more associations.
1929 		 * This will release the pvp mutex.
1930 		 *
1931 		 * Even thought all entries may have been removed,
1932 		 * the vnode itself cannot disappear as there will be a
1933 		 * hold on it due to this call to port_fop_sendevent. This is
1934 		 * important to syncronize with a port_dissociate_fop() call
1935 		 * that may be attempting to remove an object from the vnode's.
1936 		 */
1937 		if (port_fop_femuninstall(vp))
1938 			VN_RELE(vp);
1939 
1940 		/*
1941 		 * Send exception events and discard the watch entries.
1942 		 */
1943 		port_fop_excep(&tmplist, events);
1944 		list_destroy(&tmplist);
1945 
1946 	} else {
1947 		mutex_exit(&pvp->pvp_mutex);
1948 
1949 		/*
1950 		 * trim the list.
1951 		 */
1952 		port_fop_trimpfplist(vp);
1953 	}
1954 }
1955 
1956 /*
1957  * Given the file operation, map it to the event types and send.
1958  */
1959 void
1960 port_fop(vnode_t *vp, int op, int retval)
1961 {
1962 	int event = 0;
1963 	/*
1964 	 * deliver events only if the operation was successful.
1965 	 */
1966 	if (retval)
1967 		return;
1968 
1969 	/*
1970 	 * These events occurring on the watched file.
1971 	 */
1972 	if (op & FOP_MODIFIED_MASK) {
1973 		event  = FILE_MODIFIED;
1974 	}
1975 	if (op & FOP_ACCESS_MASK) {
1976 		event  |= FILE_ACCESS;
1977 	}
1978 	if (op & FOP_ATTRIB_MASK) {
1979 		event  |= FILE_ATTRIB;
1980 	}
1981 	if (op & FOP_TRUNC_MASK) {
1982 		event  |= FILE_TRUNC;
1983 	}
1984 	if (event) {
1985 		port_fop_sendevent(vp, 	event, NULL, NULL);
1986 	}
1987 }
1988 
1989 static int port_forceunmount(vfs_t *vfsp)
1990 {
1991 	char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
1992 
1993 	if (fsname == NULL) {
1994 		return (0);
1995 	}
1996 
1997 	if (strcmp(fsname, MNTTYPE_NFS) == 0) {
1998 		return (1);
1999 	}
2000 
2001 	if (strcmp(fsname, MNTTYPE_NFS3) == 0) {
2002 		return (1);
2003 	}
2004 
2005 	if (strcmp(fsname, MNTTYPE_NFS4) == 0) {
2006 		return (1);
2007 	}
2008 	return (0);
2009 }
2010 /*
2011  * ----- the unmount filesystem op(fsem) hook.
2012  */
2013 int
2014 port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr)
2015 {
2016 	vfs_t	*vfsp = (vfs_t *)vf->fa_fnode->fn_available;
2017 	kmutex_t	*mtx;
2018 	portfop_vfs_t	*pvfsp, **ppvfsp;
2019 	portfop_vp_t	*pvp;
2020 	int error;
2021 	int fmfs;
2022 
2023 	fmfs = port_forceunmount(vfsp);
2024 
2025 	mtx = &(portvfs_hash[PORTFOP_PVFSHASH(vfsp)].pvfshash_mutex);
2026 	ppvfsp = &(portvfs_hash[PORTFOP_PVFSHASH(vfsp)].pvfshash_pvfsp);
2027 	pvfsp = NULL;
2028 	mutex_enter(mtx);
2029 	/*
2030 	 * since this fsem hook is triggered, the vfsp has to be on
2031 	 * the hash list.
2032 	 */
2033 	for (pvfsp = *ppvfsp; pvfsp->pvfs != vfsp; pvfsp = pvfsp->pvfs_next)
2034 	;
2035 
2036 	/*
2037 	 * For some of the filesystems, allow unmounts to proceed only if
2038 	 * there are no files being watched or it is a forced unmount.
2039 	 */
2040 	if (fmfs && !(flag & MS_FORCE) &&
2041 	    !list_is_empty(&pvfsp->pvfs_pvplist)) {
2042 		mutex_exit(mtx);
2043 		return (EBUSY);
2044 	}
2045 
2046 	/*
2047 	 * Indicate that the unmount is in process. Don't remove it yet.
2048 	 * The underlying filesystem unmount routine sets the VFS_UNMOUNTED
2049 	 * flag on the vfs_t structure. But we call the filesystem unmount
2050 	 * routine after removing all the file watches for this filesystem,
2051 	 * otherwise the unmount will fail due to active vnodes.
2052 	 * Meanwhile setting pvfsp->unmount = 1 will prevent any thread
2053 	 * attempting to add a file watch.
2054 	 */
2055 	pvfsp->pvfs_unmount = 1;
2056 	mutex_exit(mtx);
2057 
2058 	/*
2059 	 * uninstall the fsem hooks.
2060 	 */
2061 	(void) fsem_uninstall(vfsp, (fsem_t *)pvfsp->pvfs_fsemp, vfsp);
2062 
2063 	while (pvp = list_head(&pvfsp->pvfs_pvplist)) {
2064 		list_remove(&pvfsp->pvfs_pvplist, pvp);
2065 		/*
2066 		 * This should send an UNMOUNTED event to all the
2067 		 * watched vnode of this filesystem and uninstall
2068 		 * the fem hooks. We release the hold on the vnode here
2069 		 * because port_fop_femuninstall() will not do it if
2070 		 * unmount is in process.
2071 		 */
2072 		port_fop_sendevent(pvp->pvp_vp, UNMOUNTED, NULL, NULL);
2073 		VN_RELE(pvp->pvp_vp);
2074 	}
2075 
2076 	error = vfsnext_unmount(vf, flag, cr);
2077 
2078 	/*
2079 	 * we free the pvfsp after the unmount has been completed.
2080 	 */
2081 	mutex_enter(mtx);
2082 	for (; *ppvfsp && (*ppvfsp)->pvfs != vfsp;
2083 	    ppvfsp = &(*ppvfsp)->pvfs_next)
2084 	;
2085 
2086 	/*
2087 	 * remove and free it.
2088 	 */
2089 	ASSERT(list_head(&pvfsp->pvfs_pvplist) == NULL);
2090 	if (*ppvfsp) {
2091 		pvfsp = *ppvfsp;
2092 		*ppvfsp = pvfsp->pvfs_next;
2093 	}
2094 	mutex_exit(mtx);
2095 	kmem_free(pvfsp, sizeof (portfop_vfs_t));
2096 	return (error);
2097 }
2098 
2099 /*
2100  * ------------------------------file op hooks--------------------------
2101  * The O_TRUNC operation is caught with the VOP_SETATTR(AT_SIZE) call.
2102  */
2103 static int
2104 port_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
2105 {
2106 	int		retval;
2107 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2108 
2109 	retval = vnext_open(vf, mode, cr, ct);
2110 	port_fop(vp, FOP_FILE_OPEN, retval);
2111 	return (retval);
2112 }
2113 
2114 static int
2115 port_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
2116     caller_context_t *ct)
2117 {
2118 	int		retval;
2119 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2120 
2121 	retval =  vnext_write(vf, uiop, ioflag, cr, ct);
2122 	port_fop(vp, FOP_FILE_WRITE, retval);
2123 	return (retval);
2124 }
2125 
2126 static int
2127 port_fop_map(femarg_t *vf, offset_t off, struct as *as, caddr_t *addrp,
2128     size_t len, uchar_t prot, uchar_t maxport, uint_t flags, cred_t *cr,
2129     caller_context_t *ct)
2130 {
2131 	int		retval;
2132 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2133 
2134 	retval =  vnext_map(vf, off, as, addrp, len, prot, maxport,
2135 	    flags, cr, ct);
2136 	port_fop(vp, FOP_FILE_MAP, retval);
2137 	return (retval);
2138 }
2139 
2140 static int
2141 port_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
2142     caller_context_t *ct)
2143 {
2144 	int		retval;
2145 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2146 
2147 	retval =  vnext_read(vf, uiop, ioflag, cr, ct);
2148 	port_fop(vp, FOP_FILE_READ, retval);
2149 	return (retval);
2150 }
2151 
2152 
2153 /*
2154  * AT_SIZE - is for the open(O_TRUNC) case.
2155  */
2156 int
2157 port_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
2158     caller_context_t *ct)
2159 {
2160 	int		retval;
2161 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2162 	int		events = 0;
2163 
2164 	retval = vnext_setattr(vf, vap, flags, cr, ct);
2165 	if (vap->va_mask & AT_SIZE) {
2166 		events |= FOP_FILE_TRUNC;
2167 	}
2168 	if (vap->va_mask & (AT_SIZE|AT_MTIME)) {
2169 		events |= FOP_FILE_SETATTR_MTIME;
2170 	}
2171 	if (vap->va_mask & AT_ATIME) {
2172 		events |= FOP_FILE_SETATTR_ATIME;
2173 	}
2174 	events |= FOP_FILE_SETATTR_CTIME;
2175 
2176 	port_fop(vp, events, retval);
2177 	return (retval);
2178 }
2179 
2180 int
2181 port_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
2182     int mode, vnode_t **vpp, cred_t *cr, int flag,
2183     caller_context_t *ct, vsecattr_t *vsecp)
2184 {
2185 	int		retval, got = 1;
2186 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2187 	vattr_t		vatt, vatt1;
2188 
2189 	/*
2190 	 * If the file already exists, then there will be no change
2191 	 * to the directory. Therefore, we need to compare the
2192 	 * modification time of the directory to determine if the
2193 	 * file was actually created.
2194 	 */
2195 	vatt.va_mask = AT_ATIME|AT_MTIME|AT_CTIME;
2196 	if (VOP_GETATTR(vp, &vatt, 0, CRED(), ct)) {
2197 		got = 0;
2198 	}
2199 	retval = vnext_create(vf, name, vap, excl, mode, vpp, cr,
2200 	    flag, ct, vsecp);
2201 
2202 	vatt1.va_mask = AT_ATIME|AT_MTIME|AT_CTIME;
2203 	if (got && !VOP_GETATTR(vp, &vatt1, 0, CRED(), ct)) {
2204 		if ((vatt1.va_mtime.tv_sec > vatt.va_mtime.tv_sec ||
2205 		    (vatt1.va_mtime.tv_sec = vatt.va_mtime.tv_sec &&
2206 		    vatt1.va_mtime.tv_nsec > vatt.va_mtime.tv_nsec))) {
2207 			/*
2208 			 * File was created.
2209 			 */
2210 			port_fop(vp, FOP_FILE_CREATE, retval);
2211 		}
2212 	}
2213 	return (retval);
2214 }
2215 
2216 int
2217 port_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
2218     int flags)
2219 {
2220 	int		retval;
2221 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2222 
2223 	retval = vnext_remove(vf, nm, cr, ct, flags);
2224 	port_fop(vp, FOP_FILE_REMOVE, retval);
2225 	return (retval);
2226 }
2227 
2228 int
2229 port_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
2230     caller_context_t *ct, int flags)
2231 {
2232 	int		retval;
2233 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2234 
2235 	retval = vnext_link(vf, svp, tnm, cr, ct, flags);
2236 	port_fop(vp, FOP_FILE_LINK, retval);
2237 	return (retval);
2238 }
2239 
2240 /*
2241  * Rename operation is allowed only when from and to directories are
2242  * on the same filesystem. This is checked in vn_rename().
2243  * The target directory is notified thru a VNEVENT by the filesystem
2244  * if the source dir != target dir.
2245  */
2246 int
2247 port_fop_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
2248     caller_context_t *ct, int flags)
2249 {
2250 	int		retval;
2251 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2252 
2253 	retval = vnext_rename(vf, snm, tdvp, tnm, cr, ct, flags);
2254 	port_fop(vp, FOP_FILE_RENAMESRC, retval);
2255 	return (retval);
2256 }
2257 
2258 int
2259 port_fop_mkdir(femarg_t *vf, char *dirname, vattr_t *vap, vnode_t **vpp,
2260     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
2261 {
2262 	int		retval;
2263 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2264 
2265 	retval = vnext_mkdir(vf, dirname, vap, vpp, cr, ct, flags, vsecp);
2266 	port_fop(vp, FOP_FILE_MKDIR, retval);
2267 	return (retval);
2268 }
2269 
2270 int
2271 port_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
2272     caller_context_t *ct, int flags)
2273 {
2274 	int		retval;
2275 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2276 
2277 	retval = vnext_rmdir(vf, nm, cdir, cr, ct, flags);
2278 	port_fop(vp, FOP_FILE_RMDIR, retval);
2279 	return (retval);
2280 }
2281 
2282 int
2283 port_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
2284     caller_context_t *ct, int flags)
2285 {
2286 	int		retval;
2287 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2288 
2289 	retval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
2290 	port_fop(vp, FOP_FILE_READDIR, retval);
2291 	return (retval);
2292 }
2293 
2294 int
2295 port_fop_symlink(femarg_t *vf, char *linkname, vattr_t *vap, char *target,
2296     cred_t *cr, caller_context_t *ct, int flags)
2297 {
2298 	int		retval;
2299 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2300 
2301 	retval = vnext_symlink(vf, linkname, vap, target, cr, ct, flags);
2302 	port_fop(vp, FOP_FILE_SYMLINK, retval);
2303 	return (retval);
2304 }
2305 
2306 /*
2307  * acl, facl call this.
2308  */
2309 int
2310 port_fop_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flags, cred_t *cr,
2311     caller_context_t *ct)
2312 {
2313 	int	retval;
2314 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2315 	retval = vnext_setsecattr(vf, vsap, flags, cr, ct);
2316 	port_fop(vp, FOP_FILE_SETSECATTR, retval);
2317 	return (retval);
2318 }
2319 
2320 /*
2321  * these are events on the watched file/directory
2322  */
2323 int
2324 port_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
2325     caller_context_t *ct)
2326 {
2327 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2328 
2329 	switch (vnevent) {
2330 	case	VE_RENAME_SRC:
2331 			port_fop_sendevent(vp, FILE_RENAME_FROM, dvp, name);
2332 		break;
2333 	case	VE_RENAME_DEST:
2334 			port_fop_sendevent(vp, FILE_RENAME_TO, dvp, name);
2335 		break;
2336 	case	VE_REMOVE:
2337 			port_fop_sendevent(vp, FILE_DELETE, dvp, name);
2338 		break;
2339 	case	VE_RMDIR:
2340 			port_fop_sendevent(vp, FILE_DELETE, dvp, name);
2341 		break;
2342 	case	VE_CREATE:
2343 			port_fop_sendevent(vp,
2344 			    FILE_MODIFIED|FILE_ATTRIB|FILE_TRUNC, NULL, NULL);
2345 		break;
2346 	case	VE_LINK:
2347 			port_fop_sendevent(vp, FILE_ATTRIB, NULL, NULL);
2348 		break;
2349 
2350 	case	VE_RENAME_DEST_DIR:
2351 			port_fop_sendevent(vp, FILE_MODIFIED|FILE_ATTRIB,
2352 			    NULL, NULL);
2353 		break;
2354 
2355 	case	VE_MOUNTEDOVER:
2356 			port_fop_sendevent(vp, MOUNTEDOVER, NULL, NULL);
2357 		break;
2358 	case	VE_TRUNCATE:
2359 			port_fop_sendevent(vp, FILE_TRUNC, NULL, NULL);
2360 		break;
2361 	default:
2362 		break;
2363 	}
2364 	return (vnext_vnevent(vf, vnevent, dvp, name, ct));
2365 }
2366