xref: /illumos-gate/usr/src/uts/common/os/port_subr.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file contains all the functions required for interactions of
31  * event sources with the event port file system.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/conf.h>
36 #include <sys/stat.h>
37 #include <sys/errno.h>
38 #include <sys/kmem.h>
39 #include <sys/debug.h>
40 #include <sys/file.h>
41 #include <sys/sysmacros.h>
42 #include <sys/systm.h>
43 #include <sys/bitmap.h>
44 #include <sys/rctl.h>
45 #include <sys/atomic.h>
46 #include <sys/poll_impl.h>
47 #include <sys/port_impl.h>
48 
/*
 * Maximum number of elements allowed to be passed in a single call of a
 * port function (port_sendn(), port_getn()).  We need to allocate kernel
 * memory for all of them at once, so we can't let it scale without limit.
 */
uint_t		port_max_list = PORT_MAX_LIST;
port_control_t	port_control;	/* Event port framework main structure */
56 
57 /*
58  * The port_send_event() function is used by all event sources to submit
59  * trigerred events to a port. All the data  required for the event management
60  * is already stored in the port_kevent_t structure.
61  * The event port internal data is stored in the port_kevent_t structure
62  * during the allocation time (see port_alloc_event()). The data related to
63  * the event itself and to the event source management is stored in the
64  * port_kevent_t structure between the allocation time and submit time
65  * (see port_init_event()).
66  *
67  * This function is often called from interrupt level.
68  */
69 int
70 port_send_event(port_kevent_t *pkevp)
71 {
72 	port_queue_t	*portq;
73 
74 	portq = &pkevp->portkev_port->port_queue;
75 	mutex_enter(&portq->portq_mutex);
76 
77 	if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
78 		/* Event already in the port queue */
79 		mutex_exit(&portq->portq_mutex);
80 		return (0);
81 	}
82 
83 	/* put event in the port queue */
84 	list_insert_tail(&portq->portq_list, pkevp);
85 	portq->portq_nent++;
86 
87 	/*
88 	 * Remove PORTQ_WAIT_EVENTS flags to indicate that new events are
89 	 * available.
90 	 */
91 	portq->portq_flags &= ~PORTQ_WAIT_EVENTS;
92 	pkevp->portkev_flags |= PORT_KEV_DONEQ;		/* event enqueued */
93 
94 	/* Check if thread is in port_close() waiting for outstanding events */
95 	if (portq->portq_flags & PORTQ_CLOSE) {
96 		/* Check if all outstanding events are already in port queue */
97 		if (pkevp->portkev_port->port_curr <= portq->portq_nent)
98 			cv_signal(&portq->portq_closecv);
99 	}
100 
101 	if (portq->portq_getn == 0) {
102 		/*
103 		 * No thread retrieving events -> check if enough events are
104 		 * available to satify waiting threads.
105 		 */
106 		if (portq->portq_thread &&
107 		    (portq->portq_nent >= portq->portq_nget))
108 			cv_signal(&portq->portq_thread->portget_cv);
109 	}
110 
111 	if (portq->portq_flags & PORTQ_POLLIN) {
112 		portq->portq_flags &= ~PORTQ_POLLIN;
113 		mutex_exit(&portq->portq_mutex);
114 		pollwakeup(&pkevp->portkev_port->port_pollhd, POLLIN);
115 	} else {
116 		mutex_exit(&portq->portq_mutex);
117 	}
118 	return (0);
119 }
120 
121 /*
122  * The port_alloc_event() function has to be used by all event sources
123  * to request an slot for event notification.
124  * The slot reservation could be denied because of lack of resources.
125  * For that reason the event source should allocate an event slot as early
126  * as possible and be prepared to get an error code instead of the
127  * port event pointer.
128  * Al current event sources allocate an event slot during a system call
129  * entry. They return an error code to the application if an event slot
130  * could not be reserved.
131  * It is also recommended to associate the event source with the port
132  * before some other port function is used.
133  * The port argument is a file descriptor obtained by the application as
134  * a return value of port_create().
135  * Possible values of flags are:
136  * PORT_ALLOC_DEFAULT
137  *	This is the standard type of port events. port_get(n) will free this
138  *	type of event structures as soon as the events are delivered to the
139  *	application.
140  * PORT_ALLOC_PRIVATE
141  *	This type of event will be use for private use of the event source.
142  *	The port_get(n) function will deliver events of such an structure to
143  *	the application but it will not free the event structure itself.
144  *	The event source must free this structure using port_free_event().
145  * PORT_ALLOC_CACHED
146  *	This type of events is used when the event source helds an own
147  *	cache.
148  *	The port_get(n) function will deliver events of such an structure to
149  *	the application but it will not free the event structure itself.
150  *	The event source must free this structure using port_free_event().
151  */
152 int
153 port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp)
154 {
155 	port_t		*pp;
156 	file_t		*fp;
157 
158 	if ((fp = getf(port)) == NULL)
159 		return (EBADF);
160 
161 	if (fp->f_vnode->v_type != VPORT) {
162 		releasef(port);
163 		return (EBADFD);
164 	}
165 
166 	pp = VTOEP(fp->f_vnode);
167 
168 	/*
169 	 * port_max_events is controlled by the resource control
170 	 * process.port-max-events
171 	 */
172 	if (pp->port_curr >= pp->port_max_events) {
173 		releasef(port);
174 		return (EAGAIN);
175 	}
176 
177 	*pkevpp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
178 	if (*pkevpp == NULL) {
179 		releasef(port);
180 		return (ENOMEM);
181 	}
182 	atomic_add_32(&pp->port_curr, 1);
183 	(*pkevpp)->portkev_source = source;
184 	(*pkevpp)->portkev_flags = flags;
185 	(*pkevpp)->portkev_pid = curproc->p_pid;
186 	(*pkevpp)->portkev_port = pp;
187 	(*pkevpp)->portkev_callback = NULL;
188 	releasef(port);
189 	return (0);
190 }
191 
192 /*
193  * This function is faster than the standard port_alloc_event() and
194  * can be used when the event source already allocated an event from
195  * a port.
196  */
197 int
198 port_dup_event(port_kevent_t *pkevp, port_kevent_t **pkevdupp, int flags)
199 {
200 	int	error;
201 
202 	error = port_alloc_event_local(pkevp->portkev_port,
203 	    pkevp->portkev_source, flags, pkevdupp);
204 	if (error == 0)
205 		(*pkevdupp)->portkev_pid = pkevp->portkev_pid;
206 	return (error);
207 }
208 
209 /*
210  * port_alloc_event_local() is reserved for internal use only.
211  * It is doing the same job as port_alloc_event() but with the event port
212  * pointer as the first argument.
213  * The check of the validity of the port file descriptor is skipped here.
214  */
215 int
216 port_alloc_event_local(port_t *pp, int source, int flags,
217     port_kevent_t **pkevpp)
218 {
219 	if (pp->port_curr >= pp->port_max_events)
220 		return (EAGAIN);
221 
222 	*pkevpp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
223 	if (*pkevpp == NULL)
224 		return (ENOMEM);
225 
226 	atomic_add_32(&pp->port_curr, 1);
227 	(*pkevpp)->portkev_flags = flags;
228 	(*pkevpp)->portkev_port = pp;
229 	(*pkevpp)->portkev_source = source;
230 	(*pkevpp)->portkev_pid = curproc->p_pid;
231 	return (0);
232 }
233 
234 /*
235  * port_alloc_event_block() has the same functionality of port_alloc_event() +
236  * - it blocks if not enough event slots are available and
237  * - it blocks if not enough memory is available.
238  * Currently port_dispatch() is using this function to increase the
239  * reliability of event delivery for library event sources.
240  */
241 int
242 port_alloc_event_block(port_t *pp, int source, int flags,
243     port_kevent_t **pkevp)
244 {
245 	int		rval;
246 
247 	if (pp->port_curr >= pp->port_max_events) {
248 		mutex_enter(&pp->port_mutex);
249 		pp->port_flags |= PORT_EVENTS;
250 		while (pp->port_curr >= pp->port_max_events) {
251 			rval = cv_wait_sig(&pp->port_cv, &pp->port_mutex);
252 			if (rval == 0) {
253 				/* signal detected */
254 				mutex_exit(&pp->port_mutex);
255 				return (EINTR);
256 			}
257 		}
258 		mutex_exit(&pp->port_mutex);
259 	}
260 
261 	*pkevp = kmem_cache_alloc(port_control.pc_cache, KM_SLEEP);
262 	atomic_add_32(&pp->port_curr, 1);
263 	(*pkevp)->portkev_flags = flags;
264 	(*pkevp)->portkev_port = pp;
265 	(*pkevp)->portkev_source = source;
266 	(*pkevp)->portkev_pid = curproc->p_pid;
267 	return (0);
268 }
269 
270 /*
271  * Take an event out of the port queue
272  */
273 static void
274 port_remove_event_doneq(port_kevent_t *pkevp, port_queue_t *portq)
275 {
276 	ASSERT(MUTEX_HELD(&portq->portq_mutex));
277 	list_remove(&portq->portq_list, pkevp);
278 	portq->portq_nent--;
279 	pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
280 }
281 
282 /*
283  * The port_remove_done_event() function takes a fired event out of the
284  * port queue.
285  * Currently this function is required to cancel a fired event because
286  * the application is delivering new association data (see port_associate_fd()).
287  */
288 void
289 port_remove_done_event(port_kevent_t *pkevp)
290 {
291 	port_queue_t	*portq;
292 
293 	portq = &pkevp->portkev_port->port_queue;
294 	mutex_enter(&portq->portq_mutex);
295 	/* wait for port_get() or port_getn() */
296 	mutex_enter(&portq->portq_block_mutex);
297 	if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
298 		/* event still in port queue */
299 		if (portq->portq_getn) {
300 			/*
301 			 * There could be still fired events in the temp queue;
302 			 * push those events back to the port queue and
303 			 * remove requested event afterwards.
304 			 */
305 			port_push_eventq(portq);
306 		}
307 		/* now remove event from the port queue */
308 		port_remove_event_doneq(pkevp, portq);
309 	}
310 	mutex_exit(&portq->portq_block_mutex);
311 	mutex_exit(&portq->portq_mutex);
312 }
313 
314 /*
315  * Return port event back to the kmem_cache.
316  * If the event is currently in the port queue the event itself will only
317  * be set as invalid. The port_get(n) function will not deliver such events
318  * to the application and it will return them back to the kmem_cache.
319  */
320 void
321 port_free_event(port_kevent_t *pkevp)
322 {
323 	port_queue_t	*portq;
324 	port_t		*pp;
325 
326 	pp = pkevp->portkev_port;
327 	if (pp == NULL)
328 		return;
329 	if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) {
330 		port_free_event_local(pkevp, 0);
331 		return;
332 	}
333 
334 	portq = &pp->port_queue;
335 	mutex_enter(&portq->portq_mutex);
336 	mutex_enter(&portq->portq_block_mutex);
337 	if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
338 		pkevp->portkev_flags |= PORT_KEV_FREE;
339 		pkevp->portkev_callback = NULL;
340 		mutex_exit(&portq->portq_block_mutex);
341 		mutex_exit(&portq->portq_mutex);
342 		return;
343 	}
344 	mutex_exit(&portq->portq_block_mutex);
345 
346 	if (pkevp->portkev_flags & PORT_KEV_CACHED) {
347 		mutex_exit(&portq->portq_mutex);
348 		return;
349 	}
350 
351 	atomic_add_32(&pp->port_curr, -1);
352 	if (portq->portq_flags & PORTQ_CLOSE) {
353 		/*
354 		 * Another thread is closing the event port.
355 		 * That thread will sleep until all allocated event
356 		 * structures returned to the event port framework.
357 		 * The portq_mutex is used to synchronize the status
358 		 * of the allocated event structures (port_curr).
359 		 */
360 		if (pp->port_curr <= portq->portq_nent)
361 			cv_signal(&portq->portq_closecv);
362 	}
363 	mutex_exit(&portq->portq_mutex);
364 	port_free_event_local(pkevp, 1);
365 }
366 
367 /*
368  * This event port internal function is used by port_free_event() and
369  * other port internal functions to return event structures back to the
370  * kmem_cache.
371  */
372 void
373 port_free_event_local(port_kevent_t *pkevp, int counter)
374 {
375 	port_t	*pp = pkevp->portkev_port;
376 
377 	ASSERT(pp != NULL);
378 	if (counter == 0)
379 		atomic_add_32(&pp->port_curr, -1);
380 	pkevp->portkev_callback = NULL;
381 	pkevp->portkev_flags = 0;
382 	pkevp->portkev_port = NULL;
383 	kmem_cache_free(port_control.pc_cache, pkevp);
384 
385 	/* Check if blocking calls are waiting for event slots */
386 	if (pp->port_flags & PORT_EVENTS) {
387 		mutex_enter(&pp->port_mutex);
388 		pp->port_flags &= ~PORT_EVENTS;
389 		cv_signal(&pp->port_cv);
390 		mutex_exit(&pp->port_mutex);
391 	}
392 
393 	/* Submit a POLLOUT event if requested */
394 	if (pp->port_queue.portq_flags & PORTQ_POLLOUT) {
395 		port_queue_t	*portq = &pp->port_queue;
396 		mutex_enter(&portq->portq_mutex);
397 		portq->portq_flags &= ~PORTQ_POLLOUT;
398 		mutex_exit(&portq->portq_mutex);
399 		pollwakeup(&pp->port_pollhd, POLLOUT);
400 	}
401 }
402 
403 /*
404  * port_init_event(port_event_t *pev, uintptr_t object, void *user,
405  *	int (*port_callback)(void *, int *, pid_t, int, void *), void *sysarg);
406  *	This function initializes most of the "wired" elements of the port
407  *	event structure. This is normally being used just after the allocation
408  *	of the port event structure.
409  *	pkevp	: pointer to the port event structure
410  *	object	: object associated with this event structure
411  *	user	: user defined pointer delivered with the association function
412  *	port_callback:
413  *		  Address of the callback function which will be called
414  *		  - just before the event is delivered to the application.
415  *		    The callback function is called in user context and can be
416  *		    used for copyouts, e.g.
417  *		  - on close() or dissociation of the event. The sub-system
418  *		    must remove immediately every existing association of
419  *		    some object with this event.
420  *	sysarg	: event source propietary data
421  */
422 void
423 port_init_event(port_kevent_t *pkevp, uintptr_t object, void *user,
424     int (*port_callback)(void *, int *, pid_t, int, void *),
425     void *sysarg)
426 {
427 	pkevp->portkev_object = object;
428 	pkevp->portkev_user = user;
429 	pkevp->portkev_callback = port_callback;
430 	pkevp->portkev_arg = sysarg;
431 }
432 
433 /*
434  * This routine removes a portfd_t from the fd cache's hash table.
435  */
436 void
437 port_pcache_remove_fd(port_fdcache_t *pcp, portfd_t *pfd)
438 {
439 	polldat_t	*lpdp;
440 	polldat_t	*cpdp;
441 	portfd_t	**bucket;
442 	polldat_t	*pdp = PFTOD(pfd);
443 
444 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
445 	bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd);
446 	cpdp = PFTOD(*bucket);
447 	if (pdp == cpdp) {
448 		*bucket = PDTOF(pdp->pd_hashnext);
449 		pcp->pc_fdcount--;
450 		kmem_free(pfd, sizeof (portfd_t));
451 		return;
452 	}
453 
454 	while (cpdp != NULL) {
455 		lpdp = cpdp;
456 		cpdp = cpdp->pd_hashnext;
457 		if (cpdp == pdp) {
458 			/* polldat struct found */
459 			lpdp->pd_hashnext = pdp->pd_hashnext;
460 			pcp->pc_fdcount--;
461 			break;
462 		}
463 	}
464 	ASSERT(cpdp != NULL);
465 	kmem_free(pfd, sizeof (portfd_t));
466 }
467 
468 /*
469  * The port_push_eventq() function is used to move all remaining events
470  * from the temporary queue used in port_get(n)() to the standard port
471  * queue.
472  */
473 void
474 port_push_eventq(port_queue_t *portq)
475 {
476 	/*
477 	 * Append temporary portq_get_list to the port queue. On return
478 	 * the temporary portq_get_list is empty.
479 	 */
480 	list_move_tail(&portq->portq_list, &portq->portq_get_list);
481 	portq->portq_nent += portq->portq_tnent;
482 	portq->portq_tnent = 0;
483 }
484 
485 /*
486  * The port_remove_fd_object() function frees all resources associated with
487  * delivered portfd_t structure.
488  */
489 void
490 port_remove_fd_object(portfd_t *pfd, port_t *pp, port_fdcache_t *pcp)
491 {
492 	port_queue_t	*portq;
493 	polldat_t	*pdp = PFTOD(pfd);
494 	port_kevent_t	*pkevp;
495 	int		error;
496 
497 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
498 	if (pdp->pd_php != NULL) {
499 		pollhead_delete(pdp->pd_php, pdp);
500 		pdp->pd_php = NULL;
501 	}
502 	pkevp =  pdp->pd_portev;
503 	portq = &pp->port_queue;
504 	mutex_enter(&portq->portq_mutex);
505 	mutex_enter(&portq->portq_block_mutex);
506 	if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
507 		if (portq->portq_getn && portq->portq_tnent) {
508 			/*
509 			 * move events from the temporary "get" queue
510 			 * back to the port queue
511 			 */
512 			port_push_eventq(portq);
513 		}
514 		/* cleanup merged port queue */
515 		port_remove_event_doneq(pkevp, portq);
516 	}
517 	mutex_exit(&portq->portq_block_mutex);
518 	mutex_exit(&portq->portq_mutex);
519 	if (pkevp->portkev_callback) {
520 		(void) (*pkevp->portkev_callback)(pkevp->portkev_arg,
521 		    &error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE,
522 		    pkevp);
523 	}
524 	port_free_event_local(pkevp, 0);
525 
526 	/* remove polldat struct */
527 	port_pcache_remove_fd(pcp, pfd);
528 }
529 
530 /*
531  * The port_close_fd() function dissociates a file descriptor from a port
532  * and removes all allocated resources.
533  * close(2) detects in the uf_entry_t structure that the fd is associated
534  * with a port (at least one port).
535  * The fd can be associated with several ports.
536  */
537 void
538 port_close_pfd(portfd_t *pfd)
539 {
540 	port_t		*pp;
541 	port_fdcache_t	*pcp;
542 
543 	/* only association owner is allowed to remove the association */
544 	if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid)
545 		return;
546 	pp = PFTOD(pfd)->pd_portev->portkev_port;
547 	pcp = pp->port_queue.portq_pcp;
548 	mutex_enter(&pcp->pc_lock);
549 	port_remove_fd_object(pfd, pp, pcp);
550 	mutex_exit(&pcp->pc_lock);
551 }
552 
553 /*
554  * The port_associate_ksource() function associates an event source with a port.
555  * On port_close() all associated sources are requested to free all local
556  * resources associated with the event port.
557  * The association of a source with a port can only be done one time. Further
558  * calls of this function will only increment the reference counter.
559  * The allocated port_source_t structure is removed from the port as soon as
560  * the reference counter becomes 0.
561  */
562 /* ARGSUSED */
563 int
564 port_associate_ksource(int port, int source, port_source_t **portsrc,
565     void (*port_src_close)(void *, int, pid_t, int), void *arg,
566     int (*port_src_associate)(port_kevent_t *, int, int, uintptr_t, void *))
567 {
568 	port_t		*pp;
569 	file_t		*fp;
570 	port_source_t	**ps;
571 	port_source_t	*pse;
572 
573 	if ((fp = getf(port)) == NULL)
574 		return (EBADF);
575 
576 	if (fp->f_vnode->v_type != VPORT) {
577 		releasef(port);
578 		return (EBADFD);
579 	}
580 	pp = VTOEP(fp->f_vnode);
581 
582 	mutex_enter(&pp->port_queue.portq_source_mutex);
583 	ps = &pp->port_queue.portq_scache[PORT_SHASH(source)];
584 	for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
585 		if (pse->portsrc_source == source)
586 			break;
587 	}
588 
589 	if (pse == NULL) {
590 		/* Create association of the event source with the port */
591 		pse = kmem_zalloc(sizeof (port_source_t), KM_NOSLEEP);
592 		if (pse == NULL) {
593 			mutex_exit(&pp->port_queue.portq_source_mutex);
594 			releasef(port);
595 			return (ENOMEM);
596 		}
597 		pse->portsrc_source = source;
598 		pse->portsrc_close = port_src_close;
599 		pse->portsrc_closearg = arg;
600 		pse->portsrc_cnt = 1;
601 		if (*ps)
602 			pse->portsrc_next = (*ps)->portsrc_next;
603 		*ps = pse;
604 	} else {
605 		/* entry already available, source is only requesting count */
606 		pse->portsrc_cnt++;
607 	}
608 	mutex_exit(&pp->port_queue.portq_source_mutex);
609 	releasef(port);
610 	if (portsrc)
611 		*portsrc = pse;
612 	return (0);
613 }
614 
615 /*
616  * The port_dissociate_ksource() function dissociates an event source from
617  * a port.
618  */
619 int
620 port_dissociate_ksource(int port, int source, port_source_t *ps)
621 {
622 	port_t		*pp;
623 	file_t		*fp;
624 	port_source_t	**psh;
625 
626 	if (ps == NULL)
627 		return (EINVAL);
628 
629 	if ((fp = getf(port)) == NULL)
630 		return (EBADF);
631 
632 	if (fp->f_vnode->v_type != VPORT) {
633 		releasef(port);
634 		return (EBADFD);
635 	}
636 	pp = VTOEP(fp->f_vnode);
637 
638 	mutex_enter(&pp->port_queue.portq_source_mutex);
639 	if (--ps->portsrc_cnt == 0) {
640 		/* last association removed -> free source structure */
641 		if (ps->portsrc_prev == NULL) {
642 			/* first entry */
643 			psh = &pp->port_queue.portq_scache[PORT_SHASH(source)];
644 			*psh = ps->portsrc_next;
645 			if (ps->portsrc_next)
646 				ps->portsrc_next->portsrc_prev = NULL;
647 		} else {
648 			ps->portsrc_prev->portsrc_next = ps->portsrc_next;
649 			if (ps->portsrc_next)
650 				ps->portsrc_next->portsrc_prev =
651 				    ps->portsrc_prev;
652 		}
653 		kmem_free(ps, sizeof (port_source_t));
654 	}
655 	mutex_exit(&pp->port_queue.portq_source_mutex);
656 	releasef(port);
657 	return (0);
658 }
659