/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * This file containts all the functions required for interactions of * event sources with the event port file system. */ #include <sys/types.h> #include <sys/conf.h> #include <sys/stat.h> #include <sys/errno.h> #include <sys/kmem.h> #include <sys/debug.h> #include <sys/file.h> #include <sys/sysmacros.h> #include <sys/systm.h> #include <sys/bitmap.h> #include <sys/rctl.h> #include <sys/atomic.h> #include <sys/poll_impl.h> #include <sys/port_impl.h> /* * Maximum number of elements allowed to be passed in a single call of a * port function (port_sendn(), port_getn(). We need to allocate kernel memory * for all of them at once, so we can't let it scale without limit. */ uint_t port_max_list = PORT_MAX_LIST; port_control_t port_control; /* Event port framework main structure */ /* * Block other threads from using a port. * We enter holding portq->portq_mutex but * we may drop and reacquire this lock. * Callers must deal with this fact. */ void port_block(port_queue_t *portq) { ASSERT(MUTEX_HELD(&portq->portq_mutex)); while (portq->portq_flags & PORTQ_BLOCKED) cv_wait(&portq->portq_block_cv, &portq->portq_mutex); portq->portq_flags |= PORTQ_BLOCKED; } /* * Undo port_block(portq). */ void port_unblock(port_queue_t *portq) { ASSERT(MUTEX_HELD(&portq->portq_mutex)); portq->portq_flags &= ~PORTQ_BLOCKED; cv_signal(&portq->portq_block_cv); } /* * Called from pollwakeup(PORT_SOURCE_FD source) to determine * if the port's fd needs to be notified of poll events. If yes, * we mark the port indicating that pollwakeup() is referring * it so that the port_t does not disappear. pollwakeup() * calls port_pollwkdone() after notifying. In port_pollwkdone(), * we clear the hold on the port_t (clear PORTQ_POLLWK_PEND). */ int port_pollwkup(port_t *pp) { int events = 0; port_queue_t *portq; portq = &pp->port_queue; mutex_enter(&portq->portq_mutex); /* * Normally, we should not have a situation where PORTQ_POLLIN * and PORTQ_POLLWK_PEND are set at the same time, but it is * possible. So, in pollwakeup() we ensure that no new fd's get * added to the pollhead between the time it notifies poll events * and calls poll_wkupdone() where we clear the PORTQ_POLLWK_PEND flag. */ if (portq->portq_flags & PORTQ_POLLIN && !(portq->portq_flags & PORTQ_POLLWK_PEND)) { portq->portq_flags &= ~PORTQ_POLLIN; portq->portq_flags |= PORTQ_POLLWK_PEND; events = POLLIN; } mutex_exit(&portq->portq_mutex); return (events); } void port_pollwkdone(port_t *pp) { port_queue_t *portq; portq = &pp->port_queue; ASSERT(portq->portq_flags & PORTQ_POLLWK_PEND); mutex_enter(&portq->portq_mutex); portq->portq_flags &= ~PORTQ_POLLWK_PEND; cv_signal(&pp->port_cv); mutex_exit(&portq->portq_mutex); } /* * The port_send_event() function is used by all event sources to submit * trigerred events to a port. All the data required for the event management * is already stored in the port_kevent_t structure. * The event port internal data is stored in the port_kevent_t structure * during the allocation time (see port_alloc_event()). The data related to * the event itself and to the event source management is stored in the * port_kevent_t structure between the allocation time and submit time * (see port_init_event()). * * This function is often called from interrupt level. */ void port_send_event(port_kevent_t *pkevp) { port_queue_t *portq; portq = &pkevp->portkev_port->port_queue; mutex_enter(&portq->portq_mutex); if (pkevp->portkev_flags & PORT_KEV_DONEQ) { /* Event already in the port queue */ if (pkevp->portkev_source == PORT_SOURCE_FD) { mutex_exit(&pkevp->portkev_lock); } mutex_exit(&portq->portq_mutex); return; } /* put event in the port queue */ list_insert_tail(&portq->portq_list, pkevp); portq->portq_nent++; /* * Remove the PORTQ_WAIT_EVENTS flag to indicate * that new events are available. */ portq->portq_flags &= ~PORTQ_WAIT_EVENTS; pkevp->portkev_flags |= PORT_KEV_DONEQ; /* event enqueued */ if (pkevp->portkev_source == PORT_SOURCE_FD) { mutex_exit(&pkevp->portkev_lock); } /* Check if thread is in port_close() waiting for outstanding events */ if (portq->portq_flags & PORTQ_CLOSE) { /* Check if all outstanding events are already in port queue */ if (pkevp->portkev_port->port_curr <= portq->portq_nent) cv_signal(&portq->portq_closecv); } if (portq->portq_getn == 0) { /* * No thread retrieving events -> check if enough events are * available to satify waiting threads. */ if (portq->portq_thread && (portq->portq_nent >= portq->portq_nget)) cv_signal(&portq->portq_thread->portget_cv); } /* * If some thread is polling the port's fd, then notify it. * For PORT_SOURCE_FD source, we don't need to call pollwakeup() * here as it will result in a recursive call(PORT_SOURCE_FD source * is pollwakeup()). Therefore pollwakeup() itself will notify the * ports if being polled. */ if (pkevp->portkev_source != PORT_SOURCE_FD && portq->portq_flags & PORTQ_POLLIN) { portq->portq_flags &= ~PORTQ_POLLIN; mutex_exit(&portq->portq_mutex); pollwakeup(&pkevp->portkev_port->port_pollhd, POLLIN); } else { mutex_exit(&portq->portq_mutex); } } /* * The port_alloc_event() function has to be used by all event sources * to request an slot for event notification. * The slot reservation could be denied because of lack of resources. * For that reason the event source should allocate an event slot as early * as possible and be prepared to get an error code instead of the * port event pointer. * Al current event sources allocate an event slot during a system call * entry. They return an error code to the application if an event slot * could not be reserved. * It is also recommended to associate the event source with the port * before some other port function is used. * The port argument is a file descriptor obtained by the application as * a return value of port_create(). * Possible values of flags are: * PORT_ALLOC_DEFAULT * This is the standard type of port events. port_get(n) will free this * type of event structures as soon as the events are delivered to the * application. * PORT_ALLOC_PRIVATE * This type of event will be use for private use of the event source. * The port_get(n) function will deliver events of such an structure to * the application but it will not free the event structure itself. * The event source must free this structure using port_free_event(). * PORT_ALLOC_CACHED * This type of events is used when the event source helds an own * cache. * The port_get(n) function will deliver events of such an structure to * the application but it will not free the event structure itself. * The event source must free this structure using port_free_event(). */ int port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp) { port_t *pp; file_t *fp; port_kevent_t *pkevp; if ((fp = getf(port)) == NULL) return (EBADF); if (fp->f_vnode->v_type != VPORT) { releasef(port); return (EBADFD); } pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP); if (pkevp == NULL) { releasef(port); return (ENOMEM); } /* * port_max_events is controlled by the resource control * process.port-max-events */ pp = VTOEP(fp->f_vnode); mutex_enter(&pp->port_queue.portq_mutex); if (pp->port_curr >= pp->port_max_events) { mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); releasef(port); return (EAGAIN); } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t)); mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL); pkevp->portkev_source = source; pkevp->portkev_flags = flags; pkevp->portkev_pid = curproc->p_pid; pkevp->portkev_port = pp; *pkevpp = pkevp; releasef(port); return (0); } /* * This function is faster than the standard port_alloc_event() and * can be used when the event source already allocated an event from * a port. */ int port_dup_event(port_kevent_t *pkevp, port_kevent_t **pkevdupp, int flags) { int error; error = port_alloc_event_local(pkevp->portkev_port, pkevp->portkev_source, flags, pkevdupp); if (error == 0) (*pkevdupp)->portkev_pid = pkevp->portkev_pid; return (error); } /* * port_alloc_event_local() is reserved for internal use only. * It is doing the same job as port_alloc_event() but with the event port * pointer as the first argument. * The check of the validity of the port file descriptor is skipped here. */ int port_alloc_event_local(port_t *pp, int source, int flags, port_kevent_t **pkevpp) { port_kevent_t *pkevp; pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP); if (pkevp == NULL) return (ENOMEM); mutex_enter(&pp->port_queue.portq_mutex); if (pp->port_curr >= pp->port_max_events) { mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); return (EAGAIN); } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t)); mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL); pkevp->portkev_flags = flags; pkevp->portkev_port = pp; pkevp->portkev_source = source; pkevp->portkev_pid = curproc->p_pid; *pkevpp = pkevp; return (0); } /* * port_alloc_event_block() has the same functionality of port_alloc_event() + * - it blocks if not enough event slots are available and * - it blocks if not enough memory is available. * Currently port_dispatch() is using this function to increase the * reliability of event delivery for library event sources. */ int port_alloc_event_block(port_t *pp, int source, int flags, port_kevent_t **pkevpp) { port_kevent_t *pkevp = kmem_cache_alloc(port_control.pc_cache, KM_SLEEP); mutex_enter(&pp->port_queue.portq_mutex); while (pp->port_curr >= pp->port_max_events) { if (!cv_wait_sig(&pp->port_cv, &pp->port_queue.portq_mutex)) { /* signal detected */ mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); return (EINTR); } } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t)); mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL); pkevp->portkev_flags = flags; pkevp->portkev_port = pp; pkevp->portkev_source = source; pkevp->portkev_pid = curproc->p_pid; *pkevpp = pkevp; return (0); } /* * Take an event out of the port queue */ static void port_remove_event_doneq(port_kevent_t *pkevp, port_queue_t *portq) { ASSERT(MUTEX_HELD(&portq->portq_mutex)); list_remove(&portq->portq_list, pkevp); portq->portq_nent--; pkevp->portkev_flags &= ~PORT_KEV_DONEQ; } /* * The port_remove_done_event() function takes a fired event out of the * port queue. * Currently this function is required to cancel a fired event because * the application is delivering new association data (see port_associate_fd()). */ void port_remove_done_event(port_kevent_t *pkevp) { port_queue_t *portq; portq = &pkevp->portkev_port->port_queue; mutex_enter(&portq->portq_mutex); /* wait for port_get() or port_getn() */ port_block(portq); if (pkevp->portkev_flags & PORT_KEV_DONEQ) { /* event still in port queue */ if (portq->portq_getn) { /* * There could be still fired events in the temp queue; * push those events back to the port queue and * remove requested event afterwards. */ port_push_eventq(portq); } /* now remove event from the port queue */ port_remove_event_doneq(pkevp, portq); } port_unblock(portq); mutex_exit(&portq->portq_mutex); } /* * Return port event back to the kmem_cache. * If the event is currently in the port queue the event itself will only * be set as invalid. The port_get(n) function will not deliver such events * to the application and it will return them back to the kmem_cache. */ void port_free_event(port_kevent_t *pkevp) { port_queue_t *portq; port_t *pp; pp = pkevp->portkev_port; if (pp == NULL) return; if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) { port_free_event_local(pkevp, 0); return; } portq = &pp->port_queue; mutex_enter(&portq->portq_mutex); port_block(portq); if (pkevp->portkev_flags & PORT_KEV_DONEQ) { pkevp->portkev_flags |= PORT_KEV_FREE; pkevp->portkev_callback = NULL; port_unblock(portq); mutex_exit(&portq->portq_mutex); return; } port_unblock(portq); if (pkevp->portkev_flags & PORT_KEV_CACHED) { mutex_exit(&portq->portq_mutex); return; } if (--pp->port_curr < pp->port_max_events) cv_signal(&pp->port_cv); if (portq->portq_flags & PORTQ_CLOSE) { /* * Another thread is closing the event port. * That thread will sleep until all allocated event * structures returned to the event port framework. * The portq_mutex is used to synchronize the status * of the allocated event structures (port_curr). */ if (pp->port_curr <= portq->portq_nent) cv_signal(&portq->portq_closecv); } mutex_exit(&portq->portq_mutex); port_free_event_local(pkevp, 1); } /* * This event port internal function is used by port_free_event() and * other port internal functions to return event structures back to the * kmem_cache. */ void port_free_event_local(port_kevent_t *pkevp, int counter) { port_t *pp = pkevp->portkev_port; port_queue_t *portq = &pp->port_queue; int wakeup; pkevp->portkev_callback = NULL; pkevp->portkev_flags = 0; pkevp->portkev_port = NULL; mutex_destroy(&pkevp->portkev_lock); kmem_cache_free(port_control.pc_cache, pkevp); mutex_enter(&portq->portq_mutex); if (counter == 0) { if (--pp->port_curr < pp->port_max_events) cv_signal(&pp->port_cv); } wakeup = (portq->portq_flags & PORTQ_POLLOUT); portq->portq_flags &= ~PORTQ_POLLOUT; mutex_exit(&portq->portq_mutex); /* Submit a POLLOUT event if requested */ if (wakeup) pollwakeup(&pp->port_pollhd, POLLOUT); } /* * port_init_event(port_event_t *pev, uintptr_t object, void *user, * int (*port_callback)(void *, int *, pid_t, int, void *), void *sysarg); * This function initializes most of the "wired" elements of the port * event structure. This is normally being used just after the allocation * of the port event structure. * pkevp : pointer to the port event structure * object : object associated with this event structure * user : user defined pointer delivered with the association function * port_callback: * Address of the callback function which will be called * - just before the event is delivered to the application. * The callback function is called in user context and can be * used for copyouts, e.g. * - on close() or dissociation of the event. The sub-system * must remove immediately every existing association of * some object with this event. * sysarg : event source propietary data */ void port_init_event(port_kevent_t *pkevp, uintptr_t object, void *user, int (*port_callback)(void *, int *, pid_t, int, void *), void *sysarg) { pkevp->portkev_object = object; pkevp->portkev_user = user; pkevp->portkev_callback = port_callback; pkevp->portkev_arg = sysarg; } /* * This routine removes a portfd_t from the fd cache's hash table. */ void port_pcache_remove_fd(port_fdcache_t *pcp, portfd_t *pfd) { polldat_t *lpdp; polldat_t *cpdp; portfd_t **bucket; polldat_t *pdp = PFTOD(pfd); ASSERT(MUTEX_HELD(&pcp->pc_lock)); bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd); cpdp = PFTOD(*bucket); if (pdp == cpdp) { *bucket = PDTOF(pdp->pd_hashnext); if (--pcp->pc_fdcount == 0) { /* * signal the thread which may have blocked in * port_close_sourcefd() on lastclose waiting * for pc_fdcount to drop to 0. */ cv_signal(&pcp->pc_lclosecv); } kmem_free(pfd, sizeof (portfd_t)); return; } while (cpdp != NULL) { lpdp = cpdp; cpdp = cpdp->pd_hashnext; if (cpdp == pdp) { /* polldat struct found */ lpdp->pd_hashnext = pdp->pd_hashnext; if (--pcp->pc_fdcount == 0) { /* * signal the thread which may have blocked in * port_close_sourcefd() on lastclose waiting * for pc_fdcount to drop to 0. */ cv_signal(&pcp->pc_lclosecv); } break; } } ASSERT(cpdp != NULL); kmem_free(pfd, sizeof (portfd_t)); } /* * The port_push_eventq() function is used to move all remaining events * from the temporary queue used in port_get(n)() to the standard port * queue. */ void port_push_eventq(port_queue_t *portq) { /* * Append temporary portq_get_list to the port queue. On return * the temporary portq_get_list is empty. */ list_move_tail(&portq->portq_list, &portq->portq_get_list); portq->portq_nent += portq->portq_tnent; portq->portq_tnent = 0; } /* * The port_remove_fd_object() function frees all resources associated with * delivered portfd_t structure. */ void port_remove_fd_object(portfd_t *pfd, port_t *pp, port_fdcache_t *pcp) { port_queue_t *portq; polldat_t *pdp = PFTOD(pfd); port_kevent_t *pkevp; int error; ASSERT(MUTEX_HELD(&pcp->pc_lock)); if (pdp->pd_php != NULL) { pollhead_delete(pdp->pd_php, pdp); pdp->pd_php = NULL; } pkevp = pdp->pd_portev; portq = &pp->port_queue; mutex_enter(&portq->portq_mutex); port_block(portq); if (pkevp->portkev_flags & PORT_KEV_DONEQ) { if (portq->portq_getn && portq->portq_tnent) { /* * move events from the temporary "get" queue * back to the port queue */ port_push_eventq(portq); } /* cleanup merged port queue */ port_remove_event_doneq(pkevp, portq); } port_unblock(portq); mutex_exit(&portq->portq_mutex); if (pkevp->portkev_callback) { (void) (*pkevp->portkev_callback)(pkevp->portkev_arg, &error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE, pkevp); } port_free_event_local(pkevp, 0); /* remove polldat struct */ port_pcache_remove_fd(pcp, pfd); } /* * The port_close_fd() function dissociates a file descriptor from a port * and removes all allocated resources. * close(2) detects in the uf_entry_t structure that the fd is associated * with a port (at least one port). * The fd can be associated with several ports. */ void port_close_pfd(portfd_t *pfd) { port_t *pp; port_fdcache_t *pcp; /* * the portfd_t passed in should be for this proc. */ ASSERT(curproc->p_pid == PFTOD(pfd)->pd_portev->portkev_pid); pp = PFTOD(pfd)->pd_portev->portkev_port; pcp = pp->port_queue.portq_pcp; mutex_enter(&pcp->pc_lock); port_remove_fd_object(pfd, pp, pcp); mutex_exit(&pcp->pc_lock); } /* * The port_associate_ksource() function associates an event source with a port. * On port_close() all associated sources are requested to free all local * resources associated with the event port. * The association of a source with a port can only be done one time. Further * calls of this function will only increment the reference counter. * The allocated port_source_t structure is removed from the port as soon as * the reference counter becomes 0. */ /* ARGSUSED */ int port_associate_ksource(int port, int source, port_source_t **portsrc, void (*port_src_close)(void *, int, pid_t, int), void *arg, int (*port_src_associate)(port_kevent_t *, int, int, uintptr_t, void *)) { port_t *pp; file_t *fp; port_source_t **ps; port_source_t *pse; if ((fp = getf(port)) == NULL) return (EBADF); if (fp->f_vnode->v_type != VPORT) { releasef(port); return (EBADFD); } pp = VTOEP(fp->f_vnode); mutex_enter(&pp->port_queue.portq_source_mutex); ps = &pp->port_queue.portq_scache[PORT_SHASH(source)]; for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { if (pse->portsrc_source == source) break; } if (pse == NULL) { /* Create association of the event source with the port */ pse = kmem_zalloc(sizeof (port_source_t), KM_NOSLEEP); if (pse == NULL) { mutex_exit(&pp->port_queue.portq_source_mutex); releasef(port); return (ENOMEM); } pse->portsrc_source = source; pse->portsrc_close = port_src_close; pse->portsrc_closearg = arg; pse->portsrc_cnt = 1; if (*ps) pse->portsrc_next = (*ps)->portsrc_next; *ps = pse; } else { /* entry already available, source is only requesting count */ pse->portsrc_cnt++; } mutex_exit(&pp->port_queue.portq_source_mutex); releasef(port); if (portsrc) *portsrc = pse; return (0); } /* * The port_dissociate_ksource() function dissociates an event source from * a port. */ int port_dissociate_ksource(int port, int source, port_source_t *ps) { port_t *pp; file_t *fp; port_source_t **psh; if (ps == NULL) return (EINVAL); if ((fp = getf(port)) == NULL) return (EBADF); if (fp->f_vnode->v_type != VPORT) { releasef(port); return (EBADFD); } pp = VTOEP(fp->f_vnode); mutex_enter(&pp->port_queue.portq_source_mutex); if (--ps->portsrc_cnt == 0) { /* last association removed -> free source structure */ if (ps->portsrc_prev == NULL) { /* first entry */ psh = &pp->port_queue.portq_scache[PORT_SHASH(source)]; *psh = ps->portsrc_next; if (ps->portsrc_next) ps->portsrc_next->portsrc_prev = NULL; } else { ps->portsrc_prev->portsrc_next = ps->portsrc_next; if (ps->portsrc_next) ps->portsrc_next->portsrc_prev = ps->portsrc_prev; } kmem_free(ps, sizeof (port_source_t)); } mutex_exit(&pp->port_queue.portq_source_mutex); releasef(port); return (0); }