/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2015, Joyent, Inc.
 */

#ifndef _SYS_POLL_IMPL_H
#define _SYS_POLL_IMPL_H

/*
 * Caching Poll Subsystem:
 *
 * Each kernel thread (1), if engaged in poll system call, has a reference to
 * a pollstate_t (2), which contains relevant flags and locks. The pollstate_t
 * contains a pointer to a pollcache_t (3), which caches the state of previous
 * calls to poll. A bitmap (4) is stored inside the poll cache, where each
 * bit represents a file descriptor. The bits are set if the corresponding
 * device has a polled event pending. Only fds with their bit set will be
 * examined on the next poll invocation. The pollstate_t also contains a list
 * of fd sets (5), which are represented by the pollcacheset_t type. These
 * structures keep track of the pollfd_t arrays (6) passed in from userland.
 * Each polled file descriptor has a corresponding polldat_t which can be
 * chained onto a device's pollhead, and these are kept in a hash table (7)
 * inside the pollcache_t. The hash table allows efficient conversion of a
 * given fd to its corresponding polldat_t.
 *
 *      (1)                (2)
 * +-----------+    +-------------+
 * | kthread_t |--->| pollstate_t |-->+-------------+  (6)
 * +-----------+    +-------------+(5)| pcacheset_t |->[_][_][_][_] pollfd_t
 *                         |          +-------------+
 *                         |          | pcacheset_t |->[_][_][_][_] pollfd_t
 *     (1a)                |          +-------------+
 * +---------------+       |
 * | /dev/poll tbl |       |
 * +-v-------------+       |
 *   |                     |
 *   +------------------+  |
 *   (7)            (3) V  v
 * polldat hash     +-------------+    (4) bitmap representing fd space
 * [_][_][_][_]<----|             |--->000010010010001010101010101010110
 *  |  |  |  |      | pollcache_t |
 *  .  v  .  .      |             |
 * [polldat_t]      +-------------+
 *     |
 * [polldat_t]
 *     |
 *     v
 *    NULL
 *
 *
 * Both poll system call and /dev/poll use the pollcache_t structure
 * definition and the routines managing the structure. But poll(2) and
 * /dev/poll have their own copy of the structures. The /dev/poll driver
 * table (1a) contains an array of pointers, each pointing at a pollcache_t
 * struct (3). A device minor number is used as a device table index.
 *
 */

/*
 * NOTE(review): the header names on the #include directives below are
 * missing in this copy of the file and must be restored before it can be
 * compiled. The declarations in this header rely on pollfd_t, pollhead_t,
 * kthread_t, file_t, port_kevent_t, kmutex_t and kcondvar_t being provided
 * by the included headers.
 */
#include

#if defined(_KERNEL) || defined(_KMEMUSER)

#include
#include
#include

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Typedefs
 *
 * The structures are forward-declared first so that they can refer to one
 * another through the _t names before their full definitions appear.
 */
struct pollcache;
struct pollstate;
struct pcachelink;
struct polldat;

typedef struct pollcache pollcache_t;
typedef struct pollstate pollstate_t;
typedef struct pcachelink pcachelink_t;
typedef struct polldat polldat_t;

/*
 * description of pollcacheset structure
 *
 * One cached fd set, item (5) in the overview above: it keeps track of a
 * pollfd_t array (6) that was passed in from userland.
 */
typedef struct pollcacheset {
	uintptr_t	pcs_usradr;	/* usr pollfd array address */
	pollfd_t	*pcs_pollfd;	/* cached poll lists */
	size_t		pcs_nfds;	/* number of poll fd in cached list */
	ulong_t		pcs_count;	/* for LU replacement policy */
} pollcacheset_t;

/*
 * NOTE(review): presumably the number of pollcacheset_t entries kept per
 * pollstate (cf. ps_nsets) -- confirm against poll.c.
 */
#define	POLLFDSETS	2

/*
 * Maximum depth for recursive poll operations.
 */
#define	POLLMAXDEPTH	5

/*
 * State information kept by each polling thread
 */
struct pollstate {
	pollfd_t	*ps_pollfd;	/* hold the current poll list */
	size_t		ps_nfds;	/* size of ps_pollfd */
	kmutex_t	ps_lock;	/* mutex for sleep/wakeup */
	pollcache_t	*ps_pcache;	/* cached poll fd set */
	pollcacheset_t	*ps_pcacheset;	/* cached poll lists */
	int		ps_nsets;	/* no. of cached poll sets */
	pollfd_t	*ps_dpbuf;	/* return pollfd buf used by devpoll */
	size_t		ps_dpbufsize;	/* size of ps_dpbuf */
	int		ps_depth;	/* epoll recursion depth */
	/* one slot per recursion level, hence sized by POLLMAXDEPTH */
	pollcache_t	*ps_pc_stack[POLLMAXDEPTH]; /* epoll recursion state */
	pollcache_t	*ps_contend_pc;	/* pollcache waited on */
	pollstate_t	*ps_contend_nextp; /* next in contender list */
	pollstate_t	**ps_contend_pnextp; /* pointer-to-previous-next */
	int		ps_flags;	/* state flags */
};

/* pollstate flags (presumably the bits stored in ps_flags -- TODO confirm) */
#define	POLLSTATE_STALEMATE	0x1
#define	POLLSTATE_ULFAIL	0x2

/* pollstate_enter results (pollstate_enter() is declared to return int) */
#define	PSE_SUCCESS		0
#define	PSE_FAIL_DEPTH		1
#define	PSE_FAIL_LOOP		2
#define	PSE_FAIL_DEADLOCK	3
#define	PSE_FAIL_POLLSTATE	4

/*
 * poll cache size defines
 */
#define	POLLCHUNKSHIFT	8	/* hash table increment size is 256 */
#define	POLLHASHCHUNKSZ	(1 << POLLCHUNKSHIFT)
#define	POLLHASHINC	2	/* poll hash table growth factor */
#define	POLLHASHTHRESHOLD	2	/* poll hash list length threshold */
/*
 * Reduces y modulo x. NOTE(review): presumably x is the hash table size
 * and y the fd being hashed -- confirm against the callers.
 */
#define	POLLHASH(x, y)	((y) % (x))	/* poll hash function */

/*
 * poll.c assumes the POLLMAPCHUNK is power of 2
 */
#define	POLLMAPCHUNK	2048	/* bitmap inc -- each for 2K of polled fd's */

/*
 * used to reference from watched fd back to the fd position in cached
 * poll list for quick revents update.
 */
typedef struct xref {
	ssize_t	xf_position;	/* xref fd position in poll fd list */
	short	xf_refcnt;	/* ref cnt of same fd in poll list */
} xref_t;

/* Reserved (negative) values for xf_position. */
#define	POLLPOSINVAL	(-1L)	/* xf_position is invalid */
#define	POLLPOSTRANS	(-2L)	/* xf_position is transient state */

/*
 * States of a pcachelink entry; this is the type of pcl_state in
 * struct pcachelink.
 */
typedef enum pclstate {
	/*
	 * NOTE(review): the original comment on PCL_INIT ends at "prior";
	 * presumably "prior to being linked" -- confirm.
	 */
	PCL_INIT = 0,	/* just allocated/zeroed, prior */
	PCL_VALID,	/* linked with both parent and child pollcaches */
	PCL_STALE,	/* still linked but marked stale, pending refresh */
	PCL_INVALID,	/* dissociated from one pollcache, awaiting cleanup */
	PCL_FREE	/* only meant to indicate use-after-free */
} pclstate_t;

/*
 * The pcachelink struct creates an association between parent and child
 * pollcaches in a recursive /dev/poll operation. Fields are protected by
 * pcl_lock although manipulation of pcl_child_next or pcl_parent_next also
 * requires holding pc_lock in the respective pcl_parent_pc or pcl_child_pc
 * pollcache.
 */
struct pcachelink {
	kmutex_t	pcl_lock;		/* protects contents */
	pclstate_t	pcl_state;		/* status of link entry */
	int		pcl_refcnt;		/* ref cnt of linked pcaches */
	pollcache_t	*pcl_child_pc;		/* child pollcache */
	pollcache_t	*pcl_parent_pc;		/* parent pollcache */
	pcachelink_t	*pcl_child_next;	/* next in child list */
	pcachelink_t	*pcl_parent_next;	/* next in parents list */
};

/*
 * polldat is an entry for a cached poll fd. A polldat struct can be in
 * poll cache table as well as on pollhead ph_list, which is used by
 * pollwakeup to wake up a sleeping poller. There should be one polldat
 * per polled fd hanging off pollstate struct.
 */
struct polldat {
	int		pd_fd;		/* cached poll fd */
	int		pd_events;	/* union of all polled events */
	file_t		*pd_fp;		/* used to detect fd reuse */
	pollhead_t	*pd_php;	/* used to undo poll registration */
	kthread_t	*pd_thread;	/* used for waking up a sleep thrd */
	pollcache_t	*pd_pcache;	/* a ptr to the pollcache of this fd */
	polldat_t	*pd_next;	/* next on pollhead's ph_list */
	polldat_t	*pd_hashnext;	/* next in pollcache hash chain */
	int		pd_count;	/* total count from all ref'ed sets */
	int		pd_nsets;	/* num of xref sets, used by poll(2) */
	xref_t		*pd_ref;	/* ptr to xref info, 1 for each set */
	port_kevent_t	*pd_portev;	/* associated port event struct */
	uint64_t	pd_epolldata;	/* epoll data, if any */
};

/*
 * One cache for each thread that polls. Points to a bitmap (used by pollwakeup)
 * and a hash table of polldats.
 * The offset of pc_lock field must be kept in sync with the pc_lock offset
 * of port_fdcache_t, both structs implement pc_lock with offset 0 (see also
 * pollrelock()).
 */
struct pollcache {
	/* pc_lock must remain the first member; see the comment above. */
	kmutex_t	pc_lock;	/* lock to protect pollcache */
	ulong_t		*pc_bitmap;	/* point to poll fd bitmap */
	polldat_t	**pc_hash;	/* points to a hash table of ptrs */
	int		pc_mapend;	/* the largest fd encountered so far */
	int		pc_mapsize;	/* the size of current map */
	int		pc_hashsize;	/* the size of current hash table */
	int		pc_fdcount;	/* track how many fd's are hashed */
	int		pc_flag;	/* see pc_flag define below */
	int		pc_busy;	/* can only exit when its 0 */
	kmutex_t	pc_no_exit;	/* protects pc_busy*, can't be nested */
	kcondvar_t	pc_busy_cv;	/* cv to wait on if pc_busy != 0 */
	kcondvar_t	pc_cv;		/* cv to wait on if needed */
	pid_t		pc_pid;		/* for check acc rights, devpoll only */
	int		pc_mapstart;	/* where search start, devpoll only */
	pcachelink_t	*pc_parents;	/* linked list of epoll parents */
	pcachelink_t	*pc_children;	/* linked list of epoll children */
};

/* pc_flag */
#define	PC_POLLWAKE	0x02	/* pollwakeup() occurred */

#if defined(_KERNEL)
/*
 * Internal routines.
*/ extern void pollnotify(pollcache_t *, int); /* * public poll head interfaces (see poll.h): * * pollhead_clean clean up all polldats on a pollhead list */ extern void pollhead_clean(pollhead_t *); /* * private poll head interfaces: * * pollhead_insert adds a polldat to a pollhead list * pollhead_delete removes a polldat from a pollhead list */ extern void pollhead_insert(pollhead_t *, polldat_t *); extern void pollhead_delete(pollhead_t *, polldat_t *); /* * poll state interfaces: * * pollstate_create initializes per-thread pollstate * pollstate_destroy cleans up per-thread pollstate * pollstate_enter safely lock pollcache for pollstate * pollstate_exit unlock pollcache from pollstate */ extern pollstate_t *pollstate_create(void); extern void pollstate_destroy(pollstate_t *); extern int pollstate_enter(pollcache_t *); extern void pollstate_exit(pollcache_t *); /* * public pcache interfaces: * * pcache_alloc allocate a poll cache skeleton * pcache_create creates all poll cache supporting data struct * pcache_insert cache a poll fd, calls pcache_insert_fd * pcache_lookup given an fd list, returns a cookie * pcache_poll polls the cache for fd's having events on them * pcache_clean clean up all the pollhead and fpollinfo reference * pcache_destroy destroys the pcache */ extern pollcache_t *pcache_alloc(); extern void pcache_create(pollcache_t *, nfds_t); extern int pcache_insert(pollstate_t *, file_t *, pollfd_t *, int *, ssize_t, int); extern int pcache_poll(pollfd_t *, pollstate_t *, nfds_t, int *, int); extern void pcache_clean(pollcache_t *); extern void pcache_destroy(pollcache_t *); /* * private pcache interfaces: * * pcache_lookup_fd lookup an fd, returns a polldat * pcache_alloc_fd allocates and returns a polldat * pcache_insert_fd insert an fd into pcache (called by pcache_insert) * pcache_delete_fd insert an fd into pcache (called by pcacheset_delete_fd) * pcache_grow_hashtbl grows the pollcache hash table and rehash * pcache_grow_map grows the pollcache 
 *			bitmap
 *  pcache_update_xref	update cross ref (from polldat back to cacheset) info
 *  pcache_clean_entry	cleanup an entry in pcache and more...
 *  pcache_wake_parents	wake linked parent pollcaches
 */
extern polldat_t *pcache_lookup_fd(pollcache_t *, int);
extern polldat_t *pcache_alloc_fd(int);
extern void pcache_insert_fd(pollcache_t *, polldat_t *, nfds_t);
extern int pcache_delete_fd(pollstate_t *, int, size_t, int, uint_t);
extern void pcache_grow_hashtbl(pollcache_t *, nfds_t);
extern void pcache_grow_map(pollcache_t *, int);
extern void pcache_update_xref(pollcache_t *, int, ssize_t, int);
extern void pcache_clean_entry(pollstate_t *, int);
extern void pcache_wake_parents(pollcache_t *);

/*
 * pcacheset interfaces:
 *
 *  pcacheset_create		creates new pcachesets (easier for dynamic
 *				pcachesets)
 *  pcacheset_destroy		destroys a pcacheset
 *  pcacheset_cache_list	caches and polls a new poll list
 *  pcacheset_remove_list	removes (usually a partial) cached poll list
 *  pcacheset_resolve		resolves extant pcacheset and fd list
 *  pcacheset_cmp		compares a pcacheset with an fd list
 *  pcacheset_invalidate	invalidate entries in pcachesets
 *  pcacheset_reset_count	resets the usage counter of pcachesets
 *  pcacheset_replace		selects a poll cacheset for replacement
 */
extern pollcacheset_t *pcacheset_create(int);
extern void pcacheset_destroy(pollcacheset_t *, int);
extern int pcacheset_cache_list(pollstate_t *, pollfd_t *, int *, int);
extern void pcacheset_remove_list(pollstate_t *, pollfd_t *, int, int, int,
    int);
extern int pcacheset_resolve(pollstate_t *, nfds_t, int *, int);
extern int pcacheset_cmp(pollfd_t *, pollfd_t *, pollfd_t *, int);
extern void pcacheset_invalidate(pollstate_t *, polldat_t *);
extern void pcacheset_reset_count(pollstate_t *, int);
extern int pcacheset_replace(pollstate_t *);

#endif /* defined(_KERNEL) */

#ifdef __cplusplus
}
#endif

#endif /* defined(_KERNEL) || defined(_KMEMUSER) */

#endif /* _SYS_POLL_IMPL_H */