xref: /titanic_53/usr/src/uts/common/sys/poll_impl.h (revision a5eb7107f06a6e23e8e77e8d3a84c1ff90a73ac6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
29  */
30 
31 #ifndef _SYS_POLL_IMPL_H
32 #define	_SYS_POLL_IMPL_H
33 
34 /*
35  * Caching Poll Subsystem:
36  *
37  * Each kernel thread (1), if engaged in poll system call, has a reference to
38  * a pollstate_t (2), which contains relevant flags and locks.  The pollstate_t
39  * contains a pointer to a pcache_t (3), which caches the state of previous
40  * calls to poll.  A bitmap (4) is stored inside the poll cache, where each
41  * bit represents a file descriptor.  The bits are set if the corresponding
42  * device has a polled event pending.  Only fds with their bit set will be
43  * examined on the next poll invocation.  The pollstate_t also contains a list
44  * of fd sets (5), which are represented by the pollcacheset_t type.  These
45  * structures keep track of the pollfd_t arrays (6) passed in from userland.
46  * Each polled file descriptor has a corresponding polldat_t which can be
47  * chained onto a device's pollhead, and these are kept in a hash table (7)
48  * inside the pcache_t.  The hash table allows efficient conversion of a
49  * given fd to its corresponding polldat_t.
50  *
51  * (1)              (2)
52  * +-----------+    +-------------+
53  * | kthread_t |--->| pollstate_t |-->+-------------+  (6)
54  * +-----------+    +-------------+(5)| pcacheset_t |->[_][_][_][_] pollfd_t
55  *                          |         +-------------+
56  *                          |         | pcacheset_t |->[_][_][_][_] pollfd_t
57  * (1a)                     |         +-------------+
58  * +---------------+	    |
59  * | /dev/poll tbl |	    |
60  * +-v-------------+	    |
61  *   |			    |
62  *   +------------------+   |
63  * (7)              (3) V   v
64  * polldat hash     +-------------+    (4) bitmap representing fd space
65  * [_][_][_][_]<----|             |--->000010010010001010101010101010110
66  *  |  |  |  |      | pollcache_t |
67  *  .  v  .  .      |             |
68  *    [polldat_t]   +-------------+
69  *     |
70  *    [polldat_t]
71  *     |
72  *     v
73  *     NULL
74  *
75  *
76  * Both poll system call and /dev/poll use the pollcache_t structure
77  * definition and the routines managing the structure. But poll(2) and
78  * /dev/poll have their own copy of the structures. The /dev/poll driver
79  * table (1a) contains an array of pointers, each pointing at a pcache_t
80  * struct (3). A device minor number is used as a device table index.
81  *
82  */
83 #include <sys/poll.h>
84 
85 #if defined(_KERNEL) || defined(_KMEMUSER)
86 
87 #include <sys/thread.h>
88 #include <sys/file.h>
89 
90 #ifdef	__cplusplus
91 extern "C" {
92 #endif
93 
94 /*
95  * A pollcacheset describes one cached pollfd_t array passed in from userland.
96  */
97 typedef struct pollcacheset {
98 	uintptr_t	pcs_usradr;	/* user address of the pollfd array */
99 	pollfd_t	*pcs_pollfd;	/* cached poll list */
100 	size_t		pcs_nfds;	/* number of pollfds in cached list */
101 	ulong_t		pcs_count;	/* usage counter, for LU replacement policy */
102 } pollcacheset_t;
103 
104 #define	POLLFDSETS	2	/* presumably no. of pcachesets -- TODO confirm */
105 
106 /*
107  * State information kept by each polling thread (referenced from kthread_t)
108  */
109 typedef struct pollstate {
110 	pollfd_t	*ps_pollfd;	/* holds the current poll list */
111 	size_t		ps_nfds;	/* size of ps_pollfd */
112 	kmutex_t	ps_lock;	/* mutex for sleep/wakeup */
113 	struct pollcache *ps_pcache;	/* poll cache: fd bitmap + polldat hash */
114 	pollcacheset_t	*ps_pcacheset;	/* list of cached poll fd sets */
115 	int		ps_nsets;	/* no. of cached poll sets */
116 	pollfd_t	*ps_dpbuf;	/* return pollfd buf used by devpoll */
117 	size_t		ps_dpbufsize;	/* size of ps_dpbuf */
118 } pollstate_t;
119 
120 /*
121  * poll cache (polldat hash table) size defines
122  */
123 #define	POLLCHUNKSHIFT		8	/* log2 of hash table increment (256) */
124 #define	POLLHASHCHUNKSZ		(1 << POLLCHUNKSHIFT)	/* increment size */
125 #define	POLLHASHINC		2	/* poll hash table growth factor */
126 #define	POLLHASHTHRESHOLD	2	/* poll hash list length threshold */
127 #define	POLLHASH(x, y)	((y) % (x))	/* poll hash function: y modulo x */
128 
129 /*
130  * poll.c assumes that POLLMAPCHUNK is a power of 2
131  */
132 #define	POLLMAPCHUNK	2048	/* bitmap inc -- each for 2K of polled fd's */
133 
134 /*
135  * Used to reference from a watched fd back to the fd's position in the cached
136  * poll list, for quick revents update.
137  */
138 typedef struct xref {
139 	ssize_t	xf_position;    /* xref fd position in poll fd list */
140 	short	xf_refcnt;	/* ref cnt of same fd in poll list */
141 } xref_t;
142 
143 #define	POLLPOSINVAL	(-1L)	/* xf_position is invalid */
144 #define	POLLPOSTRANS	(-2L)	/* xf_position is in a transient state */
145 
146 /*
147  * polldat is an entry for a cached poll fd. A polldat struct can be in the
148  * poll cache hash table as well as on a pollhead ph_list, which is used by
149  * pollwakeup to wake up a sleeping poller. There should be one polldat
150  * per polled fd hanging off the pollstate struct.
151  */
152 typedef struct polldat {
153 	int		pd_fd;		/* cached poll fd */
154 	int		pd_events;	/* union of all polled events */
155 	file_t		*pd_fp;		/* used to detect fd reuse */
156 	pollhead_t	*pd_php;	/* used to undo poll registration */
157 	kthread_t	*pd_thread;	/* used for waking up a sleeping thread */
158 	struct pollcache *pd_pcache;	/* a ptr to the pollcache of this fd */
159 	struct polldat	*pd_next;	/* next on pollhead's ph_list */
160 	struct polldat	*pd_hashnext;	/* next in pollcache hash chain */
161 	int		pd_count;	/* total count from all ref'ed sets */
162 	int		pd_nsets;	/* num of xref sets, used by poll(2) */
163 	xref_t		*pd_ref;	/* ptr to xref info, 1 for each set */
164 	struct port_kevent *pd_portev;	/* associated port event struct */
165 	uint64_t	pd_epolldata;	/* epoll data, if any */
166 } polldat_t;
167 
168 /*
169  * One cache for each thread that polls. Points to a bitmap (used by pollwakeup)
170  * and a hash table of polldats.
171  * The offset of the pc_lock field must be kept in sync with the pc_lock offset
172  * of port_fdcache_t; both structs implement pc_lock at offset 0 (see also
173  * pollrelock()).
174  */
175 typedef struct pollcache {
176 	kmutex_t	pc_lock;	/* lock to protect pollcache */
177 	ulong_t		*pc_bitmap;	/* point to poll fd bitmap */
178 	polldat_t	**pc_hash;	/* points to a hash table of ptrs */
179 	int		pc_mapend;	/* the largest fd encountered so far */
180 	int		pc_mapsize;	/* the size of current map */
181 	int		pc_hashsize;	/* the size of current hash table */
182 	int		pc_fdcount;	/* track how many fd's are hashed */
183 	int		pc_flag;	/* see pc_flag define below */
184 	int		pc_busy;	/* can only exit when it's 0 */
185 	kmutex_t	pc_no_exit;	/* protects pc_busy*, can't be nested */
186 	kcondvar_t	pc_busy_cv;	/* cv to wait on if pc_busy != 0 */
187 	kcondvar_t	pc_cv;		/* cv to wait on if needed */
188 	pid_t		pc_pid;		/* for check acc rights, devpoll only */
189 	int		pc_mapstart;	/* where search start, devpoll only */
190 } pollcache_t;
191 
192 /* pc_flag values (pollcache_t) */
193 #define	PC_POLLWAKE	0x02	/* pollwakeup() occurred */
194 #define	PC_WRITEWANTED	0x04	/* writer wishes to modify the pollcache_t */
195 
196 #if defined(_KERNEL)
197 /*
198  * Internal routines (kernel only).
199  */
200 extern void pollnotify(pollcache_t *, int);
201 
202 /*
203  * public poll head interfaces (see poll.h):
204  *
205  *  pollhead_clean      cleans up all polldats on a pollhead list
206  */
207 extern void pollhead_clean(pollhead_t *);
208 
209 /*
210  * private poll head interfaces:
211  *
212  *  pollhead_insert     adds a polldat to a pollhead list
213  *  pollhead_delete     removes a polldat from a pollhead list
214  */
215 extern void pollhead_insert(pollhead_t *, polldat_t *);
216 extern void pollhead_delete(pollhead_t *, polldat_t *);
217 
218 /*
219  * poll state interfaces:
220  *
221  *  pollstate_create    creates per-thread pollstate, returns a pollstate_t *
222  *  pollstate_destroy   cleans up the given per-thread pollstate
223  */
224 extern pollstate_t *pollstate_create(void);
225 extern void pollstate_destroy(pollstate_t *);
226 
227 /*
228  * public pcache interfaces:
229  *
230  *  pcache_alloc	allocate a poll cache skeleton (takes no arguments)
231  *  pcache_create       creates all poll cache supporting data struct
232  *  pcache_insert	cache a poll fd, calls pcache_insert_fd
233  *  pcache_lookup       given an fd list, returns a cookie (no prototype below)
234  *  pcache_poll         polls the cache for fd's having events on them
235  *  pcache_clean        clean up all the pollhead and fpollinfo reference
236  *  pcache_destroy      destroys the pcache
237  */
238 extern pollcache_t *pcache_alloc(void);
239 extern void pcache_create(pollcache_t *, nfds_t);
240 extern int pcache_insert(pollstate_t *, file_t *, pollfd_t *, int *, ssize_t,
241     int);
242 extern int pcache_poll(pollfd_t *, pollstate_t *, nfds_t, int *, int);
243 extern void pcache_clean(pollcache_t *);
244 extern void pcache_destroy(pollcache_t *);
245 
246 /*
247  * private pcache interfaces:
248  *
249  *  pcache_lookup_fd	lookup an fd, returns a polldat
250  *  pcache_alloc_fd	allocates and returns a polldat
251  *  pcache_insert_fd	insert an fd into pcache (called by pcache_insert)
252  *  pcache_delete_fd	delete an fd from pcache (called by pcacheset_delete_fd)
253  *  pcache_grow_hashtbl	grows the pollcache hash table and rehash
254  *  pcache_grow_map	grows the pollcache bitmap
255  *  pcache_update_xref	update cross ref (from polldat back to cacheset) info
256  *  pcache_clean_entry	cleanup an entry in pcache and more...
257  */
258 extern polldat_t *pcache_lookup_fd(pollcache_t *, int);
259 extern polldat_t *pcache_alloc_fd(int);
260 extern void pcache_insert_fd(pollcache_t *, polldat_t *, nfds_t);
261 extern int pcache_delete_fd(pollstate_t *, int, size_t, int, uint_t);
262 extern void pcache_grow_hashtbl(pollcache_t *, nfds_t);
263 extern void pcache_grow_map(pollcache_t *, int);
264 extern void pcache_update_xref(pollcache_t *, int, ssize_t, int);
265 extern void pcache_clean_entry(pollstate_t *, int);
266 
267 /*
268  * pcacheset interfaces:
269  *
270  * pcacheset_create     creates new pcachesets (easier for dynamic pcachesets)
271  * pcacheset_destroy    destroys a pcacheset
272  * pcacheset_cache_list caches and polls a new poll list
273  * pcacheset_remove_list removes (usually a partial) cached poll list
274  * pcacheset_resolve    resolves extant pcacheset and fd list
275  * pcacheset_cmp        compares a pcacheset with an fd list
276  * pcacheset_invalidate invalidates entries in pcachesets
277  * pcacheset_reset_count resets the usage counter of pcachesets
278  * pcacheset_replace	selects a poll cacheset for replacement
279  */
280 extern pollcacheset_t *pcacheset_create(int);
281 extern void pcacheset_destroy(pollcacheset_t *, int);
282 extern int pcacheset_cache_list(pollstate_t *, pollfd_t *, int *, int);
283 extern void pcacheset_remove_list(pollstate_t *, pollfd_t *, int, int, int,
284     int);
285 extern int pcacheset_resolve(pollstate_t *, nfds_t, int *, int);
286 extern int pcacheset_cmp(pollfd_t *, pollfd_t *, pollfd_t *, int);
287 extern void pcacheset_invalidate(pollstate_t *, polldat_t *);
288 extern void pcacheset_reset_count(pollstate_t *, int);
289 extern int pcacheset_replace(pollstate_t *);
290 
291 #endif /* defined(_KERNEL) */
292 
293 #ifdef	__cplusplus
294 }
295 #endif
296 
297 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
298 
299 #endif	/* _SYS_POLL_IMPL_H */
300