xref: /freebsd/usr.sbin/bhyve/mevent.c (revision b08fc26cbdd00df6852e71e1be58fa9cc92019f0)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*
30  * Micro event library for FreeBSD, designed for a single i/o thread
31  * using kqueue, and having events be persistent by default.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
45 
46 #include <sys/types.h>
47 #ifndef WITHOUT_CAPSICUM
48 #include <sys/capsicum.h>
49 #endif
50 #include <sys/event.h>
51 #include <sys/time.h>
52 
53 #include <pthread.h>
54 #include <pthread_np.h>
55 
56 #include "mevent.h"
57 
/*
 * Size of the kevent change/event arrays and of the buffer used to
 * drain the wake-up pipe.
 */
enum { MEVENT_MAX = 64 };

/*
 * Values stored in an event's me_state field.  Each one selects the
 * kevent flag that will be requested for the event the next time the
 * change list is processed.
 */
enum {
	MEV_ADD = 1,
	MEV_ENABLE = 2,
	MEV_DISABLE = 3,
	MEV_DEL_PENDING = 4
};

extern char *vmname;

/* Thread that runs mevent_dispatch(); set when that function starts. */
static pthread_t mevent_tid;

/* Identifier assigned to the next timer event that is added. */
static int mevent_timid = 43;

/*
 * Wake-up pipe: [0] is registered as a read event, [1] is written by
 * threads other than the dispatch thread.
 */
static int mevent_pipefd[2];

/* Mutex taken around every manipulation of the event lists. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
71 
72 struct mevent {
73 	void	(*me_func)(int, enum ev_type, void *);
74 #define me_msecs me_fd
75 	int	me_fd;
76 	int	me_timid;
77 	enum ev_type me_type;
78 	void    *me_param;
79 	int	me_cq;
80 	int	me_state;
81 	int	me_closefd;
82 	LIST_ENTRY(mevent) me_list;
83 };
84 
85 static LIST_HEAD(listhead, mevent) global_head, change_head;
86 
87 static void
88 mevent_qlock(void)
89 {
90 	pthread_mutex_lock(&mevent_lmutex);
91 }
92 
93 static void
94 mevent_qunlock(void)
95 {
96 	pthread_mutex_unlock(&mevent_lmutex);
97 }
98 
99 static void
100 mevent_pipe_read(int fd, enum ev_type type, void *param)
101 {
102 	char buf[MEVENT_MAX];
103 	int status;
104 
105 	/*
106 	 * Drain the pipe read side. The fd is non-blocking so this is
107 	 * safe to do.
108 	 */
109 	do {
110 		status = read(fd, buf, sizeof(buf));
111 	} while (status == MEVENT_MAX);
112 }
113 
114 static void
115 mevent_notify(void)
116 {
117 	char c;
118 
119 	/*
120 	 * If calling from outside the i/o thread, write a byte on the
121 	 * pipe to force the i/o thread to exit the blocking kevent call.
122 	 */
123 	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
124 		write(mevent_pipefd[1], &c, 1);
125 	}
126 }
127 
/*
 * Translate an event's me_type into the matching kqueue filter.
 * Returns 0 when the type is not one of the four handled here.
 */
static int
mevent_kq_filter(struct mevent *mevp)
{
	switch (mevp->me_type) {
	case EVF_READ:
		return (EVFILT_READ);
	case EVF_WRITE:
		return (EVFILT_WRITE);
	case EVF_TIMER:
		return (EVFILT_TIMER);
	case EVF_SIGNAL:
		return (EVFILT_SIGNAL);
	default:
		return (0);
	}
}
149 
/*
 * Translate an event's me_state into the kevent action flag to request.
 *
 * An unknown state trips the assertion; when assertions are compiled
 * out, 0 is returned so the caller never sees an indeterminate value.
 */
static int
mevent_kq_flags(struct mevent *mevp)
{
	switch (mevp->me_state) {
	case MEV_ADD:
		return (EV_ADD);	/* implicitly enabled */
	case MEV_ENABLE:
		return (EV_ENABLE);
	case MEV_DISABLE:
		return (EV_DISABLE);
	case MEV_DEL_PENDING:
		return (EV_DELETE);
	default:
		assert(0);
		return (0);
	}
}
175 
/*
 * Filter-specific flags for an event.  None are used at present, so the
 * result is always 0 and the event itself is not inspected.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{
	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	const int fflags = 0;

	(void)mevp;

	return (fflags);
}
182 
/*
 * Turn the queued changes on change_head into kevent entries.
 *
 * mfd is unused.  kev must have room for MEVENT_MAX entries.  Each
 * processed event leaves the change list and is either freed (deletion
 * pending) or moved to global_head.  Events flagged me_closefd have
 * their descriptor closed instead of getting a kevent entry.
 *
 * Returns the number of entries written to kev, at most MEVENT_MAX.
 */
static int
mevent_build(int mfd, struct kevent *kev)
{
	struct mevent *mevp, *tmpp;
	int i;

	(void)mfd;

	i = 0;

	mevent_qlock();

	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
		/*
		 * Never write past the end of kev.  Whatever is left stays
		 * on change_head, still marked as queued, and is picked up
		 * by a later call.
		 */
		if (i == MEVENT_MAX)
			break;

		if (mevp->me_closefd) {
			/*
			 * A close of the file descriptor will remove the
			 * event
			 */
			close(mevp->me_fd);
		} else {
			/* Timers are identified by me_timid, not by an fd. */
			if (mevp->me_type == EVF_TIMER) {
				EV_SET(&kev[i], mevp->me_timid,
				    mevent_kq_filter(mevp),
				    mevent_kq_flags(mevp),
				    mevent_kq_fflags(mevp),
				    mevp->me_msecs, mevp);
			} else {
				EV_SET(&kev[i], mevp->me_fd,
				    mevent_kq_filter(mevp),
				    mevent_kq_flags(mevp),
				    mevent_kq_fflags(mevp),
				    0, mevp);
			}
			i++;
		}

		mevp->me_cq = 0;
		LIST_REMOVE(mevp, me_list);

		if (mevp->me_state == MEV_DEL_PENDING) {
			free(mevp);
		} else {
			LIST_INSERT_HEAD(&global_head, mevp, me_list);
		}
	}

	mevent_qunlock();

	return (i);
}
231 
/*
 * Run the callback of every event reported by kevent.
 *
 * kev holds the reported events and numev is how many of them are valid;
 * a count of zero or less means there is nothing to do.
 */
static void
mevent_handle(struct kevent *kev, int numev)
{
	struct kevent *kp;
	struct mevent *ev;
	int remaining;

	kp = kev;
	for (remaining = numev; remaining > 0; remaining--, kp++) {
		/* udata was set to the owning event when it was queued. */
		ev = kp->udata;

		/* XXX check for EV_ERROR ? */

		ev->me_func(ev->me_fd, ev->me_type, ev->me_param);
	}
}
246 
247 struct mevent *
248 mevent_add(int tfd, enum ev_type type,
249 	   void (*func)(int, enum ev_type, void *), void *param)
250 {
251 	struct mevent *lp, *mevp;
252 
253 	if (tfd < 0 || func == NULL) {
254 		return (NULL);
255 	}
256 
257 	mevp = NULL;
258 
259 	mevent_qlock();
260 
261 	/*
262 	 * Verify that the fd/type tuple is not present in any list
263 	 */
264 	LIST_FOREACH(lp, &global_head, me_list) {
265 		if (type != EVF_TIMER && lp->me_fd == tfd &&
266 		    lp->me_type == type) {
267 			goto exit;
268 		}
269 	}
270 
271 	LIST_FOREACH(lp, &change_head, me_list) {
272 		if (type != EVF_TIMER && lp->me_fd == tfd &&
273 		    lp->me_type == type) {
274 			goto exit;
275 		}
276 	}
277 
278 	/*
279 	 * Allocate an entry, populate it, and add it to the change list.
280 	 */
281 	mevp = calloc(1, sizeof(struct mevent));
282 	if (mevp == NULL) {
283 		goto exit;
284 	}
285 
286 	if (type == EVF_TIMER) {
287 		mevp->me_msecs = tfd;
288 		mevp->me_timid = mevent_timid++;
289 	} else
290 		mevp->me_fd = tfd;
291 	mevp->me_type = type;
292 	mevp->me_func = func;
293 	mevp->me_param = param;
294 
295 	LIST_INSERT_HEAD(&change_head, mevp, me_list);
296 	mevp->me_cq = 1;
297 	mevp->me_state = MEV_ADD;
298 	mevent_notify();
299 
300 exit:
301 	mevent_qunlock();
302 
303 	return (mevp);
304 }
305 
306 static int
307 mevent_update(struct mevent *evp, int newstate)
308 {
309 	/*
310 	 * It's not possible to enable/disable a deleted event
311 	 */
312 	if (evp->me_state == MEV_DEL_PENDING)
313 		return (EINVAL);
314 
315 	/*
316 	 * No update needed if state isn't changing
317 	 */
318 	if (evp->me_state == newstate)
319 		return (0);
320 
321 	mevent_qlock();
322 
323 	evp->me_state = newstate;
324 
325 	/*
326 	 * Place the entry onto the changed list if not already there.
327 	 */
328 	if (evp->me_cq == 0) {
329 		evp->me_cq = 1;
330 		LIST_REMOVE(evp, me_list);
331 		LIST_INSERT_HEAD(&change_head, evp, me_list);
332 		mevent_notify();
333 	}
334 
335 	mevent_qunlock();
336 
337 	return (0);
338 }
339 
340 int
341 mevent_enable(struct mevent *evp)
342 {
343 
344 	return (mevent_update(evp, MEV_ENABLE));
345 }
346 
347 int
348 mevent_disable(struct mevent *evp)
349 {
350 
351 	return (mevent_update(evp, MEV_DISABLE));
352 }
353 
354 static int
355 mevent_delete_event(struct mevent *evp, int closefd)
356 {
357 	mevent_qlock();
358 
359 	/*
360          * Place the entry onto the changed list if not already there, and
361 	 * mark as to be deleted.
362          */
363         if (evp->me_cq == 0) {
364 		evp->me_cq = 1;
365 		LIST_REMOVE(evp, me_list);
366 		LIST_INSERT_HEAD(&change_head, evp, me_list);
367 		mevent_notify();
368         }
369 	evp->me_state = MEV_DEL_PENDING;
370 
371 	if (closefd)
372 		evp->me_closefd = 1;
373 
374 	mevent_qunlock();
375 
376 	return (0);
377 }
378 
/*
 * Schedule evp for deletion, leaving its descriptor open.
 * Always returns 0.
 */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
385 
/*
 * Schedule evp for deletion and have its descriptor closed when the
 * deletion is processed.  Always returns 0.
 */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
392 
393 static void
394 mevent_set_name(void)
395 {
396 
397 	pthread_set_name_np(mevent_tid, "mevent");
398 }
399 
/*
 * Event loop of the i/o thread.  Does not return.
 *
 * Records the calling thread as the i/o thread, creates the kqueue and
 * the wake-up pipe, registers the pipe's read side as an internal event,
 * then alternates forever between submitting queued changes and waiting
 * for events.  A failure to create the kqueue or the pipe, or to apply
 * the sandbox rights, terminates the process with EX_OSERR.
 */
void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int mfd;
	int numev;
	int ret;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	mevent_tid = pthread_self();
	mevent_set_name();

	/*
	 * Check the result explicitly: an assertion disappears when built
	 * with NDEBUG, and descriptor 0 is a valid return value.
	 */
	mfd = kqueue();
	if (mfd == -1)
		err(EX_OSERR, "kqueue");

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_KQUEUE);
	if (cap_rights_limit(mfd, &rights) == -1 && errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it. Both ends
	 * are created non-blocking: the read side so that draining it
	 * can never stall this thread, the write side so that a full
	 * pipe can never stall a notifier.
	 */
	ret = pipe2(mevent_pipefd, O_NONBLOCK);
	if (ret < 0) {
		perror("pipe");
		exit(EX_OSERR);
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (cap_rights_limit(mevent_pipefd[0], &rights) == -1 && errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (cap_rights_limit(mevent_pipefd[1], &rights) == -1 && errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	/*
	 * Add internal event handler for the pipe read fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);

	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 *
		 * A completely full changelist may mean that further
		 * changes are still queued, so repeat until a pass comes
		 * back short; nothing should be left waiting behind the
		 * blocking call below.
		 */
		do {
			numev = mevent_build(mfd, changelist);
			if (numev > 0) {
				ret = kevent(mfd, changelist, numev, NULL, 0,
				    NULL);
				if (ret == -1) {
					perror("Error return from kevent change");
				}
			}
		} while (numev >= MEVENT_MAX);

		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1) {
			if (errno != EINTR)
				perror("Error return from kevent monitor");
			/* Nothing was reported, go around again. */
			continue;
		}

		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
	}
}
479