xref: /illumos-gate/usr/src/cmd/bhyve/common/mevent.c (revision 5c4a5fe16715fb423db76577a6883b5bbecdbe45)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright 2018 Joyent, Inc.
31  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
32  */
33 
34 /*
35  * Micro event library for FreeBSD, designed for a single i/o thread
36  * using kqueue, and having events be persistent by default.
37  */
38 
39 
#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#ifdef __FreeBSD__
#include <sys/event.h>
#else
#include <port.h>
#include <sys/poll.h>
#include <sys/siginfo.h>
#include <sys/queue.h>
#include <sys/debug.h>
#include <sys/stat.h>
#endif
#include <sys/time.h>

#include <pthread.h>
#include <pthread_np.h>

#include "mevent.h"
73 
/*
 * Size of the kevent change/event arrays used by the dispatch loop and of
 * the scratch buffer used to drain the notification pipe.
 */
#define	MEVENT_MAX	64

#ifndef __FreeBSD__
/*
 * Local stand-ins for the kqueue EV_* flags on platforms without kqueue.
 * EV_ADD and EV_ENABLE deliberately share a value here.
 */
#define	EV_ENABLE	0x01
#define	EV_ADD		EV_ENABLE
#define	EV_DISABLE	0x02
#define	EV_DELETE	0x04

/* Period, in milliseconds, of the timer used to poll for vnode changes. */
static int mevent_file_poll_interval_ms = 5000;
#endif

#ifdef __FreeBSD__
/* Next kqueue timer identifier handed out by mevent_add_state(). */
static int mevent_timid = 43;
#endif

/* kqueue (FreeBSD) or event port (otherwise) created by mevent_init(). */
static int mfd;
/* Wake-up pipe: [0] is watched by the i/o thread, [1] is written to. */
static int mevent_pipefd[2];
/* Thread running mevent_dispatch(). */
static pthread_t mevent_tid;
/* Ensures mevent_init() runs exactly once. */
static pthread_once_t mevent_once = PTHREAD_ONCE_INIT;
/* Serialises access to the event lists and per-event queue state. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
93 
94 struct mevent {
95 	void	(*me_func)(int, enum ev_type, void *);
96 #define me_msecs me_fd
97 	int	me_fd;
98 #ifdef __FreeBSD__
99 	int	me_timid;
100 #else
101 	timer_t me_timid;
102 #endif
103 	enum ev_type me_type;
104 	void    *me_param;
105 	int	me_cq;
106 	int	me_state; /* Desired kevent flags. */
107 	int	me_closefd;
108 	int	me_fflags;
109 #ifndef __FreeBSD__
110 	port_notify_t	me_notify;
111 	struct sigevent	me_sigev;
112 	boolean_t	me_auto_requeue;
113 	struct {
114 		int	mp_fd;
115 		off_t	mp_size;
116 		void	(*mp_func)(int, enum ev_type, void *);
117 		void    *mp_param;
118 	} me_poll;
119 #endif
120 	LIST_ENTRY(mevent) me_list;
121 };
122 
LIST_HEAD(listhead,mevent)123 static LIST_HEAD(listhead, mevent) global_head, change_head;
124 
125 static void
126 mevent_qlock(void)
127 {
128 	pthread_mutex_lock(&mevent_lmutex);
129 }
130 
131 static void
mevent_qunlock(void)132 mevent_qunlock(void)
133 {
134 	pthread_mutex_unlock(&mevent_lmutex);
135 }
136 
137 static void
mevent_pipe_read(int fd,enum ev_type type __unused,void * param __unused)138 mevent_pipe_read(int fd, enum ev_type type __unused, void *param __unused)
139 {
140 	char buf[MEVENT_MAX];
141 	int status;
142 
143 	/*
144 	 * Drain the pipe read side. The fd is non-blocking so this is
145 	 * safe to do.
146 	 */
147 	do {
148 		status = read(fd, buf, sizeof(buf));
149 	} while (status == MEVENT_MAX);
150 }
151 
152 static void
mevent_notify(void)153 mevent_notify(void)
154 {
155 	char c = '\0';
156 
157 	/*
158 	 * If calling from outside the i/o thread, write a byte on the
159 	 * pipe to force the i/o thread to exit the blocking kevent call.
160 	 */
161 	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
162 		write(mevent_pipefd[1], &c, 1);
163 	}
164 }
165 
166 static void
mevent_init(void)167 mevent_init(void)
168 {
169 #ifndef WITHOUT_CAPSICUM
170 	cap_rights_t rights;
171 #endif
172 
173 #ifdef __FreeBSD__
174 	mfd = kqueue();
175 #else
176 	mfd = port_create();
177 #endif
178 	assert(mfd > 0);
179 
180 #ifndef WITHOUT_CAPSICUM
181 	cap_rights_init(&rights, CAP_KQUEUE);
182 	if (caph_rights_limit(mfd, &rights) == -1)
183 		errx(EX_OSERR, "Unable to apply rights for sandbox");
184 #endif
185 
186 	LIST_INIT(&change_head);
187 	LIST_INIT(&global_head);
188 }
189 
190 
191 #ifdef __FreeBSD__
192 static int
mevent_kq_filter(struct mevent * mevp)193 mevent_kq_filter(struct mevent *mevp)
194 {
195 	int retval;
196 
197 	retval = 0;
198 
199 	if (mevp->me_type == EVF_READ)
200 		retval = EVFILT_READ;
201 
202 	if (mevp->me_type == EVF_WRITE)
203 		retval = EVFILT_WRITE;
204 
205 	if (mevp->me_type == EVF_TIMER)
206 		retval = EVFILT_TIMER;
207 
208 	if (mevp->me_type == EVF_SIGNAL)
209 		retval = EVFILT_SIGNAL;
210 
211 	if (mevp->me_type == EVF_VNODE)
212 		retval = EVFILT_VNODE;
213 
214 	return (retval);
215 }
216 
217 static int
mevent_kq_flags(struct mevent * mevp)218 mevent_kq_flags(struct mevent *mevp)
219 {
220 	int retval;
221 
222 	retval = mevp->me_state;
223 
224 	if (mevp->me_type == EVF_VNODE)
225 		retval |= EV_CLEAR;
226 
227 	return (retval);
228 }
229 
230 static int
mevent_kq_fflags(struct mevent * mevp)231 mevent_kq_fflags(struct mevent *mevp)
232 {
233 	int retval;
234 
235 	retval = 0;
236 
237 	switch (mevp->me_type) {
238 	case EVF_VNODE:
239 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
240 			retval |= NOTE_ATTRIB;
241 		break;
242 	case EVF_READ:
243 	case EVF_WRITE:
244 	case EVF_TIMER:
245 	case EVF_SIGNAL:
246 		break;
247 	}
248 
249 	return (retval);
250 }
251 
252 static void
mevent_populate(struct mevent * mevp,struct kevent * kev)253 mevent_populate(struct mevent *mevp, struct kevent *kev)
254 {
255 	if (mevp->me_type == EVF_TIMER) {
256 		kev->ident = mevp->me_timid;
257 		kev->data = mevp->me_msecs;
258 	} else {
259 		kev->ident = mevp->me_fd;
260 		kev->data = 0;
261 	}
262 	kev->filter = mevent_kq_filter(mevp);
263 	kev->flags = mevent_kq_flags(mevp);
264 	kev->fflags = mevent_kq_fflags(mevp);
265 	kev->udata = mevp;
266 }
267 
268 static int
mevent_build(struct kevent * kev)269 mevent_build(struct kevent *kev)
270 {
271 	struct mevent *mevp, *tmpp;
272 	int i;
273 
274 	i = 0;
275 
276 	mevent_qlock();
277 
278 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
279 		if (mevp->me_closefd) {
280 			/*
281 			 * A close of the file descriptor will remove the
282 			 * event
283 			 */
284 			close(mevp->me_fd);
285 		} else {
286 			assert((mevp->me_state & EV_ADD) == 0);
287 			mevent_populate(mevp, &kev[i]);
288 			i++;
289 		}
290 
291 		mevp->me_cq = 0;
292 		LIST_REMOVE(mevp, me_list);
293 
294 		if (mevp->me_state & EV_DELETE) {
295 			free(mevp);
296 		} else {
297 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
298 		}
299 
300 		assert(i < MEVENT_MAX);
301 	}
302 
303 	mevent_qunlock();
304 
305 	return (i);
306 }
307 
308 static void
mevent_handle(struct kevent * kev,int numev)309 mevent_handle(struct kevent *kev, int numev)
310 {
311 	struct mevent *mevp;
312 	int i;
313 
314 	for (i = 0; i < numev; i++) {
315 		mevp = kev[i].udata;
316 
317 		/* XXX check for EV_ERROR ? */
318 
319 		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
320 	}
321 }
322 
323 #else /* __FreeBSD__ */
324 
325 static boolean_t
mevent_clarify_state(struct mevent * mevp)326 mevent_clarify_state(struct mevent *mevp)
327 {
328 	const int state = mevp->me_state;
329 
330 	if ((state & EV_DELETE) != 0) {
331 		/* All other intents are overriden by delete. */
332 		mevp->me_state = EV_DELETE;
333 		return (B_TRUE);
334 	}
335 
336 	/*
337 	 * Without a distinction between EV_ADD and EV_ENABLE in our emulation,
338 	 * handling the add-disabled case means eliding the portfs operation
339 	 * when both flags are present.
340 	 *
341 	 * This is not a concern for subsequent enable/disable operations, as
342 	 * mevent_update() toggles the flags properly so they are not left in
343 	 * conflict.
344 	 */
345 	if (state == (EV_ENABLE|EV_DISABLE)) {
346 		mevp->me_state = EV_DISABLE;
347 		return (B_FALSE);
348 	}
349 
350 	return (B_TRUE);
351 }
352 
353 static void
mevent_poll_file_attrib(int fd,enum ev_type type,void * param)354 mevent_poll_file_attrib(int fd, enum ev_type type, void *param)
355 {
356 	struct mevent *mevp = param;
357 	struct stat st;
358 
359 	if (fstat(mevp->me_poll.mp_fd, &st) != 0) {
360 		(void) fprintf(stderr, "%s: fstat(%d) failed: %s\n",
361 		    __func__, fd, strerror(errno));
362 		return;
363 	}
364 
365 	/*
366 	 * The only current consumer of file attribute monitoring is
367 	 * blockif, which wants to know about size changes.
368 	 */
369 	if (mevp->me_poll.mp_size != st.st_size) {
370 		mevp->me_poll.mp_size = st.st_size;
371 
372 		(*mevp->me_poll.mp_func)(mevp->me_poll.mp_fd, EVF_VNODE,
373 		    mevp->me_poll.mp_param);
374 	}
375 }
376 
377 static void
mevent_update_one_readwrite(struct mevent * mevp)378 mevent_update_one_readwrite(struct mevent *mevp)
379 {
380 	int portfd = mevp->me_notify.portnfy_port;
381 
382 	mevp->me_auto_requeue = B_FALSE;
383 
384 	switch (mevp->me_state) {
385 	case EV_ENABLE:
386 	{
387 		const int events = (mevp->me_type == EVF_READ) ?
388 		    POLLIN : POLLOUT;
389 
390 		if (port_associate(portfd, PORT_SOURCE_FD, mevp->me_fd,
391 		    events, mevp) != 0) {
392 			(void) fprintf(stderr,
393 			    "port_associate fd %d %p failed: %s\n",
394 			    mevp->me_fd, mevp, strerror(errno));
395 		}
396 		return;
397 	}
398 	case EV_DISABLE:
399 	case EV_DELETE:
400 		/*
401 		 * A disable that comes in while an event is being
402 		 * handled will result in an ENOENT.
403 		 */
404 		if (port_dissociate(portfd, PORT_SOURCE_FD,
405 		    mevp->me_fd) != 0 && errno != ENOENT) {
406 			(void) fprintf(stderr, "port_dissociate "
407 			    "portfd %d fd %d mevp %p failed: %s\n",
408 			    portfd, mevp->me_fd, mevp, strerror(errno));
409 		}
410 		return;
411 	default:
412 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
413 		    mevp->me_state);
414 		abort();
415 	}
416 }
417 
418 static void
mevent_update_one_timer(struct mevent * mevp)419 mevent_update_one_timer(struct mevent *mevp)
420 {
421 	mevp->me_auto_requeue = B_TRUE;
422 
423 	switch (mevp->me_state) {
424 	case EV_ENABLE:
425 	{
426 		struct itimerspec it = { 0 };
427 
428 		mevp->me_sigev.sigev_notify = SIGEV_PORT;
429 		mevp->me_sigev.sigev_value.sival_ptr = &mevp->me_notify;
430 
431 		if (timer_create(CLOCK_REALTIME, &mevp->me_sigev,
432 		    &mevp->me_timid) != 0) {
433 			(void) fprintf(stderr, "timer_create failed: %s",
434 			    strerror(errno));
435 			return;
436 		}
437 
438 		/* The first timeout */
439 		it.it_value.tv_sec = mevp->me_msecs / MILLISEC;
440 		it.it_value.tv_nsec =
441 			MSEC2NSEC(mevp->me_msecs % MILLISEC);
442 		/* Repeat at the same interval */
443 		it.it_interval = it.it_value;
444 
445 		if (timer_settime(mevp->me_timid, 0, &it, NULL) != 0) {
446 			(void) fprintf(stderr, "timer_settime failed: %s",
447 			    strerror(errno));
448 		}
449 		return;
450 	}
451 	case EV_DISABLE:
452 	case EV_DELETE:
453 		if (timer_delete(mevp->me_timid) != 0) {
454 			(void) fprintf(stderr, "timer_delete failed: %s",
455 			    strerror(errno));
456 		}
457 		mevp->me_timid = -1;
458 		return;
459 	default:
460 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
461 		    mevp->me_state);
462 		abort();
463 	}
464 }
465 
466 static void
mevent_update_one_vnode(struct mevent * mevp)467 mevent_update_one_vnode(struct mevent *mevp)
468 {
469 	switch (mevp->me_state) {
470 	case EV_ENABLE:
471 	{
472 		struct stat st;
473 		int events = 0;
474 
475 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
476 			events |= FILE_ATTRIB;
477 
478 		assert(events != 0);
479 
480 		/*
481 		 * It is tempting to use the PORT_SOURCE_FILE type for this in
482 		 * conjunction with the FILE_ATTRIB event type. Unfortunately
483 		 * this event type triggers on any change to the file's
484 		 * ctime, and therefore for every write as well as attribute
485 		 * changes. It also does not work for ZVOLs.
486 		 *
487 		 * Convert this to a timer event and poll for the file
488 		 * attribute changes that we care about.
489 		 */
490 
491 		if (fstat(mevp->me_fd, &st) != 0) {
492 			(void) fprintf(stderr, "fstat(%d) failed: %s\n",
493 			    mevp->me_fd, strerror(errno));
494 			return;
495 		}
496 
497 		mevp->me_poll.mp_fd = mevp->me_fd;
498 		mevp->me_poll.mp_size = st.st_size;
499 
500 		mevp->me_poll.mp_func = mevp->me_func;
501 		mevp->me_poll.mp_param = mevp->me_param;
502 		mevp->me_func = mevent_poll_file_attrib;
503 		mevp->me_param = mevp;
504 
505 		mevp->me_type = EVF_TIMER;
506 		mevp->me_timid = -1;
507 		mevp->me_msecs = mevent_file_poll_interval_ms;
508 		mevent_update_one_timer(mevp);
509 
510 		return;
511 	}
512 	case EV_DISABLE:
513 	case EV_DELETE:
514 		/*
515 		 * These events do not really exist as they are converted to
516 		 * timers; fall through to abort.
517 		 */
518 	default:
519 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
520 		    mevp->me_state);
521 		abort();
522 	}
523 }
524 
525 static void
mevent_update_one(struct mevent * mevp)526 mevent_update_one(struct mevent *mevp)
527 {
528 	switch (mevp->me_type) {
529 	case EVF_READ:
530 	case EVF_WRITE:
531 		mevent_update_one_readwrite(mevp);
532 		break;
533 	case EVF_TIMER:
534 		mevent_update_one_timer(mevp);
535 		break;
536 	case EVF_VNODE:
537 		mevent_update_one_vnode(mevp);
538 		break;
539 	case EVF_SIGNAL: /* EVF_SIGNAL not yet implemented. */
540 	default:
541 		(void) fprintf(stderr, "%s: unhandled event type %d\n",
542 		    __func__, mevp->me_type);
543 		abort();
544 	}
545 }
546 
547 static void
mevent_populate(struct mevent * mevp)548 mevent_populate(struct mevent *mevp)
549 {
550 	mevp->me_notify.portnfy_port = mfd;
551 	mevp->me_notify.portnfy_user = mevp;
552 }
553 
554 static void
mevent_update_pending()555 mevent_update_pending()
556 {
557 	struct mevent *mevp, *tmpp;
558 
559 	mevent_qlock();
560 
561 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
562 		mevent_populate(mevp);
563 		if (mevp->me_closefd) {
564 			/*
565 			 * A close of the file descriptor will remove the
566 			 * event
567 			 */
568 			(void) close(mevp->me_fd);
569 			mevp->me_fd = -1;
570 		} else {
571 			if (mevent_clarify_state(mevp)) {
572 				mevent_update_one(mevp);
573 			}
574 		}
575 
576 		mevp->me_cq = 0;
577 		LIST_REMOVE(mevp, me_list);
578 
579 		if (mevp->me_state & EV_DELETE) {
580 			free(mevp);
581 		} else {
582 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
583 		}
584 	}
585 
586 	mevent_qunlock();
587 }
588 
589 static void
mevent_handle_pe(port_event_t * pe)590 mevent_handle_pe(port_event_t *pe)
591 {
592 	struct mevent *mevp = pe->portev_user;
593 
594 	(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
595 
596 	mevent_qlock();
597 	if (!mevp->me_cq && !mevp->me_auto_requeue) {
598 		mevent_update_one(mevp);
599 	}
600 	mevent_qunlock();
601 }
602 #endif
603 
604 static struct mevent *
mevent_add_state(int tfd,enum ev_type type,void (* func)(int,enum ev_type,void *),void * param,int state,int fflags)605 mevent_add_state(int tfd, enum ev_type type,
606 	   void (*func)(int, enum ev_type, void *), void *param,
607 	   int state, int fflags)
608 {
609 #ifdef __FreeBSD__
610 	struct kevent kev;
611 #endif
612 	struct mevent *lp, *mevp;
613 #ifdef __FreeBSD__
614 	int ret;
615 #endif
616 
617 	if (tfd < 0 || func == NULL) {
618 		return (NULL);
619 	}
620 
621 	mevp = NULL;
622 
623 	pthread_once(&mevent_once, mevent_init);
624 
625 	mevent_qlock();
626 
627 	/*
628 	 * Verify that the fd/type tuple is not present in any list
629 	 */
630 	LIST_FOREACH(lp, &global_head, me_list) {
631 		if (type != EVF_TIMER && lp->me_fd == tfd &&
632 		    lp->me_type == type) {
633 			goto exit;
634 		}
635 	}
636 
637 	LIST_FOREACH(lp, &change_head, me_list) {
638 		if (type != EVF_TIMER && lp->me_fd == tfd &&
639 		    lp->me_type == type) {
640 			goto exit;
641 		}
642 	}
643 
644 	/*
645 	 * Allocate an entry and populate it.
646 	 */
647 	mevp = calloc(1, sizeof(struct mevent));
648 	if (mevp == NULL) {
649 		goto exit;
650 	}
651 
652 	if (type == EVF_TIMER) {
653 		mevp->me_msecs = tfd;
654 #ifdef __FreeBSD__
655 		mevp->me_timid = mevent_timid++;
656 #else
657 		mevp->me_timid = -1;
658 #endif
659 	} else
660 		mevp->me_fd = tfd;
661 	mevp->me_type = type;
662 	mevp->me_func = func;
663 	mevp->me_param = param;
664 	mevp->me_state = state;
665 	mevp->me_fflags = fflags;
666 
667 	/*
668 	 * Try to add the event.  If this fails, report the failure to
669 	 * the caller.
670 	 */
671 #ifdef __FreeBSD__
672 	mevent_populate(mevp, &kev);
673 	ret = kevent(mfd, &kev, 1, NULL, 0, NULL);
674 	if (ret == -1) {
675 		free(mevp);
676 		mevp = NULL;
677 		goto exit;
678 	}
679 	mevp->me_state &= ~EV_ADD;
680 #else
681 	mevent_populate(mevp);
682 	if (mevent_clarify_state(mevp))
683 		mevent_update_one(mevp);
684 #endif
685 
686 	LIST_INSERT_HEAD(&global_head, mevp, me_list);
687 
688 exit:
689 	mevent_qunlock();
690 
691 	return (mevp);
692 }
693 
694 struct mevent *
mevent_add(int tfd,enum ev_type type,void (* func)(int,enum ev_type,void *),void * param)695 mevent_add(int tfd, enum ev_type type,
696 	   void (*func)(int, enum ev_type, void *), void *param)
697 {
698 
699 	return (mevent_add_state(tfd, type, func, param, EV_ADD, 0));
700 }
701 
702 struct mevent *
mevent_add_flags(int tfd,enum ev_type type,int fflags,void (* func)(int,enum ev_type,void *),void * param)703 mevent_add_flags(int tfd, enum ev_type type, int fflags,
704 		 void (*func)(int, enum ev_type, void *), void *param)
705 {
706 
707 	return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags));
708 }
709 
710 struct mevent *
mevent_add_disabled(int tfd,enum ev_type type,void (* func)(int,enum ev_type,void *),void * param)711 mevent_add_disabled(int tfd, enum ev_type type,
712 		    void (*func)(int, enum ev_type, void *), void *param)
713 {
714 
715 	return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0));
716 }
717 
718 static int
mevent_update(struct mevent * evp,bool enable)719 mevent_update(struct mevent *evp, bool enable)
720 {
721 	int newstate;
722 
723 	mevent_qlock();
724 
725 	/*
726 	 * It's not possible to enable/disable a deleted event
727 	 */
728 	assert((evp->me_state & EV_DELETE) == 0);
729 
730 	newstate = evp->me_state;
731 	if (enable) {
732 		newstate |= EV_ENABLE;
733 		newstate &= ~EV_DISABLE;
734 	} else {
735 		newstate |= EV_DISABLE;
736 		newstate &= ~EV_ENABLE;
737 	}
738 
739 	/*
740 	 * No update needed if state isn't changing
741 	 */
742 	if (evp->me_state != newstate) {
743 		evp->me_state = newstate;
744 
745 		/*
746 		 * Place the entry onto the changed list if not
747 		 * already there.
748 		 */
749 		if (evp->me_cq == 0) {
750 			evp->me_cq = 1;
751 			LIST_REMOVE(evp, me_list);
752 			LIST_INSERT_HEAD(&change_head, evp, me_list);
753 			mevent_notify();
754 		}
755 	}
756 
757 	mevent_qunlock();
758 
759 	return (0);
760 }
761 
/* Request that evp be enabled. Always returns 0. */
int
mevent_enable(struct mevent *evp)
{
	const bool enable = true;

	return (mevent_update(evp, enable));
}
768 
/* Request that evp be disabled. Always returns 0. */
int
mevent_disable(struct mevent *evp)
{
	const bool enable = false;

	return (mevent_update(evp, enable));
}
775 
776 static int
mevent_delete_event(struct mevent * evp,int closefd)777 mevent_delete_event(struct mevent *evp, int closefd)
778 {
779 	mevent_qlock();
780 
781 	/*
782          * Place the entry onto the changed list if not already there, and
783 	 * mark as to be deleted.
784          */
785         if (evp->me_cq == 0) {
786 		evp->me_cq = 1;
787 		LIST_REMOVE(evp, me_list);
788 		LIST_INSERT_HEAD(&change_head, evp, me_list);
789 		mevent_notify();
790         }
791 	evp->me_state = EV_DELETE;
792 
793 	if (closefd)
794 		evp->me_closefd = 1;
795 
796 	mevent_qunlock();
797 
798 	return (0);
799 }
800 
/* Delete evp, leaving its file descriptor open. Always returns 0. */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
807 
/* Delete evp and have its file descriptor closed. Always returns 0. */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
814 
815 static void
mevent_set_name(void)816 mevent_set_name(void)
817 {
818 
819 	pthread_set_name_np(mevent_tid, "mevent");
820 }
821 
822 void
mevent_dispatch(void)823 mevent_dispatch(void)
824 {
825 #ifdef __FreeBSD__
826 	struct kevent changelist[MEVENT_MAX];
827 	struct kevent eventlist[MEVENT_MAX];
828 	struct mevent *pipev;
829 	int numev;
830 #else
831 	struct mevent *pipev;
832 #endif
833 	int ret;
834 #ifndef WITHOUT_CAPSICUM
835 	cap_rights_t rights;
836 #endif
837 
838 	mevent_tid = pthread_self();
839 	mevent_set_name();
840 
841 	pthread_once(&mevent_once, mevent_init);
842 
843 	/*
844 	 * Open the pipe that will be used for other threads to force
845 	 * the blocking kqueue call to exit by writing to it. Set the
846 	 * descriptor to non-blocking.
847 	 */
848 	ret = pipe(mevent_pipefd);
849 	if (ret < 0) {
850 		perror("pipe");
851 		exit(0);
852 	}
853 
854 #ifndef WITHOUT_CAPSICUM
855 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
856 	if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
857 		errx(EX_OSERR, "Unable to apply rights for sandbox");
858 	if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
859 		errx(EX_OSERR, "Unable to apply rights for sandbox");
860 #endif
861 
862 	/*
863 	 * Add internal event handler for the pipe write fd
864 	 */
865 	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
866 	assert(pipev != NULL);
867 
868 	for (;;) {
869 #ifdef __FreeBSD__
870 		/*
871 		 * Build changelist if required.
872 		 * XXX the changelist can be put into the blocking call
873 		 * to eliminate the extra syscall. Currently better for
874 		 * debug.
875 		 */
876 		numev = mevent_build(changelist);
877 		if (numev) {
878 			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
879 			if (ret == -1) {
880 				perror("Error return from kevent change");
881 			}
882 		}
883 
884 		/*
885 		 * Block awaiting events
886 		 */
887 		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
888 		if (ret == -1 && errno != EINTR) {
889 			perror("Error return from kevent monitor");
890 		}
891 
892 		/*
893 		 * Handle reported events
894 		 */
895 		mevent_handle(eventlist, ret);
896 
897 #else /* __FreeBSD__ */
898 		port_event_t pev;
899 
900 		/* Handle any pending updates */
901 		mevent_update_pending();
902 
903 		/* Block awaiting events */
904 		ret = port_get(mfd, &pev, NULL);
905 		if (ret != 0) {
906 			if (errno != EINTR)
907 				perror("Error return from port_get");
908 			continue;
909 		}
910 
911 		/* Handle reported event */
912 		mevent_handle_pe(&pev);
913 #endif /* __FreeBSD__ */
914 	}
915 }
916