1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #ifndef WITHOUT_CAPSICUM 41 #include <capsicum_helpers.h> 42 #endif 43 #include <err.h> 44 #include <errno.h> 45 #include <stdbool.h> 46 #include <stdlib.h> 47 #include <stdio.h> 48 #include <string.h> 49 #include <sysexits.h> 50 #include <unistd.h> 51 52 #include <sys/types.h> 53 #ifndef WITHOUT_CAPSICUM 54 #include <sys/capsicum.h> 55 #endif 56 #include <sys/event.h> 57 #include <sys/time.h> 58 59 #include <pthread.h> 60 #include <pthread_np.h> 61 62 #include "mevent.h" 63 64 #define MEVENT_MAX 64 65 66 static pthread_t mevent_tid; 67 static int mevent_timid = 43; 68 static int mevent_pipefd[2]; 69 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 70 71 struct mevent { 72 void (*me_func)(int, enum ev_type, void *); 73 #define me_msecs me_fd 74 int me_fd; 75 int me_timid; 76 enum ev_type me_type; 77 void *me_param; 78 int me_cq; 79 int me_state; /* Desired kevent flags. */ 80 int me_closefd; 81 LIST_ENTRY(mevent) me_list; 82 }; 83 84 static LIST_HEAD(listhead, mevent) global_head, change_head; 85 86 static void 87 mevent_qlock(void) 88 { 89 pthread_mutex_lock(&mevent_lmutex); 90 } 91 92 static void 93 mevent_qunlock(void) 94 { 95 pthread_mutex_unlock(&mevent_lmutex); 96 } 97 98 static void 99 mevent_pipe_read(int fd, enum ev_type type, void *param) 100 { 101 char buf[MEVENT_MAX]; 102 int status; 103 104 /* 105 * Drain the pipe read side. The fd is non-blocking so this is 106 * safe to do. 107 */ 108 do { 109 status = read(fd, buf, sizeof(buf)); 110 } while (status == MEVENT_MAX); 111 } 112 113 static void 114 mevent_notify(void) 115 { 116 char c = '\0'; 117 118 /* 119 * If calling from outside the i/o thread, write a byte on the 120 * pipe to force the i/o thread to exit the blocking kevent call. 121 */ 122 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 123 write(mevent_pipefd[1], &c, 1); 124 } 125 } 126 127 static int 128 mevent_kq_filter(struct mevent *mevp) 129 { 130 int retval; 131 132 retval = 0; 133 134 if (mevp->me_type == EVF_READ) 135 retval = EVFILT_READ; 136 137 if (mevp->me_type == EVF_WRITE) 138 retval = EVFILT_WRITE; 139 140 if (mevp->me_type == EVF_TIMER) 141 retval = EVFILT_TIMER; 142 143 if (mevp->me_type == EVF_SIGNAL) 144 retval = EVFILT_SIGNAL; 145 146 return (retval); 147 } 148 149 static int 150 mevent_kq_flags(struct mevent *mevp) 151 { 152 return (mevp->me_state); 153 } 154 155 static int 156 mevent_kq_fflags(struct mevent *mevp) 157 { 158 /* XXX nothing yet, perhaps EV_EOF for reads ? */ 159 return (0); 160 } 161 162 static int 163 mevent_build(int mfd, struct kevent *kev) 164 { 165 struct mevent *mevp, *tmpp; 166 int i; 167 168 i = 0; 169 170 mevent_qlock(); 171 172 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 173 if (mevp->me_closefd) { 174 /* 175 * A close of the file descriptor will remove the 176 * event 177 */ 178 close(mevp->me_fd); 179 } else { 180 if (mevp->me_type == EVF_TIMER) { 181 kev[i].ident = mevp->me_timid; 182 kev[i].data = mevp->me_msecs; 183 } else { 184 kev[i].ident = mevp->me_fd; 185 kev[i].data = 0; 186 } 187 kev[i].filter = mevent_kq_filter(mevp); 188 kev[i].flags = mevent_kq_flags(mevp); 189 kev[i].fflags = mevent_kq_fflags(mevp); 190 kev[i].udata = mevp; 191 i++; 192 } 193 194 mevp->me_cq = 0; 195 LIST_REMOVE(mevp, me_list); 196 197 if (mevp->me_state & EV_DELETE) { 198 free(mevp); 199 } else { 200 /* 201 * We need to add the event only once, so we can 202 * reset the EV_ADD bit after it has been propagated 203 * to the kevent() arguments the first time. 204 */ 205 mevp->me_state &= ~EV_ADD; 206 LIST_INSERT_HEAD(&global_head, mevp, me_list); 207 } 208 209 assert(i < MEVENT_MAX); 210 } 211 212 mevent_qunlock(); 213 214 return (i); 215 } 216 217 static void 218 mevent_handle(struct kevent *kev, int numev) 219 { 220 struct mevent *mevp; 221 int i; 222 223 for (i = 0; i < numev; i++) { 224 mevp = kev[i].udata; 225 226 /* XXX check for EV_ERROR ? */ 227 228 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 229 } 230 } 231 232 static struct mevent * 233 mevent_add_state(int tfd, enum ev_type type, 234 void (*func)(int, enum ev_type, void *), void *param, 235 int state) 236 { 237 struct mevent *lp, *mevp; 238 239 if (tfd < 0 || func == NULL) { 240 return (NULL); 241 } 242 243 mevp = NULL; 244 245 mevent_qlock(); 246 247 /* 248 * Verify that the fd/type tuple is not present in any list 249 */ 250 LIST_FOREACH(lp, &global_head, me_list) { 251 if (type != EVF_TIMER && lp->me_fd == tfd && 252 lp->me_type == type) { 253 goto exit; 254 } 255 } 256 257 LIST_FOREACH(lp, &change_head, me_list) { 258 if (type != EVF_TIMER && lp->me_fd == tfd && 259 lp->me_type == type) { 260 goto exit; 261 } 262 } 263 264 /* 265 * Allocate an entry, populate it, and add it to the change list. 266 */ 267 mevp = calloc(1, sizeof(struct mevent)); 268 if (mevp == NULL) { 269 goto exit; 270 } 271 272 if (type == EVF_TIMER) { 273 mevp->me_msecs = tfd; 274 mevp->me_timid = mevent_timid++; 275 } else 276 mevp->me_fd = tfd; 277 mevp->me_type = type; 278 mevp->me_func = func; 279 mevp->me_param = param; 280 281 LIST_INSERT_HEAD(&change_head, mevp, me_list); 282 mevp->me_cq = 1; 283 mevp->me_state = state; 284 mevent_notify(); 285 286 exit: 287 mevent_qunlock(); 288 289 return (mevp); 290 } 291 292 struct mevent * 293 mevent_add(int tfd, enum ev_type type, 294 void (*func)(int, enum ev_type, void *), void *param) 295 { 296 297 return (mevent_add_state(tfd, type, func, param, EV_ADD)); 298 } 299 300 struct mevent * 301 mevent_add_disabled(int tfd, enum ev_type type, 302 void (*func)(int, enum ev_type, void *), void *param) 303 { 304 305 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE)); 306 } 307 308 static int 309 mevent_update(struct mevent *evp, bool enable) 310 { 311 int newstate; 312 313 mevent_qlock(); 314 315 /* 316 * It's not possible to enable/disable a deleted event 317 */ 318 assert((evp->me_state & EV_DELETE) == 0); 319 320 newstate = evp->me_state; 321 if (enable) { 322 newstate |= EV_ENABLE; 323 newstate &= ~EV_DISABLE; 324 } else { 325 newstate |= EV_DISABLE; 326 newstate &= ~EV_ENABLE; 327 } 328 329 /* 330 * No update needed if state isn't changing 331 */ 332 if (evp->me_state != newstate) { 333 evp->me_state = newstate; 334 335 /* 336 * Place the entry onto the changed list if not 337 * already there. 338 */ 339 if (evp->me_cq == 0) { 340 evp->me_cq = 1; 341 LIST_REMOVE(evp, me_list); 342 LIST_INSERT_HEAD(&change_head, evp, me_list); 343 mevent_notify(); 344 } 345 } 346 347 mevent_qunlock(); 348 349 return (0); 350 } 351 352 int 353 mevent_enable(struct mevent *evp) 354 { 355 356 return (mevent_update(evp, true)); 357 } 358 359 int 360 mevent_disable(struct mevent *evp) 361 { 362 363 return (mevent_update(evp, false)); 364 } 365 366 static int 367 mevent_delete_event(struct mevent *evp, int closefd) 368 { 369 mevent_qlock(); 370 371 /* 372 * Place the entry onto the changed list if not already there, and 373 * mark as to be deleted. 374 */ 375 if (evp->me_cq == 0) { 376 evp->me_cq = 1; 377 LIST_REMOVE(evp, me_list); 378 LIST_INSERT_HEAD(&change_head, evp, me_list); 379 mevent_notify(); 380 } 381 evp->me_state = EV_DELETE; 382 383 if (closefd) 384 evp->me_closefd = 1; 385 386 mevent_qunlock(); 387 388 return (0); 389 } 390 391 int 392 mevent_delete(struct mevent *evp) 393 { 394 395 return (mevent_delete_event(evp, 0)); 396 } 397 398 int 399 mevent_delete_close(struct mevent *evp) 400 { 401 402 return (mevent_delete_event(evp, 1)); 403 } 404 405 static void 406 mevent_set_name(void) 407 { 408 409 pthread_set_name_np(mevent_tid, "mevent"); 410 } 411 412 void 413 mevent_dispatch(void) 414 { 415 struct kevent changelist[MEVENT_MAX]; 416 struct kevent eventlist[MEVENT_MAX]; 417 struct mevent *pipev; 418 int mfd; 419 int numev; 420 int ret; 421 #ifndef WITHOUT_CAPSICUM 422 cap_rights_t rights; 423 #endif 424 425 mevent_tid = pthread_self(); 426 mevent_set_name(); 427 428 mfd = kqueue(); 429 assert(mfd > 0); 430 431 #ifndef WITHOUT_CAPSICUM 432 cap_rights_init(&rights, CAP_KQUEUE); 433 if (caph_rights_limit(mfd, &rights) == -1) 434 errx(EX_OSERR, "Unable to apply rights for sandbox"); 435 #endif 436 437 /* 438 * Open the pipe that will be used for other threads to force 439 * the blocking kqueue call to exit by writing to it. Set the 440 * descriptor to non-blocking. 441 */ 442 ret = pipe(mevent_pipefd); 443 if (ret < 0) { 444 perror("pipe"); 445 exit(0); 446 } 447 448 #ifndef WITHOUT_CAPSICUM 449 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 450 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 451 errx(EX_OSERR, "Unable to apply rights for sandbox"); 452 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 453 errx(EX_OSERR, "Unable to apply rights for sandbox"); 454 #endif 455 456 /* 457 * Add internal event handler for the pipe write fd 458 */ 459 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 460 assert(pipev != NULL); 461 462 for (;;) { 463 /* 464 * Build changelist if required. 465 * XXX the changelist can be put into the blocking call 466 * to eliminate the extra syscall. Currently better for 467 * debug. 468 */ 469 numev = mevent_build(mfd, changelist); 470 if (numev) { 471 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 472 if (ret == -1) { 473 perror("Error return from kevent change"); 474 } 475 } 476 477 /* 478 * Block awaiting events 479 */ 480 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 481 if (ret == -1 && errno != EINTR) { 482 perror("Error return from kevent monitor"); 483 } 484 485 /* 486 * Handle reported events 487 */ 488 mevent_handle(eventlist, ret); 489 } 490 } 491