1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Micro event library for FreeBSD, designed for a single i/o thread 31 * using kqueue, and having events be persistent by default. 32 */ 33 34 #include <sys/cdefs.h> 35 #include <assert.h> 36 #ifndef WITHOUT_CAPSICUM 37 #include <capsicum_helpers.h> 38 #endif 39 #include <err.h> 40 #include <errno.h> 41 #include <stdbool.h> 42 #include <stdlib.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <sysexits.h> 46 #include <unistd.h> 47 48 #include <sys/types.h> 49 #ifndef WITHOUT_CAPSICUM 50 #include <sys/capsicum.h> 51 #endif 52 #include <sys/event.h> 53 #include <sys/time.h> 54 55 #include <pthread.h> 56 #include <pthread_np.h> 57 58 #include "mevent.h" 59 60 #define MEVENT_MAX 64 61 62 static pthread_t mevent_tid; 63 static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; 64 static int mevent_timid = 43; 65 static int mevent_pipefd[2]; 66 static int mfd; 67 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 68 69 struct mevent { 70 void (*me_func)(int, enum ev_type, void *); 71 #define me_msecs me_fd 72 int me_fd; 73 int me_timid; 74 enum ev_type me_type; 75 void *me_param; 76 int me_cq; 77 int me_state; /* Desired kevent flags. */ 78 int me_closefd; 79 int me_fflags; 80 LIST_ENTRY(mevent) me_list; 81 }; 82 83 enum mevent_update_type { 84 UPDATE_ENABLE, 85 UPDATE_DISABLE, 86 UPDATE_TIMER, 87 }; 88 89 static LIST_HEAD(listhead, mevent) global_head, change_head; 90 91 static void 92 mevent_qlock(void) 93 { 94 pthread_mutex_lock(&mevent_lmutex); 95 } 96 97 static void 98 mevent_qunlock(void) 99 { 100 pthread_mutex_unlock(&mevent_lmutex); 101 } 102 103 static void 104 mevent_pipe_read(int fd, enum ev_type type __unused, void *param __unused) 105 { 106 char buf[MEVENT_MAX]; 107 int status; 108 109 /* 110 * Drain the pipe read side. The fd is non-blocking so this is 111 * safe to do. 112 */ 113 do { 114 status = read(fd, buf, sizeof(buf)); 115 } while (status == MEVENT_MAX); 116 } 117 118 static void 119 mevent_notify(void) 120 { 121 char c = '\0'; 122 123 /* 124 * If calling from outside the i/o thread, write a byte on the 125 * pipe to force the i/o thread to exit the blocking kevent call. 126 */ 127 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 128 write(mevent_pipefd[1], &c, 1); 129 } 130 } 131 132 static void 133 mevent_init(void) 134 { 135 #ifndef WITHOUT_CAPSICUM 136 cap_rights_t rights; 137 #endif 138 139 mfd = kqueue(); 140 assert(mfd > 0); 141 142 #ifndef WITHOUT_CAPSICUM 143 cap_rights_init(&rights, CAP_KQUEUE); 144 if (caph_rights_limit(mfd, &rights) == -1) 145 errx(EX_OSERR, "Unable to apply rights for sandbox"); 146 #endif 147 148 LIST_INIT(&change_head); 149 LIST_INIT(&global_head); 150 } 151 152 static int 153 mevent_kq_filter(struct mevent *mevp) 154 { 155 int retval; 156 157 retval = 0; 158 159 if (mevp->me_type == EVF_READ) 160 retval = EVFILT_READ; 161 162 if (mevp->me_type == EVF_WRITE) 163 retval = EVFILT_WRITE; 164 165 if (mevp->me_type == EVF_TIMER) 166 retval = EVFILT_TIMER; 167 168 if (mevp->me_type == EVF_SIGNAL) 169 retval = EVFILT_SIGNAL; 170 171 if (mevp->me_type == EVF_VNODE) 172 retval = EVFILT_VNODE; 173 174 return (retval); 175 } 176 177 static int 178 mevent_kq_flags(struct mevent *mevp) 179 { 180 int retval; 181 182 retval = mevp->me_state; 183 184 if (mevp->me_type == EVF_VNODE) 185 retval |= EV_CLEAR; 186 187 return (retval); 188 } 189 190 static int 191 mevent_kq_fflags(struct mevent *mevp) 192 { 193 int retval; 194 195 retval = 0; 196 197 switch (mevp->me_type) { 198 case EVF_VNODE: 199 if ((mevp->me_fflags & EVFF_ATTRIB) != 0) 200 retval |= NOTE_ATTRIB; 201 break; 202 case EVF_READ: 203 case EVF_WRITE: 204 case EVF_TIMER: 205 case EVF_SIGNAL: 206 break; 207 } 208 209 return (retval); 210 } 211 212 static void 213 mevent_populate(struct mevent *mevp, struct kevent *kev) 214 { 215 if (mevp->me_type == EVF_TIMER) { 216 kev->ident = mevp->me_timid; 217 kev->data = mevp->me_msecs; 218 } else { 219 kev->ident = mevp->me_fd; 220 kev->data = 0; 221 } 222 kev->filter = mevent_kq_filter(mevp); 223 kev->flags = mevent_kq_flags(mevp); 224 kev->fflags = mevent_kq_fflags(mevp); 225 kev->udata = mevp; 226 } 227 228 static int 229 mevent_build(struct kevent *kev) 230 { 231 struct mevent *mevp, *tmpp; 232 int i; 233 234 i = 0; 235 236 mevent_qlock(); 237 238 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 239 if (mevp->me_closefd) { 240 /* 241 * A close of the file descriptor will remove the 242 * event 243 */ 244 close(mevp->me_fd); 245 } else { 246 mevent_populate(mevp, &kev[i]); 247 i++; 248 } 249 250 mevp->me_cq = 0; 251 LIST_REMOVE(mevp, me_list); 252 253 if (mevp->me_state & EV_DELETE) { 254 free(mevp); 255 } else { 256 LIST_INSERT_HEAD(&global_head, mevp, me_list); 257 } 258 259 assert(i < MEVENT_MAX); 260 } 261 262 mevent_qunlock(); 263 264 return (i); 265 } 266 267 static void 268 mevent_handle(struct kevent *kev, int numev) 269 { 270 struct mevent *mevp; 271 int i; 272 273 for (i = 0; i < numev; i++) { 274 mevp = kev[i].udata; 275 276 /* XXX check for EV_ERROR ? */ 277 278 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 279 } 280 } 281 282 static struct mevent * 283 mevent_add_state(int tfd, enum ev_type type, 284 void (*func)(int, enum ev_type, void *), void *param, 285 int state, int fflags) 286 { 287 struct kevent kev; 288 struct mevent *lp, *mevp; 289 int ret; 290 291 if (tfd < 0 || func == NULL) { 292 return (NULL); 293 } 294 295 mevp = NULL; 296 297 pthread_once(&mevent_once, mevent_init); 298 299 mevent_qlock(); 300 301 /* 302 * Verify that the fd/type tuple is not present in any list 303 */ 304 LIST_FOREACH(lp, &global_head, me_list) { 305 if (type != EVF_TIMER && lp->me_fd == tfd && 306 lp->me_type == type) { 307 goto exit; 308 } 309 } 310 311 LIST_FOREACH(lp, &change_head, me_list) { 312 if (type != EVF_TIMER && lp->me_fd == tfd && 313 lp->me_type == type) { 314 goto exit; 315 } 316 } 317 318 /* 319 * Allocate an entry and populate it. 320 */ 321 mevp = calloc(1, sizeof(struct mevent)); 322 if (mevp == NULL) { 323 goto exit; 324 } 325 326 if (type == EVF_TIMER) { 327 mevp->me_msecs = tfd; 328 mevp->me_timid = mevent_timid++; 329 } else 330 mevp->me_fd = tfd; 331 mevp->me_type = type; 332 mevp->me_func = func; 333 mevp->me_param = param; 334 mevp->me_state = state; 335 mevp->me_fflags = fflags; 336 337 /* 338 * Try to add the event. If this fails, report the failure to 339 * the caller. 340 */ 341 mevent_populate(mevp, &kev); 342 ret = kevent(mfd, &kev, 1, NULL, 0, NULL); 343 if (ret == -1) { 344 free(mevp); 345 mevp = NULL; 346 goto exit; 347 } 348 349 mevp->me_state &= ~EV_ADD; 350 LIST_INSERT_HEAD(&global_head, mevp, me_list); 351 352 exit: 353 mevent_qunlock(); 354 355 return (mevp); 356 } 357 358 struct mevent * 359 mevent_add(int tfd, enum ev_type type, 360 void (*func)(int, enum ev_type, void *), void *param) 361 { 362 363 return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); 364 } 365 366 struct mevent * 367 mevent_add_flags(int tfd, enum ev_type type, int fflags, 368 void (*func)(int, enum ev_type, void *), void *param) 369 { 370 371 return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); 372 } 373 374 struct mevent * 375 mevent_add_disabled(int tfd, enum ev_type type, 376 void (*func)(int, enum ev_type, void *), void *param) 377 { 378 379 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); 380 } 381 382 static int 383 mevent_update(struct mevent *evp, enum mevent_update_type type, int msecs) 384 { 385 int newstate; 386 387 mevent_qlock(); 388 389 /* 390 * It's not possible to update a deleted event 391 */ 392 assert((evp->me_state & EV_DELETE) == 0); 393 394 newstate = evp->me_state; 395 if (type == UPDATE_ENABLE) { 396 newstate |= EV_ENABLE; 397 newstate &= ~EV_DISABLE; 398 } else if (type == UPDATE_DISABLE) { 399 newstate |= EV_DISABLE; 400 newstate &= ~EV_ENABLE; 401 } else { 402 assert(type == UPDATE_TIMER); 403 assert(evp->me_type == EVF_TIMER); 404 newstate |= EV_ADD; 405 evp->me_msecs = msecs; 406 } 407 408 /* 409 * No update needed if enable/disable had no effect 410 */ 411 if (evp->me_state != newstate || type == UPDATE_TIMER) { 412 evp->me_state = newstate; 413 414 /* 415 * Place the entry onto the changed list if not 416 * already there. 417 */ 418 if (evp->me_cq == 0) { 419 evp->me_cq = 1; 420 LIST_REMOVE(evp, me_list); 421 LIST_INSERT_HEAD(&change_head, evp, me_list); 422 mevent_notify(); 423 } 424 } 425 426 mevent_qunlock(); 427 428 return (0); 429 } 430 431 int 432 mevent_enable(struct mevent *evp) 433 { 434 return (mevent_update(evp, UPDATE_ENABLE, -1)); 435 } 436 437 int 438 mevent_disable(struct mevent *evp) 439 { 440 return (mevent_update(evp, UPDATE_DISABLE, -1)); 441 } 442 443 int 444 mevent_timer_update(struct mevent *evp, int msecs) 445 { 446 return (mevent_update(evp, UPDATE_TIMER, msecs)); 447 } 448 449 static int 450 mevent_delete_event(struct mevent *evp, int closefd) 451 { 452 mevent_qlock(); 453 454 /* 455 * Place the entry onto the changed list if not already there, and 456 * mark as to be deleted. 457 */ 458 if (evp->me_cq == 0) { 459 evp->me_cq = 1; 460 LIST_REMOVE(evp, me_list); 461 LIST_INSERT_HEAD(&change_head, evp, me_list); 462 mevent_notify(); 463 } 464 evp->me_state = EV_DELETE; 465 466 if (closefd) 467 evp->me_closefd = 1; 468 469 mevent_qunlock(); 470 471 return (0); 472 } 473 474 int 475 mevent_delete(struct mevent *evp) 476 { 477 478 return (mevent_delete_event(evp, 0)); 479 } 480 481 int 482 mevent_delete_close(struct mevent *evp) 483 { 484 485 return (mevent_delete_event(evp, 1)); 486 } 487 488 static void 489 mevent_set_name(void) 490 { 491 492 pthread_set_name_np(mevent_tid, "mevent"); 493 } 494 495 void 496 mevent_dispatch(void) 497 { 498 struct kevent changelist[MEVENT_MAX]; 499 struct kevent eventlist[MEVENT_MAX]; 500 struct mevent *pipev; 501 int numev; 502 int ret; 503 #ifndef WITHOUT_CAPSICUM 504 cap_rights_t rights; 505 #endif 506 507 mevent_tid = pthread_self(); 508 mevent_set_name(); 509 510 pthread_once(&mevent_once, mevent_init); 511 512 /* 513 * Open the pipe that will be used for other threads to force 514 * the blocking kqueue call to exit by writing to it. Set the 515 * descriptor to non-blocking. 516 */ 517 ret = pipe(mevent_pipefd); 518 if (ret < 0) { 519 perror("pipe"); 520 exit(0); 521 } 522 523 #ifndef WITHOUT_CAPSICUM 524 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 525 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 526 errx(EX_OSERR, "Unable to apply rights for sandbox"); 527 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 528 errx(EX_OSERR, "Unable to apply rights for sandbox"); 529 #endif 530 531 /* 532 * Add internal event handler for the pipe write fd 533 */ 534 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 535 assert(pipev != NULL); 536 537 for (;;) { 538 /* 539 * Build changelist if required. 540 * XXX the changelist can be put into the blocking call 541 * to eliminate the extra syscall. Currently better for 542 * debug. 543 */ 544 numev = mevent_build(changelist); 545 if (numev) { 546 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 547 if (ret == -1) { 548 perror("Error return from kevent change"); 549 } 550 } 551 552 /* 553 * Block awaiting events 554 */ 555 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 556 if (ret == -1 && errno != EINTR) { 557 perror("Error return from kevent monitor"); 558 } 559 560 /* 561 * Handle reported events 562 */ 563 mevent_handle(eventlist, ret); 564 } 565 } 566