1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Micro event library for FreeBSD, designed for a single i/o thread 31 * using kqueue, and having events be persistent by default. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <assert.h> 38 #ifndef WITHOUT_CAPSICUM 39 #include <capsicum_helpers.h> 40 #endif 41 #include <err.h> 42 #include <errno.h> 43 #include <stdbool.h> 44 #include <stdlib.h> 45 #include <stdio.h> 46 #include <string.h> 47 #include <sysexits.h> 48 #include <unistd.h> 49 50 #include <sys/types.h> 51 #ifndef WITHOUT_CAPSICUM 52 #include <sys/capsicum.h> 53 #endif 54 #include <sys/event.h> 55 #include <sys/time.h> 56 57 #include <pthread.h> 58 #include <pthread_np.h> 59 60 #include "mevent.h" 61 62 #define MEVENT_MAX 64 63 64 static pthread_t mevent_tid; 65 static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; 66 static int mevent_timid = 43; 67 static int mevent_pipefd[2]; 68 static int mfd; 69 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 70 71 struct mevent { 72 void (*me_func)(int, enum ev_type, void *); 73 #define me_msecs me_fd 74 int me_fd; 75 int me_timid; 76 enum ev_type me_type; 77 void *me_param; 78 int me_cq; 79 int me_state; /* Desired kevent flags. */ 80 int me_closefd; 81 int me_fflags; 82 LIST_ENTRY(mevent) me_list; 83 }; 84 85 static LIST_HEAD(listhead, mevent) global_head, change_head; 86 87 static void 88 mevent_qlock(void) 89 { 90 pthread_mutex_lock(&mevent_lmutex); 91 } 92 93 static void 94 mevent_qunlock(void) 95 { 96 pthread_mutex_unlock(&mevent_lmutex); 97 } 98 99 static void 100 mevent_pipe_read(int fd, enum ev_type type __unused, void *param __unused) 101 { 102 char buf[MEVENT_MAX]; 103 int status; 104 105 /* 106 * Drain the pipe read side. The fd is non-blocking so this is 107 * safe to do. 108 */ 109 do { 110 status = read(fd, buf, sizeof(buf)); 111 } while (status == MEVENT_MAX); 112 } 113 114 static void 115 mevent_notify(void) 116 { 117 char c = '\0'; 118 119 /* 120 * If calling from outside the i/o thread, write a byte on the 121 * pipe to force the i/o thread to exit the blocking kevent call. 122 */ 123 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 124 write(mevent_pipefd[1], &c, 1); 125 } 126 } 127 128 static void 129 mevent_init(void) 130 { 131 #ifndef WITHOUT_CAPSICUM 132 cap_rights_t rights; 133 #endif 134 135 mfd = kqueue(); 136 assert(mfd > 0); 137 138 #ifndef WITHOUT_CAPSICUM 139 cap_rights_init(&rights, CAP_KQUEUE); 140 if (caph_rights_limit(mfd, &rights) == -1) 141 errx(EX_OSERR, "Unable to apply rights for sandbox"); 142 #endif 143 144 LIST_INIT(&change_head); 145 LIST_INIT(&global_head); 146 } 147 148 static int 149 mevent_kq_filter(struct mevent *mevp) 150 { 151 int retval; 152 153 retval = 0; 154 155 if (mevp->me_type == EVF_READ) 156 retval = EVFILT_READ; 157 158 if (mevp->me_type == EVF_WRITE) 159 retval = EVFILT_WRITE; 160 161 if (mevp->me_type == EVF_TIMER) 162 retval = EVFILT_TIMER; 163 164 if (mevp->me_type == EVF_SIGNAL) 165 retval = EVFILT_SIGNAL; 166 167 if (mevp->me_type == EVF_VNODE) 168 retval = EVFILT_VNODE; 169 170 return (retval); 171 } 172 173 static int 174 mevent_kq_flags(struct mevent *mevp) 175 { 176 int retval; 177 178 retval = mevp->me_state; 179 180 if (mevp->me_type == EVF_VNODE) 181 retval |= EV_CLEAR; 182 183 return (retval); 184 } 185 186 static int 187 mevent_kq_fflags(struct mevent *mevp) 188 { 189 int retval; 190 191 retval = 0; 192 193 switch (mevp->me_type) { 194 case EVF_VNODE: 195 if ((mevp->me_fflags & EVFF_ATTRIB) != 0) 196 retval |= NOTE_ATTRIB; 197 break; 198 case EVF_READ: 199 case EVF_WRITE: 200 case EVF_TIMER: 201 case EVF_SIGNAL: 202 break; 203 } 204 205 return (retval); 206 } 207 208 static void 209 mevent_populate(struct mevent *mevp, struct kevent *kev) 210 { 211 if (mevp->me_type == EVF_TIMER) { 212 kev->ident = mevp->me_timid; 213 kev->data = mevp->me_msecs; 214 } else { 215 kev->ident = mevp->me_fd; 216 kev->data = 0; 217 } 218 kev->filter = mevent_kq_filter(mevp); 219 kev->flags = mevent_kq_flags(mevp); 220 kev->fflags = mevent_kq_fflags(mevp); 221 kev->udata = mevp; 222 } 223 224 static int 225 mevent_build(struct kevent *kev) 226 { 227 struct mevent *mevp, *tmpp; 228 int i; 229 230 i = 0; 231 232 mevent_qlock(); 233 234 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 235 if (mevp->me_closefd) { 236 /* 237 * A close of the file descriptor will remove the 238 * event 239 */ 240 close(mevp->me_fd); 241 } else { 242 assert((mevp->me_state & EV_ADD) == 0); 243 mevent_populate(mevp, &kev[i]); 244 i++; 245 } 246 247 mevp->me_cq = 0; 248 LIST_REMOVE(mevp, me_list); 249 250 if (mevp->me_state & EV_DELETE) { 251 free(mevp); 252 } else { 253 LIST_INSERT_HEAD(&global_head, mevp, me_list); 254 } 255 256 assert(i < MEVENT_MAX); 257 } 258 259 mevent_qunlock(); 260 261 return (i); 262 } 263 264 static void 265 mevent_handle(struct kevent *kev, int numev) 266 { 267 struct mevent *mevp; 268 int i; 269 270 for (i = 0; i < numev; i++) { 271 mevp = kev[i].udata; 272 273 /* XXX check for EV_ERROR ? */ 274 275 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 276 } 277 } 278 279 static struct mevent * 280 mevent_add_state(int tfd, enum ev_type type, 281 void (*func)(int, enum ev_type, void *), void *param, 282 int state, int fflags) 283 { 284 struct kevent kev; 285 struct mevent *lp, *mevp; 286 int ret; 287 288 if (tfd < 0 || func == NULL) { 289 return (NULL); 290 } 291 292 mevp = NULL; 293 294 pthread_once(&mevent_once, mevent_init); 295 296 mevent_qlock(); 297 298 /* 299 * Verify that the fd/type tuple is not present in any list 300 */ 301 LIST_FOREACH(lp, &global_head, me_list) { 302 if (type != EVF_TIMER && lp->me_fd == tfd && 303 lp->me_type == type) { 304 goto exit; 305 } 306 } 307 308 LIST_FOREACH(lp, &change_head, me_list) { 309 if (type != EVF_TIMER && lp->me_fd == tfd && 310 lp->me_type == type) { 311 goto exit; 312 } 313 } 314 315 /* 316 * Allocate an entry and populate it. 317 */ 318 mevp = calloc(1, sizeof(struct mevent)); 319 if (mevp == NULL) { 320 goto exit; 321 } 322 323 if (type == EVF_TIMER) { 324 mevp->me_msecs = tfd; 325 mevp->me_timid = mevent_timid++; 326 } else 327 mevp->me_fd = tfd; 328 mevp->me_type = type; 329 mevp->me_func = func; 330 mevp->me_param = param; 331 mevp->me_state = state; 332 mevp->me_fflags = fflags; 333 334 /* 335 * Try to add the event. If this fails, report the failure to 336 * the caller. 337 */ 338 mevent_populate(mevp, &kev); 339 ret = kevent(mfd, &kev, 1, NULL, 0, NULL); 340 if (ret == -1) { 341 free(mevp); 342 mevp = NULL; 343 goto exit; 344 } 345 346 mevp->me_state &= ~EV_ADD; 347 LIST_INSERT_HEAD(&global_head, mevp, me_list); 348 349 exit: 350 mevent_qunlock(); 351 352 return (mevp); 353 } 354 355 struct mevent * 356 mevent_add(int tfd, enum ev_type type, 357 void (*func)(int, enum ev_type, void *), void *param) 358 { 359 360 return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); 361 } 362 363 struct mevent * 364 mevent_add_flags(int tfd, enum ev_type type, int fflags, 365 void (*func)(int, enum ev_type, void *), void *param) 366 { 367 368 return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); 369 } 370 371 struct mevent * 372 mevent_add_disabled(int tfd, enum ev_type type, 373 void (*func)(int, enum ev_type, void *), void *param) 374 { 375 376 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); 377 } 378 379 static int 380 mevent_update(struct mevent *evp, bool enable) 381 { 382 int newstate; 383 384 mevent_qlock(); 385 386 /* 387 * It's not possible to enable/disable a deleted event 388 */ 389 assert((evp->me_state & EV_DELETE) == 0); 390 391 newstate = evp->me_state; 392 if (enable) { 393 newstate |= EV_ENABLE; 394 newstate &= ~EV_DISABLE; 395 } else { 396 newstate |= EV_DISABLE; 397 newstate &= ~EV_ENABLE; 398 } 399 400 /* 401 * No update needed if state isn't changing 402 */ 403 if (evp->me_state != newstate) { 404 evp->me_state = newstate; 405 406 /* 407 * Place the entry onto the changed list if not 408 * already there. 409 */ 410 if (evp->me_cq == 0) { 411 evp->me_cq = 1; 412 LIST_REMOVE(evp, me_list); 413 LIST_INSERT_HEAD(&change_head, evp, me_list); 414 mevent_notify(); 415 } 416 } 417 418 mevent_qunlock(); 419 420 return (0); 421 } 422 423 int 424 mevent_enable(struct mevent *evp) 425 { 426 427 return (mevent_update(evp, true)); 428 } 429 430 int 431 mevent_disable(struct mevent *evp) 432 { 433 434 return (mevent_update(evp, false)); 435 } 436 437 static int 438 mevent_delete_event(struct mevent *evp, int closefd) 439 { 440 mevent_qlock(); 441 442 /* 443 * Place the entry onto the changed list if not already there, and 444 * mark as to be deleted. 445 */ 446 if (evp->me_cq == 0) { 447 evp->me_cq = 1; 448 LIST_REMOVE(evp, me_list); 449 LIST_INSERT_HEAD(&change_head, evp, me_list); 450 mevent_notify(); 451 } 452 evp->me_state = EV_DELETE; 453 454 if (closefd) 455 evp->me_closefd = 1; 456 457 mevent_qunlock(); 458 459 return (0); 460 } 461 462 int 463 mevent_delete(struct mevent *evp) 464 { 465 466 return (mevent_delete_event(evp, 0)); 467 } 468 469 int 470 mevent_delete_close(struct mevent *evp) 471 { 472 473 return (mevent_delete_event(evp, 1)); 474 } 475 476 static void 477 mevent_set_name(void) 478 { 479 480 pthread_set_name_np(mevent_tid, "mevent"); 481 } 482 483 void 484 mevent_dispatch(void) 485 { 486 struct kevent changelist[MEVENT_MAX]; 487 struct kevent eventlist[MEVENT_MAX]; 488 struct mevent *pipev; 489 int numev; 490 int ret; 491 #ifndef WITHOUT_CAPSICUM 492 cap_rights_t rights; 493 #endif 494 495 mevent_tid = pthread_self(); 496 mevent_set_name(); 497 498 pthread_once(&mevent_once, mevent_init); 499 500 /* 501 * Open the pipe that will be used for other threads to force 502 * the blocking kqueue call to exit by writing to it. Set the 503 * descriptor to non-blocking. 504 */ 505 ret = pipe(mevent_pipefd); 506 if (ret < 0) { 507 perror("pipe"); 508 exit(0); 509 } 510 511 #ifndef WITHOUT_CAPSICUM 512 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 513 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 514 errx(EX_OSERR, "Unable to apply rights for sandbox"); 515 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 516 errx(EX_OSERR, "Unable to apply rights for sandbox"); 517 #endif 518 519 /* 520 * Add internal event handler for the pipe write fd 521 */ 522 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 523 assert(pipev != NULL); 524 525 for (;;) { 526 /* 527 * Build changelist if required. 528 * XXX the changelist can be put into the blocking call 529 * to eliminate the extra syscall. Currently better for 530 * debug. 531 */ 532 numev = mevent_build(changelist); 533 if (numev) { 534 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 535 if (ret == -1) { 536 perror("Error return from kevent change"); 537 } 538 } 539 540 /* 541 * Block awaiting events 542 */ 543 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 544 if (ret == -1 && errno != EINTR) { 545 perror("Error return from kevent monitor"); 546 } 547 548 /* 549 * Handle reported events 550 */ 551 mevent_handle(eventlist, ret); 552 } 553 } 554