1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #ifndef WITHOUT_CAPSICUM 41 #include <capsicum_helpers.h> 42 #endif 43 #include <err.h> 44 #include <errno.h> 45 #include <stdbool.h> 46 #include <stdlib.h> 47 #include <stdio.h> 48 #include <string.h> 49 #include <sysexits.h> 50 #include <unistd.h> 51 52 #include <sys/types.h> 53 #ifndef WITHOUT_CAPSICUM 54 #include <sys/capsicum.h> 55 #endif 56 #include <sys/event.h> 57 #include <sys/time.h> 58 59 #include <pthread.h> 60 #include <pthread_np.h> 61 62 #include "mevent.h" 63 64 #define MEVENT_MAX 64 65 66 static pthread_t mevent_tid; 67 static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; 68 static int mevent_timid = 43; 69 static int mevent_pipefd[2]; 70 static int mfd; 71 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 72 73 struct mevent { 74 void (*me_func)(int, enum ev_type, void *); 75 #define me_msecs me_fd 76 int me_fd; 77 int me_timid; 78 enum ev_type me_type; 79 void *me_param; 80 int me_cq; 81 int me_state; /* Desired kevent flags. */ 82 int me_closefd; 83 int me_fflags; 84 LIST_ENTRY(mevent) me_list; 85 }; 86 87 static LIST_HEAD(listhead, mevent) global_head, change_head; 88 89 static void 90 mevent_qlock(void) 91 { 92 pthread_mutex_lock(&mevent_lmutex); 93 } 94 95 static void 96 mevent_qunlock(void) 97 { 98 pthread_mutex_unlock(&mevent_lmutex); 99 } 100 101 static void 102 mevent_pipe_read(int fd, enum ev_type type, void *param) 103 { 104 char buf[MEVENT_MAX]; 105 int status; 106 107 /* 108 * Drain the pipe read side. The fd is non-blocking so this is 109 * safe to do. 110 */ 111 do { 112 status = read(fd, buf, sizeof(buf)); 113 } while (status == MEVENT_MAX); 114 } 115 116 static void 117 mevent_notify(void) 118 { 119 char c = '\0'; 120 121 /* 122 * If calling from outside the i/o thread, write a byte on the 123 * pipe to force the i/o thread to exit the blocking kevent call. 124 */ 125 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 126 write(mevent_pipefd[1], &c, 1); 127 } 128 } 129 130 static void 131 mevent_init(void) 132 { 133 #ifndef WITHOUT_CAPSICUM 134 cap_rights_t rights; 135 #endif 136 137 mfd = kqueue(); 138 assert(mfd > 0); 139 140 #ifndef WITHOUT_CAPSICUM 141 cap_rights_init(&rights, CAP_KQUEUE); 142 if (caph_rights_limit(mfd, &rights) == -1) 143 errx(EX_OSERR, "Unable to apply rights for sandbox"); 144 #endif 145 146 LIST_INIT(&change_head); 147 LIST_INIT(&global_head); 148 } 149 150 static int 151 mevent_kq_filter(struct mevent *mevp) 152 { 153 int retval; 154 155 retval = 0; 156 157 if (mevp->me_type == EVF_READ) 158 retval = EVFILT_READ; 159 160 if (mevp->me_type == EVF_WRITE) 161 retval = EVFILT_WRITE; 162 163 if (mevp->me_type == EVF_TIMER) 164 retval = EVFILT_TIMER; 165 166 if (mevp->me_type == EVF_SIGNAL) 167 retval = EVFILT_SIGNAL; 168 169 if (mevp->me_type == EVF_VNODE) 170 retval = EVFILT_VNODE; 171 172 return (retval); 173 } 174 175 static int 176 mevent_kq_flags(struct mevent *mevp) 177 { 178 int retval; 179 180 retval = mevp->me_state; 181 182 if (mevp->me_type == EVF_VNODE) 183 retval |= EV_CLEAR; 184 185 return (retval); 186 } 187 188 static int 189 mevent_kq_fflags(struct mevent *mevp) 190 { 191 int retval; 192 193 retval = 0; 194 195 switch (mevp->me_type) { 196 case EVF_VNODE: 197 if ((mevp->me_fflags & EVFF_ATTRIB) != 0) 198 retval |= NOTE_ATTRIB; 199 break; 200 case EVF_READ: 201 case EVF_WRITE: 202 case EVF_TIMER: 203 case EVF_SIGNAL: 204 break; 205 } 206 207 return (retval); 208 } 209 210 static void 211 mevent_populate(struct mevent *mevp, struct kevent *kev) 212 { 213 if (mevp->me_type == EVF_TIMER) { 214 kev->ident = mevp->me_timid; 215 kev->data = mevp->me_msecs; 216 } else { 217 kev->ident = mevp->me_fd; 218 kev->data = 0; 219 } 220 kev->filter = mevent_kq_filter(mevp); 221 kev->flags = mevent_kq_flags(mevp); 222 kev->fflags = mevent_kq_fflags(mevp); 223 kev->udata = mevp; 224 } 225 226 static int 227 mevent_build(struct kevent *kev) 228 { 229 struct mevent *mevp, *tmpp; 230 int i; 231 232 i = 0; 233 234 mevent_qlock(); 235 236 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 237 if (mevp->me_closefd) { 238 /* 239 * A close of the file descriptor will remove the 240 * event 241 */ 242 close(mevp->me_fd); 243 } else { 244 assert((mevp->me_state & EV_ADD) == 0); 245 mevent_populate(mevp, &kev[i]); 246 i++; 247 } 248 249 mevp->me_cq = 0; 250 LIST_REMOVE(mevp, me_list); 251 252 if (mevp->me_state & EV_DELETE) { 253 free(mevp); 254 } else { 255 LIST_INSERT_HEAD(&global_head, mevp, me_list); 256 } 257 258 assert(i < MEVENT_MAX); 259 } 260 261 mevent_qunlock(); 262 263 return (i); 264 } 265 266 static void 267 mevent_handle(struct kevent *kev, int numev) 268 { 269 struct mevent *mevp; 270 int i; 271 272 for (i = 0; i < numev; i++) { 273 mevp = kev[i].udata; 274 275 /* XXX check for EV_ERROR ? */ 276 277 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 278 } 279 } 280 281 static struct mevent * 282 mevent_add_state(int tfd, enum ev_type type, 283 void (*func)(int, enum ev_type, void *), void *param, 284 int state, int fflags) 285 { 286 struct kevent kev; 287 struct mevent *lp, *mevp; 288 int ret; 289 290 if (tfd < 0 || func == NULL) { 291 return (NULL); 292 } 293 294 mevp = NULL; 295 296 pthread_once(&mevent_once, mevent_init); 297 298 mevent_qlock(); 299 300 /* 301 * Verify that the fd/type tuple is not present in any list 302 */ 303 LIST_FOREACH(lp, &global_head, me_list) { 304 if (type != EVF_TIMER && lp->me_fd == tfd && 305 lp->me_type == type) { 306 goto exit; 307 } 308 } 309 310 LIST_FOREACH(lp, &change_head, me_list) { 311 if (type != EVF_TIMER && lp->me_fd == tfd && 312 lp->me_type == type) { 313 goto exit; 314 } 315 } 316 317 /* 318 * Allocate an entry and populate it. 319 */ 320 mevp = calloc(1, sizeof(struct mevent)); 321 if (mevp == NULL) { 322 goto exit; 323 } 324 325 if (type == EVF_TIMER) { 326 mevp->me_msecs = tfd; 327 mevp->me_timid = mevent_timid++; 328 } else 329 mevp->me_fd = tfd; 330 mevp->me_type = type; 331 mevp->me_func = func; 332 mevp->me_param = param; 333 mevp->me_state = state; 334 mevp->me_fflags = fflags; 335 336 /* 337 * Try to add the event. If this fails, report the failure to 338 * the caller. 339 */ 340 mevent_populate(mevp, &kev); 341 ret = kevent(mfd, &kev, 1, NULL, 0, NULL); 342 if (ret == -1) { 343 free(mevp); 344 mevp = NULL; 345 goto exit; 346 } 347 348 mevp->me_state &= ~EV_ADD; 349 LIST_INSERT_HEAD(&global_head, mevp, me_list); 350 351 exit: 352 mevent_qunlock(); 353 354 return (mevp); 355 } 356 357 struct mevent * 358 mevent_add(int tfd, enum ev_type type, 359 void (*func)(int, enum ev_type, void *), void *param) 360 { 361 362 return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); 363 } 364 365 struct mevent * 366 mevent_add_flags(int tfd, enum ev_type type, int fflags, 367 void (*func)(int, enum ev_type, void *), void *param) 368 { 369 370 return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); 371 } 372 373 struct mevent * 374 mevent_add_disabled(int tfd, enum ev_type type, 375 void (*func)(int, enum ev_type, void *), void *param) 376 { 377 378 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); 379 } 380 381 static int 382 mevent_update(struct mevent *evp, bool enable) 383 { 384 int newstate; 385 386 mevent_qlock(); 387 388 /* 389 * It's not possible to enable/disable a deleted event 390 */ 391 assert((evp->me_state & EV_DELETE) == 0); 392 393 newstate = evp->me_state; 394 if (enable) { 395 newstate |= EV_ENABLE; 396 newstate &= ~EV_DISABLE; 397 } else { 398 newstate |= EV_DISABLE; 399 newstate &= ~EV_ENABLE; 400 } 401 402 /* 403 * No update needed if state isn't changing 404 */ 405 if (evp->me_state != newstate) { 406 evp->me_state = newstate; 407 408 /* 409 * Place the entry onto the changed list if not 410 * already there. 411 */ 412 if (evp->me_cq == 0) { 413 evp->me_cq = 1; 414 LIST_REMOVE(evp, me_list); 415 LIST_INSERT_HEAD(&change_head, evp, me_list); 416 mevent_notify(); 417 } 418 } 419 420 mevent_qunlock(); 421 422 return (0); 423 } 424 425 int 426 mevent_enable(struct mevent *evp) 427 { 428 429 return (mevent_update(evp, true)); 430 } 431 432 int 433 mevent_disable(struct mevent *evp) 434 { 435 436 return (mevent_update(evp, false)); 437 } 438 439 static int 440 mevent_delete_event(struct mevent *evp, int closefd) 441 { 442 mevent_qlock(); 443 444 /* 445 * Place the entry onto the changed list if not already there, and 446 * mark as to be deleted. 447 */ 448 if (evp->me_cq == 0) { 449 evp->me_cq = 1; 450 LIST_REMOVE(evp, me_list); 451 LIST_INSERT_HEAD(&change_head, evp, me_list); 452 mevent_notify(); 453 } 454 evp->me_state = EV_DELETE; 455 456 if (closefd) 457 evp->me_closefd = 1; 458 459 mevent_qunlock(); 460 461 return (0); 462 } 463 464 int 465 mevent_delete(struct mevent *evp) 466 { 467 468 return (mevent_delete_event(evp, 0)); 469 } 470 471 int 472 mevent_delete_close(struct mevent *evp) 473 { 474 475 return (mevent_delete_event(evp, 1)); 476 } 477 478 static void 479 mevent_set_name(void) 480 { 481 482 pthread_set_name_np(mevent_tid, "mevent"); 483 } 484 485 void 486 mevent_dispatch(void) 487 { 488 struct kevent changelist[MEVENT_MAX]; 489 struct kevent eventlist[MEVENT_MAX]; 490 struct mevent *pipev; 491 int numev; 492 int ret; 493 #ifndef WITHOUT_CAPSICUM 494 cap_rights_t rights; 495 #endif 496 497 mevent_tid = pthread_self(); 498 mevent_set_name(); 499 500 pthread_once(&mevent_once, mevent_init); 501 502 /* 503 * Open the pipe that will be used for other threads to force 504 * the blocking kqueue call to exit by writing to it. Set the 505 * descriptor to non-blocking. 506 */ 507 ret = pipe(mevent_pipefd); 508 if (ret < 0) { 509 perror("pipe"); 510 exit(0); 511 } 512 513 #ifndef WITHOUT_CAPSICUM 514 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 515 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 516 errx(EX_OSERR, "Unable to apply rights for sandbox"); 517 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 518 errx(EX_OSERR, "Unable to apply rights for sandbox"); 519 #endif 520 521 /* 522 * Add internal event handler for the pipe write fd 523 */ 524 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 525 assert(pipev != NULL); 526 527 for (;;) { 528 /* 529 * Build changelist if required. 530 * XXX the changelist can be put into the blocking call 531 * to eliminate the extra syscall. Currently better for 532 * debug. 533 */ 534 numev = mevent_build(changelist); 535 if (numev) { 536 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 537 if (ret == -1) { 538 perror("Error return from kevent change"); 539 } 540 } 541 542 /* 543 * Block awaiting events 544 */ 545 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 546 if (ret == -1 && errno != EINTR) { 547 perror("Error return from kevent monitor"); 548 } 549 550 /* 551 * Handle reported events 552 */ 553 mevent_handle(eventlist, ret); 554 } 555 } 556