1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Micro event library for FreeBSD, designed for a single i/o thread 31 * using kqueue, and having events be persistent by default. 32 */ 33 34 #include <sys/cdefs.h> 35 #include <assert.h> 36 #ifndef WITHOUT_CAPSICUM 37 #include <capsicum_helpers.h> 38 #endif 39 #include <err.h> 40 #include <errno.h> 41 #include <stdbool.h> 42 #include <stdlib.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <sysexits.h> 46 #include <unistd.h> 47 48 #include <sys/types.h> 49 #ifndef WITHOUT_CAPSICUM 50 #include <sys/capsicum.h> 51 #endif 52 #include <sys/event.h> 53 #include <sys/time.h> 54 55 #include <pthread.h> 56 #include <pthread_np.h> 57 58 #include "mevent.h" 59 60 #define MEVENT_MAX 64 61 62 static pthread_t mevent_tid; 63 static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; 64 static int mevent_timid = 43; 65 static int mevent_pipefd[2]; 66 static int mfd; 67 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 68 69 struct mevent { 70 void (*me_func)(int, enum ev_type, void *); 71 #define me_msecs me_fd 72 int me_fd; 73 int me_timid; 74 enum ev_type me_type; 75 void *me_param; 76 int me_cq; 77 int me_state; /* Desired kevent flags. */ 78 int me_closefd; 79 int me_fflags; 80 LIST_ENTRY(mevent) me_list; 81 }; 82 83 static LIST_HEAD(listhead, mevent) global_head, change_head; 84 85 static void 86 mevent_qlock(void) 87 { 88 pthread_mutex_lock(&mevent_lmutex); 89 } 90 91 static void 92 mevent_qunlock(void) 93 { 94 pthread_mutex_unlock(&mevent_lmutex); 95 } 96 97 static void 98 mevent_pipe_read(int fd, enum ev_type type __unused, void *param __unused) 99 { 100 char buf[MEVENT_MAX]; 101 int status; 102 103 /* 104 * Drain the pipe read side. The fd is non-blocking so this is 105 * safe to do. 106 */ 107 do { 108 status = read(fd, buf, sizeof(buf)); 109 } while (status == MEVENT_MAX); 110 } 111 112 static void 113 mevent_notify(void) 114 { 115 char c = '\0'; 116 117 /* 118 * If calling from outside the i/o thread, write a byte on the 119 * pipe to force the i/o thread to exit the blocking kevent call. 120 */ 121 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 122 write(mevent_pipefd[1], &c, 1); 123 } 124 } 125 126 static void 127 mevent_init(void) 128 { 129 #ifndef WITHOUT_CAPSICUM 130 cap_rights_t rights; 131 #endif 132 133 mfd = kqueue(); 134 assert(mfd > 0); 135 136 #ifndef WITHOUT_CAPSICUM 137 cap_rights_init(&rights, CAP_KQUEUE); 138 if (caph_rights_limit(mfd, &rights) == -1) 139 errx(EX_OSERR, "Unable to apply rights for sandbox"); 140 #endif 141 142 LIST_INIT(&change_head); 143 LIST_INIT(&global_head); 144 } 145 146 static int 147 mevent_kq_filter(struct mevent *mevp) 148 { 149 int retval; 150 151 retval = 0; 152 153 if (mevp->me_type == EVF_READ) 154 retval = EVFILT_READ; 155 156 if (mevp->me_type == EVF_WRITE) 157 retval = EVFILT_WRITE; 158 159 if (mevp->me_type == EVF_TIMER) 160 retval = EVFILT_TIMER; 161 162 if (mevp->me_type == EVF_SIGNAL) 163 retval = EVFILT_SIGNAL; 164 165 if (mevp->me_type == EVF_VNODE) 166 retval = EVFILT_VNODE; 167 168 return (retval); 169 } 170 171 static int 172 mevent_kq_flags(struct mevent *mevp) 173 { 174 int retval; 175 176 retval = mevp->me_state; 177 178 if (mevp->me_type == EVF_VNODE) 179 retval |= EV_CLEAR; 180 181 return (retval); 182 } 183 184 static int 185 mevent_kq_fflags(struct mevent *mevp) 186 { 187 int retval; 188 189 retval = 0; 190 191 switch (mevp->me_type) { 192 case EVF_VNODE: 193 if ((mevp->me_fflags & EVFF_ATTRIB) != 0) 194 retval |= NOTE_ATTRIB; 195 break; 196 case EVF_READ: 197 case EVF_WRITE: 198 case EVF_TIMER: 199 case EVF_SIGNAL: 200 break; 201 } 202 203 return (retval); 204 } 205 206 static void 207 mevent_populate(struct mevent *mevp, struct kevent *kev) 208 { 209 if (mevp->me_type == EVF_TIMER) { 210 kev->ident = mevp->me_timid; 211 kev->data = mevp->me_msecs; 212 } else { 213 kev->ident = mevp->me_fd; 214 kev->data = 0; 215 } 216 kev->filter = mevent_kq_filter(mevp); 217 kev->flags = mevent_kq_flags(mevp); 218 kev->fflags = mevent_kq_fflags(mevp); 219 kev->udata = mevp; 220 } 221 222 static int 223 mevent_build(struct kevent *kev) 224 { 225 struct mevent *mevp, *tmpp; 226 int i; 227 228 i = 0; 229 230 mevent_qlock(); 231 232 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 233 if (mevp->me_closefd) { 234 /* 235 * A close of the file descriptor will remove the 236 * event 237 */ 238 close(mevp->me_fd); 239 } else { 240 assert((mevp->me_state & EV_ADD) == 0); 241 mevent_populate(mevp, &kev[i]); 242 i++; 243 } 244 245 mevp->me_cq = 0; 246 LIST_REMOVE(mevp, me_list); 247 248 if (mevp->me_state & EV_DELETE) { 249 free(mevp); 250 } else { 251 LIST_INSERT_HEAD(&global_head, mevp, me_list); 252 } 253 254 assert(i < MEVENT_MAX); 255 } 256 257 mevent_qunlock(); 258 259 return (i); 260 } 261 262 static void 263 mevent_handle(struct kevent *kev, int numev) 264 { 265 struct mevent *mevp; 266 int i; 267 268 for (i = 0; i < numev; i++) { 269 mevp = kev[i].udata; 270 271 /* XXX check for EV_ERROR ? */ 272 273 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 274 } 275 } 276 277 static struct mevent * 278 mevent_add_state(int tfd, enum ev_type type, 279 void (*func)(int, enum ev_type, void *), void *param, 280 int state, int fflags) 281 { 282 struct kevent kev; 283 struct mevent *lp, *mevp; 284 int ret; 285 286 if (tfd < 0 || func == NULL) { 287 return (NULL); 288 } 289 290 mevp = NULL; 291 292 pthread_once(&mevent_once, mevent_init); 293 294 mevent_qlock(); 295 296 /* 297 * Verify that the fd/type tuple is not present in any list 298 */ 299 LIST_FOREACH(lp, &global_head, me_list) { 300 if (type != EVF_TIMER && lp->me_fd == tfd && 301 lp->me_type == type) { 302 goto exit; 303 } 304 } 305 306 LIST_FOREACH(lp, &change_head, me_list) { 307 if (type != EVF_TIMER && lp->me_fd == tfd && 308 lp->me_type == type) { 309 goto exit; 310 } 311 } 312 313 /* 314 * Allocate an entry and populate it. 315 */ 316 mevp = calloc(1, sizeof(struct mevent)); 317 if (mevp == NULL) { 318 goto exit; 319 } 320 321 if (type == EVF_TIMER) { 322 mevp->me_msecs = tfd; 323 mevp->me_timid = mevent_timid++; 324 } else 325 mevp->me_fd = tfd; 326 mevp->me_type = type; 327 mevp->me_func = func; 328 mevp->me_param = param; 329 mevp->me_state = state; 330 mevp->me_fflags = fflags; 331 332 /* 333 * Try to add the event. If this fails, report the failure to 334 * the caller. 335 */ 336 mevent_populate(mevp, &kev); 337 ret = kevent(mfd, &kev, 1, NULL, 0, NULL); 338 if (ret == -1) { 339 free(mevp); 340 mevp = NULL; 341 goto exit; 342 } 343 344 mevp->me_state &= ~EV_ADD; 345 LIST_INSERT_HEAD(&global_head, mevp, me_list); 346 347 exit: 348 mevent_qunlock(); 349 350 return (mevp); 351 } 352 353 struct mevent * 354 mevent_add(int tfd, enum ev_type type, 355 void (*func)(int, enum ev_type, void *), void *param) 356 { 357 358 return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); 359 } 360 361 struct mevent * 362 mevent_add_flags(int tfd, enum ev_type type, int fflags, 363 void (*func)(int, enum ev_type, void *), void *param) 364 { 365 366 return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); 367 } 368 369 struct mevent * 370 mevent_add_disabled(int tfd, enum ev_type type, 371 void (*func)(int, enum ev_type, void *), void *param) 372 { 373 374 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); 375 } 376 377 static int 378 mevent_update(struct mevent *evp, bool enable) 379 { 380 int newstate; 381 382 mevent_qlock(); 383 384 /* 385 * It's not possible to enable/disable a deleted event 386 */ 387 assert((evp->me_state & EV_DELETE) == 0); 388 389 newstate = evp->me_state; 390 if (enable) { 391 newstate |= EV_ENABLE; 392 newstate &= ~EV_DISABLE; 393 } else { 394 newstate |= EV_DISABLE; 395 newstate &= ~EV_ENABLE; 396 } 397 398 /* 399 * No update needed if state isn't changing 400 */ 401 if (evp->me_state != newstate) { 402 evp->me_state = newstate; 403 404 /* 405 * Place the entry onto the changed list if not 406 * already there. 407 */ 408 if (evp->me_cq == 0) { 409 evp->me_cq = 1; 410 LIST_REMOVE(evp, me_list); 411 LIST_INSERT_HEAD(&change_head, evp, me_list); 412 mevent_notify(); 413 } 414 } 415 416 mevent_qunlock(); 417 418 return (0); 419 } 420 421 int 422 mevent_enable(struct mevent *evp) 423 { 424 425 return (mevent_update(evp, true)); 426 } 427 428 int 429 mevent_disable(struct mevent *evp) 430 { 431 432 return (mevent_update(evp, false)); 433 } 434 435 static int 436 mevent_delete_event(struct mevent *evp, int closefd) 437 { 438 mevent_qlock(); 439 440 /* 441 * Place the entry onto the changed list if not already there, and 442 * mark as to be deleted. 443 */ 444 if (evp->me_cq == 0) { 445 evp->me_cq = 1; 446 LIST_REMOVE(evp, me_list); 447 LIST_INSERT_HEAD(&change_head, evp, me_list); 448 mevent_notify(); 449 } 450 evp->me_state = EV_DELETE; 451 452 if (closefd) 453 evp->me_closefd = 1; 454 455 mevent_qunlock(); 456 457 return (0); 458 } 459 460 int 461 mevent_delete(struct mevent *evp) 462 { 463 464 return (mevent_delete_event(evp, 0)); 465 } 466 467 int 468 mevent_delete_close(struct mevent *evp) 469 { 470 471 return (mevent_delete_event(evp, 1)); 472 } 473 474 static void 475 mevent_set_name(void) 476 { 477 478 pthread_set_name_np(mevent_tid, "mevent"); 479 } 480 481 void 482 mevent_dispatch(void) 483 { 484 struct kevent changelist[MEVENT_MAX]; 485 struct kevent eventlist[MEVENT_MAX]; 486 struct mevent *pipev; 487 int numev; 488 int ret; 489 #ifndef WITHOUT_CAPSICUM 490 cap_rights_t rights; 491 #endif 492 493 mevent_tid = pthread_self(); 494 mevent_set_name(); 495 496 pthread_once(&mevent_once, mevent_init); 497 498 /* 499 * Open the pipe that will be used for other threads to force 500 * the blocking kqueue call to exit by writing to it. Set the 501 * descriptor to non-blocking. 502 */ 503 ret = pipe(mevent_pipefd); 504 if (ret < 0) { 505 perror("pipe"); 506 exit(0); 507 } 508 509 #ifndef WITHOUT_CAPSICUM 510 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 511 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 512 errx(EX_OSERR, "Unable to apply rights for sandbox"); 513 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 514 errx(EX_OSERR, "Unable to apply rights for sandbox"); 515 #endif 516 517 /* 518 * Add internal event handler for the pipe write fd 519 */ 520 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 521 assert(pipev != NULL); 522 523 for (;;) { 524 /* 525 * Build changelist if required. 526 * XXX the changelist can be put into the blocking call 527 * to eliminate the extra syscall. Currently better for 528 * debug. 529 */ 530 numev = mevent_build(changelist); 531 if (numev) { 532 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 533 if (ret == -1) { 534 perror("Error return from kevent change"); 535 } 536 } 537 538 /* 539 * Block awaiting events 540 */ 541 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 542 if (ret == -1 && errno != EINTR) { 543 perror("Error return from kevent monitor"); 544 } 545 546 /* 547 * Handle reported events 548 */ 549 mevent_handle(eventlist, ret); 550 } 551 } 552