1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #ifndef WITHOUT_CAPSICUM 41 #include <capsicum_helpers.h> 42 #endif 43 #include <err.h> 44 #include <errno.h> 45 #include <stdbool.h> 46 #include <stdlib.h> 47 #include <stdio.h> 48 #include <string.h> 49 #include <sysexits.h> 50 #include <unistd.h> 51 52 #include <sys/types.h> 53 #ifndef WITHOUT_CAPSICUM 54 #include <sys/capsicum.h> 55 #endif 56 #include <sys/event.h> 57 #include <sys/time.h> 58 59 #include <pthread.h> 60 #include <pthread_np.h> 61 62 #include "mevent.h" 63 64 #define MEVENT_MAX 64 65 66 static pthread_t mevent_tid; 67 static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; 68 static int mevent_timid = 43; 69 static int mevent_pipefd[2]; 70 static int mfd; 71 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 72 73 struct mevent { 74 void (*me_func)(int, enum ev_type, void *); 75 #define me_msecs me_fd 76 int me_fd; 77 int me_timid; 78 enum ev_type me_type; 79 void *me_param; 80 int me_cq; 81 int me_state; /* Desired kevent flags. */ 82 int me_closefd; 83 int me_fflags; 84 LIST_ENTRY(mevent) me_list; 85 }; 86 87 static LIST_HEAD(listhead, mevent) global_head, change_head; 88 89 static void 90 mevent_qlock(void) 91 { 92 pthread_mutex_lock(&mevent_lmutex); 93 } 94 95 static void 96 mevent_qunlock(void) 97 { 98 pthread_mutex_unlock(&mevent_lmutex); 99 } 100 101 static void 102 mevent_pipe_read(int fd, enum ev_type type, void *param) 103 { 104 char buf[MEVENT_MAX]; 105 int status; 106 107 /* 108 * Drain the pipe read side. The fd is non-blocking so this is 109 * safe to do. 110 */ 111 do { 112 status = read(fd, buf, sizeof(buf)); 113 } while (status == MEVENT_MAX); 114 } 115 116 static void 117 mevent_notify(void) 118 { 119 char c = '\0'; 120 121 /* 122 * If calling from outside the i/o thread, write a byte on the 123 * pipe to force the i/o thread to exit the blocking kevent call. 124 */ 125 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 126 write(mevent_pipefd[1], &c, 1); 127 } 128 } 129 130 static void 131 mevent_init(void) 132 { 133 #ifndef WITHOUT_CAPSICUM 134 cap_rights_t rights; 135 #endif 136 137 mfd = kqueue(); 138 assert(mfd > 0); 139 140 #ifndef WITHOUT_CAPSICUM 141 cap_rights_init(&rights, CAP_KQUEUE); 142 if (caph_rights_limit(mfd, &rights) == -1) 143 errx(EX_OSERR, "Unable to apply rights for sandbox"); 144 #endif 145 146 LIST_INIT(&change_head); 147 LIST_INIT(&global_head); 148 } 149 150 static int 151 mevent_kq_filter(struct mevent *mevp) 152 { 153 int retval; 154 155 retval = 0; 156 157 if (mevp->me_type == EVF_READ) 158 retval = EVFILT_READ; 159 160 if (mevp->me_type == EVF_WRITE) 161 retval = EVFILT_WRITE; 162 163 if (mevp->me_type == EVF_TIMER) 164 retval = EVFILT_TIMER; 165 166 if (mevp->me_type == EVF_SIGNAL) 167 retval = EVFILT_SIGNAL; 168 169 if (mevp->me_type == EVF_VNODE) 170 retval = EVFILT_VNODE; 171 172 return (retval); 173 } 174 175 static int 176 mevent_kq_flags(struct mevent *mevp) 177 { 178 return (mevp->me_state); 179 } 180 181 static int 182 mevent_kq_fflags(struct mevent *mevp) 183 { 184 int retval; 185 186 retval = 0; 187 188 switch (mevp->me_type) { 189 case EVF_VNODE: 190 if ((mevp->me_fflags & EVFF_ATTRIB) != 0) 191 retval |= NOTE_ATTRIB; 192 break; 193 case EVF_READ: 194 case EVF_WRITE: 195 case EVF_TIMER: 196 case EVF_SIGNAL: 197 break; 198 } 199 200 return (retval); 201 } 202 203 static void 204 mevent_populate(struct mevent *mevp, struct kevent *kev) 205 { 206 if (mevp->me_type == EVF_TIMER) { 207 kev->ident = mevp->me_timid; 208 kev->data = mevp->me_msecs; 209 } else { 210 kev->ident = mevp->me_fd; 211 kev->data = 0; 212 } 213 kev->filter = mevent_kq_filter(mevp); 214 kev->flags = mevent_kq_flags(mevp); 215 kev->fflags = mevent_kq_fflags(mevp); 216 kev->udata = mevp; 217 } 218 219 static int 220 mevent_build(struct kevent *kev) 221 { 222 struct mevent *mevp, *tmpp; 223 int i; 224 225 i = 0; 226 227 mevent_qlock(); 228 229 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 230 if (mevp->me_closefd) { 231 /* 232 * A close of the file descriptor will remove the 233 * event 234 */ 235 close(mevp->me_fd); 236 } else { 237 assert((mevp->me_state & EV_ADD) == 0); 238 mevent_populate(mevp, &kev[i]); 239 i++; 240 } 241 242 mevp->me_cq = 0; 243 LIST_REMOVE(mevp, me_list); 244 245 if (mevp->me_state & EV_DELETE) { 246 free(mevp); 247 } else { 248 LIST_INSERT_HEAD(&global_head, mevp, me_list); 249 } 250 251 assert(i < MEVENT_MAX); 252 } 253 254 mevent_qunlock(); 255 256 return (i); 257 } 258 259 static void 260 mevent_handle(struct kevent *kev, int numev) 261 { 262 struct mevent *mevp; 263 int i; 264 265 for (i = 0; i < numev; i++) { 266 mevp = kev[i].udata; 267 268 /* XXX check for EV_ERROR ? */ 269 270 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 271 } 272 } 273 274 static struct mevent * 275 mevent_add_state(int tfd, enum ev_type type, 276 void (*func)(int, enum ev_type, void *), void *param, 277 int state, int fflags) 278 { 279 struct kevent kev; 280 struct mevent *lp, *mevp; 281 int ret; 282 283 if (tfd < 0 || func == NULL) { 284 return (NULL); 285 } 286 287 mevp = NULL; 288 289 pthread_once(&mevent_once, mevent_init); 290 291 mevent_qlock(); 292 293 /* 294 * Verify that the fd/type tuple is not present in any list 295 */ 296 LIST_FOREACH(lp, &global_head, me_list) { 297 if (type != EVF_TIMER && lp->me_fd == tfd && 298 lp->me_type == type) { 299 goto exit; 300 } 301 } 302 303 LIST_FOREACH(lp, &change_head, me_list) { 304 if (type != EVF_TIMER && lp->me_fd == tfd && 305 lp->me_type == type) { 306 goto exit; 307 } 308 } 309 310 /* 311 * Allocate an entry and populate it. 312 */ 313 mevp = calloc(1, sizeof(struct mevent)); 314 if (mevp == NULL) { 315 goto exit; 316 } 317 318 if (type == EVF_TIMER) { 319 mevp->me_msecs = tfd; 320 mevp->me_timid = mevent_timid++; 321 } else 322 mevp->me_fd = tfd; 323 mevp->me_type = type; 324 mevp->me_func = func; 325 mevp->me_param = param; 326 mevp->me_state = state; 327 mevp->me_fflags = fflags; 328 329 /* 330 * Try to add the event. If this fails, report the failure to 331 * the caller. 332 */ 333 mevent_populate(mevp, &kev); 334 ret = kevent(mfd, &kev, 1, NULL, 0, NULL); 335 if (ret == -1) { 336 free(mevp); 337 mevp = NULL; 338 goto exit; 339 } 340 341 mevp->me_state &= ~EV_ADD; 342 LIST_INSERT_HEAD(&global_head, mevp, me_list); 343 344 exit: 345 mevent_qunlock(); 346 347 return (mevp); 348 } 349 350 struct mevent * 351 mevent_add(int tfd, enum ev_type type, 352 void (*func)(int, enum ev_type, void *), void *param) 353 { 354 355 return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); 356 } 357 358 struct mevent * 359 mevent_add_flags(int tfd, enum ev_type type, int fflags, 360 void (*func)(int, enum ev_type, void *), void *param) 361 { 362 363 return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); 364 } 365 366 struct mevent * 367 mevent_add_disabled(int tfd, enum ev_type type, 368 void (*func)(int, enum ev_type, void *), void *param) 369 { 370 371 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); 372 } 373 374 static int 375 mevent_update(struct mevent *evp, bool enable) 376 { 377 int newstate; 378 379 mevent_qlock(); 380 381 /* 382 * It's not possible to enable/disable a deleted event 383 */ 384 assert((evp->me_state & EV_DELETE) == 0); 385 386 newstate = evp->me_state; 387 if (enable) { 388 newstate |= EV_ENABLE; 389 newstate &= ~EV_DISABLE; 390 } else { 391 newstate |= EV_DISABLE; 392 newstate &= ~EV_ENABLE; 393 } 394 395 /* 396 * No update needed if state isn't changing 397 */ 398 if (evp->me_state != newstate) { 399 evp->me_state = newstate; 400 401 /* 402 * Place the entry onto the changed list if not 403 * already there. 404 */ 405 if (evp->me_cq == 0) { 406 evp->me_cq = 1; 407 LIST_REMOVE(evp, me_list); 408 LIST_INSERT_HEAD(&change_head, evp, me_list); 409 mevent_notify(); 410 } 411 } 412 413 mevent_qunlock(); 414 415 return (0); 416 } 417 418 int 419 mevent_enable(struct mevent *evp) 420 { 421 422 return (mevent_update(evp, true)); 423 } 424 425 int 426 mevent_disable(struct mevent *evp) 427 { 428 429 return (mevent_update(evp, false)); 430 } 431 432 static int 433 mevent_delete_event(struct mevent *evp, int closefd) 434 { 435 mevent_qlock(); 436 437 /* 438 * Place the entry onto the changed list if not already there, and 439 * mark as to be deleted. 440 */ 441 if (evp->me_cq == 0) { 442 evp->me_cq = 1; 443 LIST_REMOVE(evp, me_list); 444 LIST_INSERT_HEAD(&change_head, evp, me_list); 445 mevent_notify(); 446 } 447 evp->me_state = EV_DELETE; 448 449 if (closefd) 450 evp->me_closefd = 1; 451 452 mevent_qunlock(); 453 454 return (0); 455 } 456 457 int 458 mevent_delete(struct mevent *evp) 459 { 460 461 return (mevent_delete_event(evp, 0)); 462 } 463 464 int 465 mevent_delete_close(struct mevent *evp) 466 { 467 468 return (mevent_delete_event(evp, 1)); 469 } 470 471 static void 472 mevent_set_name(void) 473 { 474 475 pthread_set_name_np(mevent_tid, "mevent"); 476 } 477 478 void 479 mevent_dispatch(void) 480 { 481 struct kevent changelist[MEVENT_MAX]; 482 struct kevent eventlist[MEVENT_MAX]; 483 struct mevent *pipev; 484 int numev; 485 int ret; 486 #ifndef WITHOUT_CAPSICUM 487 cap_rights_t rights; 488 #endif 489 490 mevent_tid = pthread_self(); 491 mevent_set_name(); 492 493 pthread_once(&mevent_once, mevent_init); 494 495 /* 496 * Open the pipe that will be used for other threads to force 497 * the blocking kqueue call to exit by writing to it. Set the 498 * descriptor to non-blocking. 499 */ 500 ret = pipe(mevent_pipefd); 501 if (ret < 0) { 502 perror("pipe"); 503 exit(0); 504 } 505 506 #ifndef WITHOUT_CAPSICUM 507 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 508 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 509 errx(EX_OSERR, "Unable to apply rights for sandbox"); 510 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 511 errx(EX_OSERR, "Unable to apply rights for sandbox"); 512 #endif 513 514 /* 515 * Add internal event handler for the pipe write fd 516 */ 517 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 518 assert(pipev != NULL); 519 520 for (;;) { 521 /* 522 * Build changelist if required. 523 * XXX the changelist can be put into the blocking call 524 * to eliminate the extra syscall. Currently better for 525 * debug. 526 */ 527 numev = mevent_build(changelist); 528 if (numev) { 529 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 530 if (ret == -1) { 531 perror("Error return from kevent change"); 532 } 533 } 534 535 /* 536 * Block awaiting events 537 */ 538 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 539 if (ret == -1 && errno != EINTR) { 540 perror("Error return from kevent monitor"); 541 } 542 543 /* 544 * Handle reported events 545 */ 546 mevent_handle(eventlist, ret); 547 } 548 } 549