1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #ifndef WITHOUT_CAPSICUM 41 #include <capsicum_helpers.h> 42 #endif 43 #include <err.h> 44 #include <errno.h> 45 #include <stdbool.h> 46 #include <stdlib.h> 47 #include <stdio.h> 48 #include <string.h> 49 #include <sysexits.h> 50 #include <unistd.h> 51 52 #include <sys/types.h> 53 #ifndef WITHOUT_CAPSICUM 54 #include <sys/capsicum.h> 55 #endif 56 #include <sys/event.h> 57 #include <sys/time.h> 58 59 #include <pthread.h> 60 #include <pthread_np.h> 61 62 #include "mevent.h" 63 64 #define MEVENT_MAX 64 65 66 static pthread_t mevent_tid; 67 static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; 68 static int mevent_timid = 43; 69 static int mevent_pipefd[2]; 70 static int mfd; 71 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 72 73 struct mevent { 74 void (*me_func)(int, enum ev_type, void *); 75 #define me_msecs me_fd 76 int me_fd; 77 int me_timid; 78 enum ev_type me_type; 79 void *me_param; 80 int me_cq; 81 int me_state; /* Desired kevent flags. */ 82 int me_closefd; 83 int me_fflags; 84 LIST_ENTRY(mevent) me_list; 85 }; 86 87 static LIST_HEAD(listhead, mevent) global_head, change_head; 88 89 static void 90 mevent_qlock(void) 91 { 92 pthread_mutex_lock(&mevent_lmutex); 93 } 94 95 static void 96 mevent_qunlock(void) 97 { 98 pthread_mutex_unlock(&mevent_lmutex); 99 } 100 101 static void 102 mevent_pipe_read(int fd, enum ev_type type, void *param) 103 { 104 char buf[MEVENT_MAX]; 105 int status; 106 107 /* 108 * Drain the pipe read side. The fd is non-blocking so this is 109 * safe to do. 110 */ 111 do { 112 status = read(fd, buf, sizeof(buf)); 113 } while (status == MEVENT_MAX); 114 } 115 116 static void 117 mevent_notify(void) 118 { 119 char c = '\0'; 120 121 /* 122 * If calling from outside the i/o thread, write a byte on the 123 * pipe to force the i/o thread to exit the blocking kevent call. 124 */ 125 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 126 write(mevent_pipefd[1], &c, 1); 127 } 128 } 129 130 static void 131 mevent_init(void) 132 { 133 #ifndef WITHOUT_CAPSICUM 134 cap_rights_t rights; 135 #endif 136 137 mfd = kqueue(); 138 assert(mfd > 0); 139 140 #ifndef WITHOUT_CAPSICUM 141 cap_rights_init(&rights, CAP_KQUEUE); 142 if (caph_rights_limit(mfd, &rights) == -1) 143 errx(EX_OSERR, "Unable to apply rights for sandbox"); 144 #endif 145 146 LIST_INIT(&change_head); 147 LIST_INIT(&global_head); 148 } 149 150 static int 151 mevent_kq_filter(struct mevent *mevp) 152 { 153 int retval; 154 155 retval = 0; 156 157 if (mevp->me_type == EVF_READ) 158 retval = EVFILT_READ; 159 160 if (mevp->me_type == EVF_WRITE) 161 retval = EVFILT_WRITE; 162 163 if (mevp->me_type == EVF_TIMER) 164 retval = EVFILT_TIMER; 165 166 if (mevp->me_type == EVF_SIGNAL) 167 retval = EVFILT_SIGNAL; 168 169 if (mevp->me_type == EVF_VNODE) 170 retval = EVFILT_VNODE; 171 172 return (retval); 173 } 174 175 static int 176 mevent_kq_flags(struct mevent *mevp) 177 { 178 return (mevp->me_state); 179 } 180 181 static int 182 mevent_kq_fflags(struct mevent *mevp) 183 { 184 int retval; 185 186 retval = 0; 187 188 switch (mevp->me_type) { 189 case EVF_VNODE: 190 if ((mevp->me_fflags & EVFF_ATTRIB) != 0) 191 retval |= NOTE_ATTRIB; 192 break; 193 } 194 195 return (retval); 196 } 197 198 static void 199 mevent_populate(struct mevent *mevp, struct kevent *kev) 200 { 201 if (mevp->me_type == EVF_TIMER) { 202 kev->ident = mevp->me_timid; 203 kev->data = mevp->me_msecs; 204 } else { 205 kev->ident = mevp->me_fd; 206 kev->data = 0; 207 } 208 kev->filter = mevent_kq_filter(mevp); 209 kev->flags = mevent_kq_flags(mevp); 210 kev->fflags = mevent_kq_fflags(mevp); 211 kev->udata = mevp; 212 } 213 214 static int 215 mevent_build(struct kevent *kev) 216 { 217 struct mevent *mevp, *tmpp; 218 int i; 219 220 i = 0; 221 222 mevent_qlock(); 223 224 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 225 if (mevp->me_closefd) { 226 /* 227 * A close of the file descriptor will remove the 228 * event 229 */ 230 close(mevp->me_fd); 231 } else { 232 assert((mevp->me_state & EV_ADD) == 0); 233 mevent_populate(mevp, &kev[i]); 234 i++; 235 } 236 237 mevp->me_cq = 0; 238 LIST_REMOVE(mevp, me_list); 239 240 if (mevp->me_state & EV_DELETE) { 241 free(mevp); 242 } else { 243 LIST_INSERT_HEAD(&global_head, mevp, me_list); 244 } 245 246 assert(i < MEVENT_MAX); 247 } 248 249 mevent_qunlock(); 250 251 return (i); 252 } 253 254 static void 255 mevent_handle(struct kevent *kev, int numev) 256 { 257 struct mevent *mevp; 258 int i; 259 260 for (i = 0; i < numev; i++) { 261 mevp = kev[i].udata; 262 263 /* XXX check for EV_ERROR ? */ 264 265 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 266 } 267 } 268 269 static struct mevent * 270 mevent_add_state(int tfd, enum ev_type type, 271 void (*func)(int, enum ev_type, void *), void *param, 272 int state, int fflags) 273 { 274 struct kevent kev; 275 struct mevent *lp, *mevp; 276 int ret; 277 278 if (tfd < 0 || func == NULL) { 279 return (NULL); 280 } 281 282 mevp = NULL; 283 284 pthread_once(&mevent_once, mevent_init); 285 286 mevent_qlock(); 287 288 /* 289 * Verify that the fd/type tuple is not present in any list 290 */ 291 LIST_FOREACH(lp, &global_head, me_list) { 292 if (type != EVF_TIMER && lp->me_fd == tfd && 293 lp->me_type == type) { 294 goto exit; 295 } 296 } 297 298 LIST_FOREACH(lp, &change_head, me_list) { 299 if (type != EVF_TIMER && lp->me_fd == tfd && 300 lp->me_type == type) { 301 goto exit; 302 } 303 } 304 305 /* 306 * Allocate an entry and populate it. 307 */ 308 mevp = calloc(1, sizeof(struct mevent)); 309 if (mevp == NULL) { 310 goto exit; 311 } 312 313 if (type == EVF_TIMER) { 314 mevp->me_msecs = tfd; 315 mevp->me_timid = mevent_timid++; 316 } else 317 mevp->me_fd = tfd; 318 mevp->me_type = type; 319 mevp->me_func = func; 320 mevp->me_param = param; 321 mevp->me_state = state; 322 mevp->me_fflags = fflags; 323 324 /* 325 * Try to add the event. If this fails, report the failure to 326 * the caller. 327 */ 328 mevent_populate(mevp, &kev); 329 ret = kevent(mfd, &kev, 1, NULL, 0, NULL); 330 if (ret == -1) { 331 free(mevp); 332 mevp = NULL; 333 goto exit; 334 } 335 336 mevp->me_state &= ~EV_ADD; 337 LIST_INSERT_HEAD(&global_head, mevp, me_list); 338 339 exit: 340 mevent_qunlock(); 341 342 return (mevp); 343 } 344 345 struct mevent * 346 mevent_add(int tfd, enum ev_type type, 347 void (*func)(int, enum ev_type, void *), void *param) 348 { 349 350 return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); 351 } 352 353 struct mevent * 354 mevent_add_flags(int tfd, enum ev_type type, int fflags, 355 void (*func)(int, enum ev_type, void *), void *param) 356 { 357 358 return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); 359 } 360 361 struct mevent * 362 mevent_add_disabled(int tfd, enum ev_type type, 363 void (*func)(int, enum ev_type, void *), void *param) 364 { 365 366 return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); 367 } 368 369 static int 370 mevent_update(struct mevent *evp, bool enable) 371 { 372 int newstate; 373 374 mevent_qlock(); 375 376 /* 377 * It's not possible to enable/disable a deleted event 378 */ 379 assert((evp->me_state & EV_DELETE) == 0); 380 381 newstate = evp->me_state; 382 if (enable) { 383 newstate |= EV_ENABLE; 384 newstate &= ~EV_DISABLE; 385 } else { 386 newstate |= EV_DISABLE; 387 newstate &= ~EV_ENABLE; 388 } 389 390 /* 391 * No update needed if state isn't changing 392 */ 393 if (evp->me_state != newstate) { 394 evp->me_state = newstate; 395 396 /* 397 * Place the entry onto the changed list if not 398 * already there. 399 */ 400 if (evp->me_cq == 0) { 401 evp->me_cq = 1; 402 LIST_REMOVE(evp, me_list); 403 LIST_INSERT_HEAD(&change_head, evp, me_list); 404 mevent_notify(); 405 } 406 } 407 408 mevent_qunlock(); 409 410 return (0); 411 } 412 413 int 414 mevent_enable(struct mevent *evp) 415 { 416 417 return (mevent_update(evp, true)); 418 } 419 420 int 421 mevent_disable(struct mevent *evp) 422 { 423 424 return (mevent_update(evp, false)); 425 } 426 427 static int 428 mevent_delete_event(struct mevent *evp, int closefd) 429 { 430 mevent_qlock(); 431 432 /* 433 * Place the entry onto the changed list if not already there, and 434 * mark as to be deleted. 435 */ 436 if (evp->me_cq == 0) { 437 evp->me_cq = 1; 438 LIST_REMOVE(evp, me_list); 439 LIST_INSERT_HEAD(&change_head, evp, me_list); 440 mevent_notify(); 441 } 442 evp->me_state = EV_DELETE; 443 444 if (closefd) 445 evp->me_closefd = 1; 446 447 mevent_qunlock(); 448 449 return (0); 450 } 451 452 int 453 mevent_delete(struct mevent *evp) 454 { 455 456 return (mevent_delete_event(evp, 0)); 457 } 458 459 int 460 mevent_delete_close(struct mevent *evp) 461 { 462 463 return (mevent_delete_event(evp, 1)); 464 } 465 466 static void 467 mevent_set_name(void) 468 { 469 470 pthread_set_name_np(mevent_tid, "mevent"); 471 } 472 473 void 474 mevent_dispatch(void) 475 { 476 struct kevent changelist[MEVENT_MAX]; 477 struct kevent eventlist[MEVENT_MAX]; 478 struct mevent *pipev; 479 int numev; 480 int ret; 481 #ifndef WITHOUT_CAPSICUM 482 cap_rights_t rights; 483 #endif 484 485 mevent_tid = pthread_self(); 486 mevent_set_name(); 487 488 pthread_once(&mevent_once, mevent_init); 489 490 /* 491 * Open the pipe that will be used for other threads to force 492 * the blocking kqueue call to exit by writing to it. Set the 493 * descriptor to non-blocking. 494 */ 495 ret = pipe(mevent_pipefd); 496 if (ret < 0) { 497 perror("pipe"); 498 exit(0); 499 } 500 501 #ifndef WITHOUT_CAPSICUM 502 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 503 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 504 errx(EX_OSERR, "Unable to apply rights for sandbox"); 505 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 506 errx(EX_OSERR, "Unable to apply rights for sandbox"); 507 #endif 508 509 /* 510 * Add internal event handler for the pipe write fd 511 */ 512 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 513 assert(pipev != NULL); 514 515 for (;;) { 516 /* 517 * Build changelist if required. 518 * XXX the changelist can be put into the blocking call 519 * to eliminate the extra syscall. Currently better for 520 * debug. 521 */ 522 numev = mevent_build(changelist); 523 if (numev) { 524 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 525 if (ret == -1) { 526 perror("Error return from kevent change"); 527 } 528 } 529 530 /* 531 * Block awaiting events 532 */ 533 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 534 if (ret == -1 && errno != EINTR) { 535 perror("Error return from kevent monitor"); 536 } 537 538 /* 539 * Handle reported events 540 */ 541 mevent_handle(eventlist, ret); 542 } 543 } 544