1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #ifndef WITHOUT_CAPSICUM 41 #include <capsicum_helpers.h> 42 #endif 43 #include <err.h> 44 #include <errno.h> 45 #include <stdlib.h> 46 #include <stdio.h> 47 #include <string.h> 48 #include <sysexits.h> 49 #include <unistd.h> 50 51 #include <sys/types.h> 52 #ifndef WITHOUT_CAPSICUM 53 #include <sys/capsicum.h> 54 #endif 55 #include <sys/event.h> 56 #include <sys/time.h> 57 58 #include <pthread.h> 59 #include <pthread_np.h> 60 61 #include "mevent.h" 62 63 #define MEVENT_MAX 64 64 65 #define MEV_ADD 1 66 #define MEV_ENABLE 2 67 #define MEV_DISABLE 3 68 #define MEV_DEL_PENDING 4 69 #define MEV_ADD_DISABLED 5 70 71 extern char *vmname; 72 73 static pthread_t mevent_tid; 74 static int mevent_timid = 43; 75 static int mevent_pipefd[2]; 76 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 77 78 struct mevent { 79 void (*me_func)(int, enum ev_type, void *); 80 #define me_msecs me_fd 81 int me_fd; 82 int me_timid; 83 enum ev_type me_type; 84 void *me_param; 85 int me_cq; 86 int me_state; 87 int me_closefd; 88 LIST_ENTRY(mevent) me_list; 89 }; 90 91 static LIST_HEAD(listhead, mevent) global_head, change_head; 92 93 static void 94 mevent_qlock(void) 95 { 96 pthread_mutex_lock(&mevent_lmutex); 97 } 98 99 static void 100 mevent_qunlock(void) 101 { 102 pthread_mutex_unlock(&mevent_lmutex); 103 } 104 105 static void 106 mevent_pipe_read(int fd, enum ev_type type, void *param) 107 { 108 char buf[MEVENT_MAX]; 109 int status; 110 111 /* 112 * Drain the pipe read side. The fd is non-blocking so this is 113 * safe to do. 114 */ 115 do { 116 status = read(fd, buf, sizeof(buf)); 117 } while (status == MEVENT_MAX); 118 } 119 120 static void 121 mevent_notify(void) 122 { 123 char c = '\0'; 124 125 /* 126 * If calling from outside the i/o thread, write a byte on the 127 * pipe to force the i/o thread to exit the blocking kevent call. 128 */ 129 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 130 write(mevent_pipefd[1], &c, 1); 131 } 132 } 133 134 static int 135 mevent_kq_filter(struct mevent *mevp) 136 { 137 int retval; 138 139 retval = 0; 140 141 if (mevp->me_type == EVF_READ) 142 retval = EVFILT_READ; 143 144 if (mevp->me_type == EVF_WRITE) 145 retval = EVFILT_WRITE; 146 147 if (mevp->me_type == EVF_TIMER) 148 retval = EVFILT_TIMER; 149 150 if (mevp->me_type == EVF_SIGNAL) 151 retval = EVFILT_SIGNAL; 152 153 return (retval); 154 } 155 156 static int 157 mevent_kq_flags(struct mevent *mevp) 158 { 159 int ret; 160 161 switch (mevp->me_state) { 162 case MEV_ADD: 163 ret = EV_ADD; /* implicitly enabled */ 164 break; 165 case MEV_ADD_DISABLED: 166 ret = EV_ADD | EV_DISABLE; 167 break; 168 case MEV_ENABLE: 169 ret = EV_ENABLE; 170 break; 171 case MEV_DISABLE: 172 ret = EV_DISABLE; 173 break; 174 case MEV_DEL_PENDING: 175 ret = EV_DELETE; 176 break; 177 default: 178 assert(0); 179 break; 180 } 181 182 return (ret); 183 } 184 185 static int 186 mevent_kq_fflags(struct mevent *mevp) 187 { 188 /* XXX nothing yet, perhaps EV_EOF for reads ? */ 189 return (0); 190 } 191 192 static int 193 mevent_build(int mfd, struct kevent *kev) 194 { 195 struct mevent *mevp, *tmpp; 196 int i; 197 198 i = 0; 199 200 mevent_qlock(); 201 202 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 203 if (mevp->me_closefd) { 204 /* 205 * A close of the file descriptor will remove the 206 * event 207 */ 208 close(mevp->me_fd); 209 } else { 210 if (mevp->me_type == EVF_TIMER) { 211 kev[i].ident = mevp->me_timid; 212 kev[i].data = mevp->me_msecs; 213 } else { 214 kev[i].ident = mevp->me_fd; 215 kev[i].data = 0; 216 } 217 kev[i].filter = mevent_kq_filter(mevp); 218 kev[i].flags = mevent_kq_flags(mevp); 219 kev[i].fflags = mevent_kq_fflags(mevp); 220 kev[i].udata = mevp; 221 i++; 222 } 223 224 mevp->me_cq = 0; 225 LIST_REMOVE(mevp, me_list); 226 227 if (mevp->me_state == MEV_DEL_PENDING) { 228 free(mevp); 229 } else { 230 LIST_INSERT_HEAD(&global_head, mevp, me_list); 231 } 232 233 assert(i < MEVENT_MAX); 234 } 235 236 mevent_qunlock(); 237 238 return (i); 239 } 240 241 static void 242 mevent_handle(struct kevent *kev, int numev) 243 { 244 struct mevent *mevp; 245 int i; 246 247 for (i = 0; i < numev; i++) { 248 mevp = kev[i].udata; 249 250 /* XXX check for EV_ERROR ? */ 251 252 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 253 } 254 } 255 256 static struct mevent * 257 mevent_add_state(int tfd, enum ev_type type, 258 void (*func)(int, enum ev_type, void *), void *param, 259 int state) 260 { 261 struct mevent *lp, *mevp; 262 263 if (tfd < 0 || func == NULL) { 264 return (NULL); 265 } 266 267 mevp = NULL; 268 269 mevent_qlock(); 270 271 /* 272 * Verify that the fd/type tuple is not present in any list 273 */ 274 LIST_FOREACH(lp, &global_head, me_list) { 275 if (type != EVF_TIMER && lp->me_fd == tfd && 276 lp->me_type == type) { 277 goto exit; 278 } 279 } 280 281 LIST_FOREACH(lp, &change_head, me_list) { 282 if (type != EVF_TIMER && lp->me_fd == tfd && 283 lp->me_type == type) { 284 goto exit; 285 } 286 } 287 288 /* 289 * Allocate an entry, populate it, and add it to the change list. 290 */ 291 mevp = calloc(1, sizeof(struct mevent)); 292 if (mevp == NULL) { 293 goto exit; 294 } 295 296 if (type == EVF_TIMER) { 297 mevp->me_msecs = tfd; 298 mevp->me_timid = mevent_timid++; 299 } else 300 mevp->me_fd = tfd; 301 mevp->me_type = type; 302 mevp->me_func = func; 303 mevp->me_param = param; 304 305 LIST_INSERT_HEAD(&change_head, mevp, me_list); 306 mevp->me_cq = 1; 307 mevp->me_state = state; 308 mevent_notify(); 309 310 exit: 311 mevent_qunlock(); 312 313 return (mevp); 314 } 315 316 struct mevent * 317 mevent_add(int tfd, enum ev_type type, 318 void (*func)(int, enum ev_type, void *), void *param) 319 { 320 321 return mevent_add_state(tfd, type, func, param, MEV_ADD); 322 } 323 324 struct mevent * 325 mevent_add_disabled(int tfd, enum ev_type type, 326 void (*func)(int, enum ev_type, void *), void *param) 327 { 328 329 return mevent_add_state(tfd, type, func, param, MEV_ADD_DISABLED); 330 } 331 332 static int 333 mevent_update(struct mevent *evp, int newstate) 334 { 335 /* 336 * It's not possible to enable/disable a deleted event 337 */ 338 if (evp->me_state == MEV_DEL_PENDING) 339 return (EINVAL); 340 341 /* 342 * No update needed if state isn't changing 343 */ 344 if (evp->me_state == newstate) 345 return (0); 346 347 mevent_qlock(); 348 349 evp->me_state = newstate; 350 351 /* 352 * Place the entry onto the changed list if not already there. 353 */ 354 if (evp->me_cq == 0) { 355 evp->me_cq = 1; 356 LIST_REMOVE(evp, me_list); 357 LIST_INSERT_HEAD(&change_head, evp, me_list); 358 mevent_notify(); 359 } 360 361 mevent_qunlock(); 362 363 return (0); 364 } 365 366 int 367 mevent_enable(struct mevent *evp) 368 { 369 370 return (mevent_update(evp, MEV_ENABLE)); 371 } 372 373 int 374 mevent_disable(struct mevent *evp) 375 { 376 377 return (mevent_update(evp, MEV_DISABLE)); 378 } 379 380 static int 381 mevent_delete_event(struct mevent *evp, int closefd) 382 { 383 mevent_qlock(); 384 385 /* 386 * Place the entry onto the changed list if not already there, and 387 * mark as to be deleted. 388 */ 389 if (evp->me_cq == 0) { 390 evp->me_cq = 1; 391 LIST_REMOVE(evp, me_list); 392 LIST_INSERT_HEAD(&change_head, evp, me_list); 393 mevent_notify(); 394 } 395 evp->me_state = MEV_DEL_PENDING; 396 397 if (closefd) 398 evp->me_closefd = 1; 399 400 mevent_qunlock(); 401 402 return (0); 403 } 404 405 int 406 mevent_delete(struct mevent *evp) 407 { 408 409 return (mevent_delete_event(evp, 0)); 410 } 411 412 int 413 mevent_delete_close(struct mevent *evp) 414 { 415 416 return (mevent_delete_event(evp, 1)); 417 } 418 419 static void 420 mevent_set_name(void) 421 { 422 423 pthread_set_name_np(mevent_tid, "mevent"); 424 } 425 426 void 427 mevent_dispatch(void) 428 { 429 struct kevent changelist[MEVENT_MAX]; 430 struct kevent eventlist[MEVENT_MAX]; 431 struct mevent *pipev; 432 int mfd; 433 int numev; 434 int ret; 435 #ifndef WITHOUT_CAPSICUM 436 cap_rights_t rights; 437 #endif 438 439 mevent_tid = pthread_self(); 440 mevent_set_name(); 441 442 mfd = kqueue(); 443 assert(mfd > 0); 444 445 #ifndef WITHOUT_CAPSICUM 446 cap_rights_init(&rights, CAP_KQUEUE); 447 if (caph_rights_limit(mfd, &rights) == -1) 448 errx(EX_OSERR, "Unable to apply rights for sandbox"); 449 #endif 450 451 /* 452 * Open the pipe that will be used for other threads to force 453 * the blocking kqueue call to exit by writing to it. Set the 454 * descriptor to non-blocking. 455 */ 456 ret = pipe(mevent_pipefd); 457 if (ret < 0) { 458 perror("pipe"); 459 exit(0); 460 } 461 462 #ifndef WITHOUT_CAPSICUM 463 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 464 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 465 errx(EX_OSERR, "Unable to apply rights for sandbox"); 466 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 467 errx(EX_OSERR, "Unable to apply rights for sandbox"); 468 #endif 469 470 /* 471 * Add internal event handler for the pipe write fd 472 */ 473 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 474 assert(pipev != NULL); 475 476 for (;;) { 477 /* 478 * Build changelist if required. 479 * XXX the changelist can be put into the blocking call 480 * to eliminate the extra syscall. Currently better for 481 * debug. 482 */ 483 numev = mevent_build(mfd, changelist); 484 if (numev) { 485 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 486 if (ret == -1) { 487 perror("Error return from kevent change"); 488 } 489 } 490 491 /* 492 * Block awaiting events 493 */ 494 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 495 if (ret == -1 && errno != EINTR) { 496 perror("Error return from kevent monitor"); 497 } 498 499 /* 500 * Handle reported events 501 */ 502 mevent_handle(eventlist, ret); 503 } 504 } 505