1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #ifndef WITHOUT_CAPSICUM 41 #include <capsicum_helpers.h> 42 #endif 43 #include <err.h> 44 #include <errno.h> 45 #include <stdlib.h> 46 #include <stdio.h> 47 #include <string.h> 48 #include <sysexits.h> 49 #include <unistd.h> 50 51 #include <sys/types.h> 52 #ifndef WITHOUT_CAPSICUM 53 #include <sys/capsicum.h> 54 #endif 55 #include <sys/event.h> 56 #include <sys/time.h> 57 58 #include <pthread.h> 59 #include <pthread_np.h> 60 61 #include "mevent.h" 62 63 #define MEVENT_MAX 64 64 65 #define MEV_ADD 1 66 #define MEV_ENABLE 2 67 #define MEV_DISABLE 3 68 #define MEV_DEL_PENDING 4 69 70 extern char *vmname; 71 72 static pthread_t mevent_tid; 73 static int mevent_timid = 43; 74 static int mevent_pipefd[2]; 75 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 76 77 struct mevent { 78 void (*me_func)(int, enum ev_type, void *); 79 #define me_msecs me_fd 80 int me_fd; 81 int me_timid; 82 enum ev_type me_type; 83 void *me_param; 84 int me_cq; 85 int me_state; 86 int me_closefd; 87 LIST_ENTRY(mevent) me_list; 88 }; 89 90 static LIST_HEAD(listhead, mevent) global_head, change_head; 91 92 static void 93 mevent_qlock(void) 94 { 95 pthread_mutex_lock(&mevent_lmutex); 96 } 97 98 static void 99 mevent_qunlock(void) 100 { 101 pthread_mutex_unlock(&mevent_lmutex); 102 } 103 104 static void 105 mevent_pipe_read(int fd, enum ev_type type, void *param) 106 { 107 char buf[MEVENT_MAX]; 108 int status; 109 110 /* 111 * Drain the pipe read side. The fd is non-blocking so this is 112 * safe to do. 113 */ 114 do { 115 status = read(fd, buf, sizeof(buf)); 116 } while (status == MEVENT_MAX); 117 } 118 119 static void 120 mevent_notify(void) 121 { 122 char c; 123 124 /* 125 * If calling from outside the i/o thread, write a byte on the 126 * pipe to force the i/o thread to exit the blocking kevent call. 127 */ 128 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 129 write(mevent_pipefd[1], &c, 1); 130 } 131 } 132 133 static int 134 mevent_kq_filter(struct mevent *mevp) 135 { 136 int retval; 137 138 retval = 0; 139 140 if (mevp->me_type == EVF_READ) 141 retval = EVFILT_READ; 142 143 if (mevp->me_type == EVF_WRITE) 144 retval = EVFILT_WRITE; 145 146 if (mevp->me_type == EVF_TIMER) 147 retval = EVFILT_TIMER; 148 149 if (mevp->me_type == EVF_SIGNAL) 150 retval = EVFILT_SIGNAL; 151 152 return (retval); 153 } 154 155 static int 156 mevent_kq_flags(struct mevent *mevp) 157 { 158 int ret; 159 160 switch (mevp->me_state) { 161 case MEV_ADD: 162 ret = EV_ADD; /* implicitly enabled */ 163 break; 164 case MEV_ENABLE: 165 ret = EV_ENABLE; 166 break; 167 case MEV_DISABLE: 168 ret = EV_DISABLE; 169 break; 170 case MEV_DEL_PENDING: 171 ret = EV_DELETE; 172 break; 173 default: 174 assert(0); 175 break; 176 } 177 178 return (ret); 179 } 180 181 static int 182 mevent_kq_fflags(struct mevent *mevp) 183 { 184 /* XXX nothing yet, perhaps EV_EOF for reads ? */ 185 return (0); 186 } 187 188 static int 189 mevent_build(int mfd, struct kevent *kev) 190 { 191 struct mevent *mevp, *tmpp; 192 int i; 193 194 i = 0; 195 196 mevent_qlock(); 197 198 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 199 if (mevp->me_closefd) { 200 /* 201 * A close of the file descriptor will remove the 202 * event 203 */ 204 close(mevp->me_fd); 205 } else { 206 if (mevp->me_type == EVF_TIMER) { 207 kev[i].ident = mevp->me_timid; 208 kev[i].data = mevp->me_msecs; 209 } else { 210 kev[i].ident = mevp->me_fd; 211 kev[i].data = 0; 212 } 213 kev[i].filter = mevent_kq_filter(mevp); 214 kev[i].flags = mevent_kq_flags(mevp); 215 kev[i].fflags = mevent_kq_fflags(mevp); 216 kev[i].udata = mevp; 217 i++; 218 } 219 220 mevp->me_cq = 0; 221 LIST_REMOVE(mevp, me_list); 222 223 if (mevp->me_state == MEV_DEL_PENDING) { 224 free(mevp); 225 } else { 226 LIST_INSERT_HEAD(&global_head, mevp, me_list); 227 } 228 229 assert(i < MEVENT_MAX); 230 } 231 232 mevent_qunlock(); 233 234 return (i); 235 } 236 237 static void 238 mevent_handle(struct kevent *kev, int numev) 239 { 240 struct mevent *mevp; 241 int i; 242 243 for (i = 0; i < numev; i++) { 244 mevp = kev[i].udata; 245 246 /* XXX check for EV_ERROR ? */ 247 248 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 249 } 250 } 251 252 struct mevent * 253 mevent_add(int tfd, enum ev_type type, 254 void (*func)(int, enum ev_type, void *), void *param) 255 { 256 struct mevent *lp, *mevp; 257 258 if (tfd < 0 || func == NULL) { 259 return (NULL); 260 } 261 262 mevp = NULL; 263 264 mevent_qlock(); 265 266 /* 267 * Verify that the fd/type tuple is not present in any list 268 */ 269 LIST_FOREACH(lp, &global_head, me_list) { 270 if (type != EVF_TIMER && lp->me_fd == tfd && 271 lp->me_type == type) { 272 goto exit; 273 } 274 } 275 276 LIST_FOREACH(lp, &change_head, me_list) { 277 if (type != EVF_TIMER && lp->me_fd == tfd && 278 lp->me_type == type) { 279 goto exit; 280 } 281 } 282 283 /* 284 * Allocate an entry, populate it, and add it to the change list. 285 */ 286 mevp = calloc(1, sizeof(struct mevent)); 287 if (mevp == NULL) { 288 goto exit; 289 } 290 291 if (type == EVF_TIMER) { 292 mevp->me_msecs = tfd; 293 mevp->me_timid = mevent_timid++; 294 } else 295 mevp->me_fd = tfd; 296 mevp->me_type = type; 297 mevp->me_func = func; 298 mevp->me_param = param; 299 300 LIST_INSERT_HEAD(&change_head, mevp, me_list); 301 mevp->me_cq = 1; 302 mevp->me_state = MEV_ADD; 303 mevent_notify(); 304 305 exit: 306 mevent_qunlock(); 307 308 return (mevp); 309 } 310 311 static int 312 mevent_update(struct mevent *evp, int newstate) 313 { 314 /* 315 * It's not possible to enable/disable a deleted event 316 */ 317 if (evp->me_state == MEV_DEL_PENDING) 318 return (EINVAL); 319 320 /* 321 * No update needed if state isn't changing 322 */ 323 if (evp->me_state == newstate) 324 return (0); 325 326 mevent_qlock(); 327 328 evp->me_state = newstate; 329 330 /* 331 * Place the entry onto the changed list if not already there. 332 */ 333 if (evp->me_cq == 0) { 334 evp->me_cq = 1; 335 LIST_REMOVE(evp, me_list); 336 LIST_INSERT_HEAD(&change_head, evp, me_list); 337 mevent_notify(); 338 } 339 340 mevent_qunlock(); 341 342 return (0); 343 } 344 345 int 346 mevent_enable(struct mevent *evp) 347 { 348 349 return (mevent_update(evp, MEV_ENABLE)); 350 } 351 352 int 353 mevent_disable(struct mevent *evp) 354 { 355 356 return (mevent_update(evp, MEV_DISABLE)); 357 } 358 359 static int 360 mevent_delete_event(struct mevent *evp, int closefd) 361 { 362 mevent_qlock(); 363 364 /* 365 * Place the entry onto the changed list if not already there, and 366 * mark as to be deleted. 367 */ 368 if (evp->me_cq == 0) { 369 evp->me_cq = 1; 370 LIST_REMOVE(evp, me_list); 371 LIST_INSERT_HEAD(&change_head, evp, me_list); 372 mevent_notify(); 373 } 374 evp->me_state = MEV_DEL_PENDING; 375 376 if (closefd) 377 evp->me_closefd = 1; 378 379 mevent_qunlock(); 380 381 return (0); 382 } 383 384 int 385 mevent_delete(struct mevent *evp) 386 { 387 388 return (mevent_delete_event(evp, 0)); 389 } 390 391 int 392 mevent_delete_close(struct mevent *evp) 393 { 394 395 return (mevent_delete_event(evp, 1)); 396 } 397 398 static void 399 mevent_set_name(void) 400 { 401 402 pthread_set_name_np(mevent_tid, "mevent"); 403 } 404 405 void 406 mevent_dispatch(void) 407 { 408 struct kevent changelist[MEVENT_MAX]; 409 struct kevent eventlist[MEVENT_MAX]; 410 struct mevent *pipev; 411 int mfd; 412 int numev; 413 int ret; 414 #ifndef WITHOUT_CAPSICUM 415 cap_rights_t rights; 416 #endif 417 418 mevent_tid = pthread_self(); 419 mevent_set_name(); 420 421 mfd = kqueue(); 422 assert(mfd > 0); 423 424 #ifndef WITHOUT_CAPSICUM 425 cap_rights_init(&rights, CAP_KQUEUE); 426 if (caph_rights_limit(mfd, &rights) == -1) 427 errx(EX_OSERR, "Unable to apply rights for sandbox"); 428 #endif 429 430 /* 431 * Open the pipe that will be used for other threads to force 432 * the blocking kqueue call to exit by writing to it. Set the 433 * descriptor to non-blocking. 434 */ 435 ret = pipe(mevent_pipefd); 436 if (ret < 0) { 437 perror("pipe"); 438 exit(0); 439 } 440 441 #ifndef WITHOUT_CAPSICUM 442 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 443 if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) 444 errx(EX_OSERR, "Unable to apply rights for sandbox"); 445 if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) 446 errx(EX_OSERR, "Unable to apply rights for sandbox"); 447 #endif 448 449 /* 450 * Add internal event handler for the pipe write fd 451 */ 452 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 453 assert(pipev != NULL); 454 455 for (;;) { 456 /* 457 * Build changelist if required. 458 * XXX the changelist can be put into the blocking call 459 * to eliminate the extra syscall. Currently better for 460 * debug. 461 */ 462 numev = mevent_build(mfd, changelist); 463 if (numev) { 464 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 465 if (ret == -1) { 466 perror("Error return from kevent change"); 467 } 468 } 469 470 /* 471 * Block awaiting events 472 */ 473 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 474 if (ret == -1 && errno != EINTR) { 475 perror("Error return from kevent monitor"); 476 } 477 478 /* 479 * Handle reported events 480 */ 481 mevent_handle(eventlist, ret); 482 } 483 } 484