1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Micro event library for FreeBSD, designed for a single i/o thread 33 * using kqueue, and having events be persistent by default. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <assert.h> 40 #include <err.h> 41 #include <errno.h> 42 #include <stdlib.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <sysexits.h> 46 #include <unistd.h> 47 48 #include <sys/types.h> 49 #ifndef WITHOUT_CAPSICUM 50 #include <sys/capsicum.h> 51 #endif 52 #include <sys/event.h> 53 #include <sys/time.h> 54 55 #include <pthread.h> 56 #include <pthread_np.h> 57 58 #include "mevent.h" 59 60 #define MEVENT_MAX 64 61 62 #define MEV_ADD 1 63 #define MEV_ENABLE 2 64 #define MEV_DISABLE 3 65 #define MEV_DEL_PENDING 4 66 67 extern char *vmname; 68 69 static pthread_t mevent_tid; 70 static int mevent_timid = 43; 71 static int mevent_pipefd[2]; 72 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 73 74 struct mevent { 75 void (*me_func)(int, enum ev_type, void *); 76 #define me_msecs me_fd 77 int me_fd; 78 int me_timid; 79 enum ev_type me_type; 80 void *me_param; 81 int me_cq; 82 int me_state; 83 int me_closefd; 84 LIST_ENTRY(mevent) me_list; 85 }; 86 87 static LIST_HEAD(listhead, mevent) global_head, change_head; 88 89 static void 90 mevent_qlock(void) 91 { 92 pthread_mutex_lock(&mevent_lmutex); 93 } 94 95 static void 96 mevent_qunlock(void) 97 { 98 pthread_mutex_unlock(&mevent_lmutex); 99 } 100 101 static void 102 mevent_pipe_read(int fd, enum ev_type type, void *param) 103 { 104 char buf[MEVENT_MAX]; 105 int status; 106 107 /* 108 * Drain the pipe read side. The fd is non-blocking so this is 109 * safe to do. 110 */ 111 do { 112 status = read(fd, buf, sizeof(buf)); 113 } while (status == MEVENT_MAX); 114 } 115 116 static void 117 mevent_notify(void) 118 { 119 char c; 120 121 /* 122 * If calling from outside the i/o thread, write a byte on the 123 * pipe to force the i/o thread to exit the blocking kevent call. 124 */ 125 if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 126 write(mevent_pipefd[1], &c, 1); 127 } 128 } 129 130 static int 131 mevent_kq_filter(struct mevent *mevp) 132 { 133 int retval; 134 135 retval = 0; 136 137 if (mevp->me_type == EVF_READ) 138 retval = EVFILT_READ; 139 140 if (mevp->me_type == EVF_WRITE) 141 retval = EVFILT_WRITE; 142 143 if (mevp->me_type == EVF_TIMER) 144 retval = EVFILT_TIMER; 145 146 if (mevp->me_type == EVF_SIGNAL) 147 retval = EVFILT_SIGNAL; 148 149 return (retval); 150 } 151 152 static int 153 mevent_kq_flags(struct mevent *mevp) 154 { 155 int ret; 156 157 switch (mevp->me_state) { 158 case MEV_ADD: 159 ret = EV_ADD; /* implicitly enabled */ 160 break; 161 case MEV_ENABLE: 162 ret = EV_ENABLE; 163 break; 164 case MEV_DISABLE: 165 ret = EV_DISABLE; 166 break; 167 case MEV_DEL_PENDING: 168 ret = EV_DELETE; 169 break; 170 default: 171 assert(0); 172 break; 173 } 174 175 return (ret); 176 } 177 178 static int 179 mevent_kq_fflags(struct mevent *mevp) 180 { 181 /* XXX nothing yet, perhaps EV_EOF for reads ? */ 182 return (0); 183 } 184 185 static int 186 mevent_build(int mfd, struct kevent *kev) 187 { 188 struct mevent *mevp, *tmpp; 189 int i; 190 191 i = 0; 192 193 mevent_qlock(); 194 195 LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 196 if (mevp->me_closefd) { 197 /* 198 * A close of the file descriptor will remove the 199 * event 200 */ 201 close(mevp->me_fd); 202 } else { 203 if (mevp->me_type == EVF_TIMER) { 204 kev[i].ident = mevp->me_timid; 205 kev[i].data = mevp->me_msecs; 206 } else { 207 kev[i].ident = mevp->me_fd; 208 kev[i].data = 0; 209 } 210 kev[i].filter = mevent_kq_filter(mevp); 211 kev[i].flags = mevent_kq_flags(mevp); 212 kev[i].fflags = mevent_kq_fflags(mevp); 213 kev[i].udata = mevp; 214 i++; 215 } 216 217 mevp->me_cq = 0; 218 LIST_REMOVE(mevp, me_list); 219 220 if (mevp->me_state == MEV_DEL_PENDING) { 221 free(mevp); 222 } else { 223 LIST_INSERT_HEAD(&global_head, mevp, me_list); 224 } 225 226 assert(i < MEVENT_MAX); 227 } 228 229 mevent_qunlock(); 230 231 return (i); 232 } 233 234 static void 235 mevent_handle(struct kevent *kev, int numev) 236 { 237 struct mevent *mevp; 238 int i; 239 240 for (i = 0; i < numev; i++) { 241 mevp = kev[i].udata; 242 243 /* XXX check for EV_ERROR ? */ 244 245 (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 246 } 247 } 248 249 struct mevent * 250 mevent_add(int tfd, enum ev_type type, 251 void (*func)(int, enum ev_type, void *), void *param) 252 { 253 struct mevent *lp, *mevp; 254 255 if (tfd < 0 || func == NULL) { 256 return (NULL); 257 } 258 259 mevp = NULL; 260 261 mevent_qlock(); 262 263 /* 264 * Verify that the fd/type tuple is not present in any list 265 */ 266 LIST_FOREACH(lp, &global_head, me_list) { 267 if (type != EVF_TIMER && lp->me_fd == tfd && 268 lp->me_type == type) { 269 goto exit; 270 } 271 } 272 273 LIST_FOREACH(lp, &change_head, me_list) { 274 if (type != EVF_TIMER && lp->me_fd == tfd && 275 lp->me_type == type) { 276 goto exit; 277 } 278 } 279 280 /* 281 * Allocate an entry, populate it, and add it to the change list. 282 */ 283 mevp = calloc(1, sizeof(struct mevent)); 284 if (mevp == NULL) { 285 goto exit; 286 } 287 288 if (type == EVF_TIMER) { 289 mevp->me_msecs = tfd; 290 mevp->me_timid = mevent_timid++; 291 } else 292 mevp->me_fd = tfd; 293 mevp->me_type = type; 294 mevp->me_func = func; 295 mevp->me_param = param; 296 297 LIST_INSERT_HEAD(&change_head, mevp, me_list); 298 mevp->me_cq = 1; 299 mevp->me_state = MEV_ADD; 300 mevent_notify(); 301 302 exit: 303 mevent_qunlock(); 304 305 return (mevp); 306 } 307 308 static int 309 mevent_update(struct mevent *evp, int newstate) 310 { 311 /* 312 * It's not possible to enable/disable a deleted event 313 */ 314 if (evp->me_state == MEV_DEL_PENDING) 315 return (EINVAL); 316 317 /* 318 * No update needed if state isn't changing 319 */ 320 if (evp->me_state == newstate) 321 return (0); 322 323 mevent_qlock(); 324 325 evp->me_state = newstate; 326 327 /* 328 * Place the entry onto the changed list if not already there. 329 */ 330 if (evp->me_cq == 0) { 331 evp->me_cq = 1; 332 LIST_REMOVE(evp, me_list); 333 LIST_INSERT_HEAD(&change_head, evp, me_list); 334 mevent_notify(); 335 } 336 337 mevent_qunlock(); 338 339 return (0); 340 } 341 342 int 343 mevent_enable(struct mevent *evp) 344 { 345 346 return (mevent_update(evp, MEV_ENABLE)); 347 } 348 349 int 350 mevent_disable(struct mevent *evp) 351 { 352 353 return (mevent_update(evp, MEV_DISABLE)); 354 } 355 356 static int 357 mevent_delete_event(struct mevent *evp, int closefd) 358 { 359 mevent_qlock(); 360 361 /* 362 * Place the entry onto the changed list if not already there, and 363 * mark as to be deleted. 364 */ 365 if (evp->me_cq == 0) { 366 evp->me_cq = 1; 367 LIST_REMOVE(evp, me_list); 368 LIST_INSERT_HEAD(&change_head, evp, me_list); 369 mevent_notify(); 370 } 371 evp->me_state = MEV_DEL_PENDING; 372 373 if (closefd) 374 evp->me_closefd = 1; 375 376 mevent_qunlock(); 377 378 return (0); 379 } 380 381 int 382 mevent_delete(struct mevent *evp) 383 { 384 385 return (mevent_delete_event(evp, 0)); 386 } 387 388 int 389 mevent_delete_close(struct mevent *evp) 390 { 391 392 return (mevent_delete_event(evp, 1)); 393 } 394 395 static void 396 mevent_set_name(void) 397 { 398 399 pthread_set_name_np(mevent_tid, "mevent"); 400 } 401 402 void 403 mevent_dispatch(void) 404 { 405 struct kevent changelist[MEVENT_MAX]; 406 struct kevent eventlist[MEVENT_MAX]; 407 struct mevent *pipev; 408 int mfd; 409 int numev; 410 int ret; 411 #ifndef WITHOUT_CAPSICUM 412 cap_rights_t rights; 413 #endif 414 415 mevent_tid = pthread_self(); 416 mevent_set_name(); 417 418 mfd = kqueue(); 419 assert(mfd > 0); 420 421 #ifndef WITHOUT_CAPSICUM 422 cap_rights_init(&rights, CAP_KQUEUE); 423 if (cap_rights_limit(mfd, &rights) == -1 && errno != ENOSYS) 424 errx(EX_OSERR, "Unable to apply rights for sandbox"); 425 #endif 426 427 /* 428 * Open the pipe that will be used for other threads to force 429 * the blocking kqueue call to exit by writing to it. Set the 430 * descriptor to non-blocking. 431 */ 432 ret = pipe(mevent_pipefd); 433 if (ret < 0) { 434 perror("pipe"); 435 exit(0); 436 } 437 438 #ifndef WITHOUT_CAPSICUM 439 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 440 if (cap_rights_limit(mevent_pipefd[0], &rights) == -1 && errno != ENOSYS) 441 errx(EX_OSERR, "Unable to apply rights for sandbox"); 442 if (cap_rights_limit(mevent_pipefd[1], &rights) == -1 && errno != ENOSYS) 443 errx(EX_OSERR, "Unable to apply rights for sandbox"); 444 #endif 445 446 /* 447 * Add internal event handler for the pipe write fd 448 */ 449 pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 450 assert(pipev != NULL); 451 452 for (;;) { 453 /* 454 * Build changelist if required. 455 * XXX the changelist can be put into the blocking call 456 * to eliminate the extra syscall. Currently better for 457 * debug. 458 */ 459 numev = mevent_build(mfd, changelist); 460 if (numev) { 461 ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 462 if (ret == -1) { 463 perror("Error return from kevent change"); 464 } 465 } 466 467 /* 468 * Block awaiting events 469 */ 470 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 471 if (ret == -1 && errno != EINTR) { 472 perror("Error return from kevent monitor"); 473 } 474 475 /* 476 * Handle reported events 477 */ 478 mevent_handle(eventlist, ret); 479 } 480 } 481