/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2017 Joyent, Inc.
 * Copyright 2023 Oxide Computer Company
 */

/*
 * Support for the signalfd facility, a Linux-borne facility for
 * file descriptor-based synchronous signal consumption.
 *
 * As described on the signalfd(3C) man page, the general idea behind these
 * file descriptors is that they can be used to synchronously consume signals
 * via the read(2) syscall.  While that capability already exists with the
 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
 * descriptor based: It is able to use the event facilities (poll(2),
 * /dev/poll, event ports) to notify interested parties when consumable
 * signals arrive.
 *
 * The signalfd lifecycle begins when a process opens /dev/signalfd.  A minor
 * will be allocated for it along with an associated signalfd_state_t struct.
 * It is there where the mask of desired signals resides.
 *
 * Reading from the signalfd is straightforward and mimics the kernel behavior
 * for sigtimedwait().  Signals continue to live on either the proc's p_sig or
 * the thread's t_sig member.  During a read operation, those which match the
 * mask are consumed so they are no longer pending.
 *
 * The poll side is more complex.  Every time a signal is delivered, all of
 * the signalfds on the process need to be examined in order to pollwake
 * threads waiting for signal arrival.
 *
 * When a thread polling on a signalfd requires a pollhead, several steps must
 * be taken to safely ensure the proper result.  A sigfd_proc_state_t is
 * created for the calling process if it does not yet exist.  It is there
 * where a list of signalfd_poller_t structures reside which associate
 * pollheads to signalfd_state_t entries.  The sigfd_proc_state_t list is
 * walked to find any signalfd_poller_t which is both associated with the
 * polling process and corresponds to the signalfd resource being polled.  If
 * none matching those conditions is found, then a new one with the
 * appropriate associations is created.
 *
 * The complications imposed by fork(2) are why the pollhead is stored in the
 * associated signalfd_poller_t instead of directly in the signalfd_state_t.
 * More than one process can hold a reference to the signalfd at a time, but
 * arriving signals should wake only process-local pollers.  Additionally,
 * signalfd_close is called only when the last referencing fd is closed,
 * hiding occurrences of preceding threads which released their references.
 * This necessitates a pollhead for each signalfd/process pair when being
 * polled.  Doing so ensures that those pollheads will live long enough for
 * the greater poll machinery to act upon them without risk of use-after-free.
 * When a signalfd is closed, existing signalfd_poller_t instances are
 * dissociated from their respective processes, causing pollwake() calls for
 * any blocked pollers.
 *
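 * The relationships among these structures can be sketched as follows (an
 * illustrative summary, not an exhaustive field listing):
 *
 *	proc_t
 *	  p_sigfd -> sigfd_proc_state_t
 *	               sigfd_list .......... signalfd_poller_t entries
 *	                                     (linked via sp_proc_node)
 *	               sigfd_pollwake_cb ... set to signalfd_pollwake_cb
 *
 *	signalfd_state_t (one per open of /dev/signalfd)
 *	  sfd_pollers ................... signalfd_poller_t entries
 *	                                  (linked via sp_state_node)
 *
 *	signalfd_poller_t (one per signalfd/process pair)
 *	  sp_pollhead, sp_mask
 *	  sp_state -> signalfd_state_t
 *	  sp_proc  -> proc_t (NULLed once dissociated)
 *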
 * When a signal arrives in a process polling on signalfd,
 * signalfd_pollwake_cb is called via the pointer in sigfd_proc_state_t.  It
 * will walk over the signalfd_poller_t entries present in the list, searching
 * for any possessing a signal mask which matches the incoming signal.
 * (Changes to the signal mask held in signalfd_state_t are propagated to the
 * signalfd_poller_t instances to avoid the need for additional locks during
 * the callback.)  The approach of keeping the poller list in p_sigfd was
 * chosen because a process is likely to use few signalfds relative to its
 * total file descriptors.  It reduces the work required for each received
 * signal.
 *
 * When matching signalfd_poller_t entries are encountered in the poller list
 * during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to
 * perform the pollwake.  This is due to a lock ordering conflict between
 * signalfd_poll and signalfd_pollwake_cb.  The former acquires
 * pollcache_t`pc_lock before proc_t`p_lock.  The latter (via sigtoproc)
 * reverses the order.  Deferring the pollwake into a taskq means it can be
 * performed without proc_t`p_lock held, avoiding the deadlock.
 *
 * Poller entries in sigfd_proc_state_t`sigfd_list are cleaned up under two
 * different circumstances.  When a signalfd instance is being closed, it will
 * dissociate all of its remaining signalfd_poller_t instances from their
 * polling processes.  When a process which polled on not-yet-closed signalfd
 * instance(s) exits, the exit helper (signalfd_exit_helper) is called, and it
 * dissociates all signalfd_poller_t instances tied to the exiting process.
 *
 * The structures associated with signalfd state are designed to operate
 * correctly across fork, but there is one caveat that applies.  Using
 * fork-shared signalfd descriptors in conjunction with fork-shared caching
 * poll descriptors (such as /dev/poll or event ports) will result in missed
 * poll wake-ups.  This is caused by the pollhead identity of signalfd
 * descriptors being dependent on the process they are polled from.  Because
 * it has a thread-local cache, poll(2) is unaffected by this limitation.
 *
 * Lock ordering:
 *
 * Calling signalfd_poll:
 *  1. pollcache_t`pc_lock
 *  2. signalfd_state_t`sfd_lock
 *  3. proc_t`p_lock
 *
 * Signal delivery, waking a pollhead:
 *  1. proc_t`p_lock
 *  2. signalfd_poller_t`sp_lock
 *
 * Process exit, cleaning up signalfd pollers:
 *  1. proc_t`p_lock
 *  2. signalfd_poller_t`sp_lock
 *
 * Waking a pollhead, from taskq:
 *  1. signalfd_poller_t`sp_lock
 *  ... Disjoint from signalfd_poller_t`sp_lock hold ...
 *  1. pollcache_t`pc_lock
 *
 * Closing signalfd, dissociating pollers:
 *  1. signalfd_state_t`sfd_lock
 *  2. pidlock
 *  3. proc_t`p_lock
 */
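
/*
 * An illustrative userspace sketch of the lifecycle described above (error
 * handling elided; SIGINT is just an arbitrary example signal; this is akin
 * to the sequence the signalfd(3C) function performs on the caller's
 * behalf).  The signal is typically blocked first so that its default
 * disposition does not fire before it can be consumed via read(2):
 *
 *	sigset_t mask;
 *	signalfd_siginfo_t ssi;
 *	int fd;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGINT);
 *	sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *	fd = open("/dev/signalfd", O_RDWR);
 *	ioctl(fd, SIGNALFDIOC_MASK, &mask);
 *
 *	(void) read(fd, &ssi, sizeof (ssi));
 *
 * after which ssi.ssi_signo identifies the consumed signal.
 */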

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>
#include <sys/disp.h>
#include <sys/taskq_impl.h>
#include <sys/condvar.h>
#include <sys/stdbool.h>

/* Per-instance signalfd device state: */
typedef struct signalfd_state {
	kmutex_t	sfd_lock;	/* protects fields below */
	list_t		sfd_pollers;
	k_sigset_t	sfd_mask;	/* signal mask for this instance */
	minor_t		sfd_minor;	/* dev minor, fixed at creation */
} signalfd_state_t;

typedef struct signalfd_poller {
	/*
	 * List node referenced by containing signalfd_state_t
	 * Protected by signalfd_state_t`sfd_lock
	 */
	list_node_t	sp_state_node;

	/*
	 * List node referenced by containing sigfd_proc_state_t
	 * Protected by proc_t`p_lock
	 */
	list_node_t	sp_proc_node;

	pollhead_t	sp_pollhead;

	/*
	 * The signalfd_state_t to which this poller is associated.
	 * It remains fixed after its initialization at creation time.
	 */
	signalfd_state_t *sp_state;

	/*
	 * The proc_t to which this poller is associated.
	 * It is initialized under the protection of proc_t`p_lock when this
	 * poller is created.  It is NULLed out, again under the protection of
	 * proc_t`p_lock, when the poller is dissociated from the process.
	 */
	proc_t		*sp_proc;

	kmutex_t	sp_lock;	/* protects fields below */
	kcondvar_t	sp_cv;		/* CV for cleaning up */
	short		sp_pollev;	/* Event(s) pending delivery */
	bool		sp_pending;	/* pollwakeup() via taskq in progress */
	taskq_ent_t	sp_taskent;	/* pollwakeup() dispatch taskq */
	k_sigset_t	sp_mask;	/* signal match mask */
} signalfd_poller_t;

static dev_info_t	*signalfd_devi;		/* device info */
static id_space_t	*signalfd_minors;	/* minor number arena */
static void		*signalfd_softstate;	/* softstate pointer */
static taskq_t		*signalfd_wakeq;	/* pollwake event taskq */

static void
signalfd_proc_clean(proc_t *p)
{
	sigfd_proc_state_t *pstate = p->p_sigfd;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);
	VERIFY(list_is_empty(&pstate->sigfd_list));

	p->p_sigfd = NULL;
	list_destroy(&pstate->sigfd_list);
	kmem_free(pstate, sizeof (*pstate));
}
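
/*
 * Dispatching wake-ups through signalfd_wakeq side-steps the lock ordering
 * conflict noted at the top of this file.  An illustrative interleaving of
 * what could happen if signalfd_pollwake_cb called pollwakeup() directly:
 *
 *	thread in signalfd_poll:	thread in sigtoproc:
 *	  holds pollcache_t`pc_lock	  holds proc_t`p_lock
 *	  blocks on proc_t`p_lock	  pollwakeup() blocks on pc_lock
 *
 * Each thread would then be waiting on the lock held by the other.  The
 * taskq below runs signalfd_wake_task with neither p_lock nor sfd_lock held,
 * so pollwakeup() can acquire pc_lock safely.
 */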

static void
signalfd_wake_task(void *arg)
{
	signalfd_poller_t *sp = arg;

	mutex_enter(&sp->sp_lock);
	VERIFY(sp->sp_pollev != 0);
	VERIFY(sp->sp_pending);
	do {
		const short pollev = sp->sp_pollev;
		const bool is_err = (pollev & POLLERR) != 0;

		sp->sp_pollev = 0;
		mutex_exit(&sp->sp_lock);

		/*
		 * Actions against the pollhead and associated pollcache(s)
		 * are taken without signalfd_poller_t`sp_lock held, since the
		 * chain of dependencies through pollcache_t`pc_lock and
		 * signalfd_state_t`sfd_lock form a potential for deadlock.
		 */
		pollwakeup(&sp->sp_pollhead, pollev);
		if (is_err) {
			pollhead_clean(&sp->sp_pollhead);
		}

		mutex_enter(&sp->sp_lock);

		/*
		 * Once pollhead/pollcache actions are complete, check for
		 * newly queued events which could have appeared in the
		 * meantime.  We can bail immediately if POLLERR was being
		 * delivered, since the underlying resource is undergoing
		 * clean-up.
		 */
		if (is_err) {
			break;
		}
	} while (sp->sp_pollev != 0);

	/*
	 * Indicate that wake task processing is complete.
	 *
	 * Wake any thread waiting for event delivery to complete if this
	 * poller is being torn down.
	 */
	sp->sp_pending = false;
	cv_signal(&sp->sp_cv);
	mutex_exit(&sp->sp_lock);
}

static void
signalfd_poller_wake(signalfd_poller_t *sp, short ev)
{
	ASSERT(MUTEX_HELD(&sp->sp_lock));

	sp->sp_pollev |= ev;
	if (!sp->sp_pending) {
		sp->sp_pending = true;
		taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, sp, 0,
		    &sp->sp_taskent);
	}
}

/*
 * Notification callback associated to processes which are being polled for
 * signalfd events.  Called by sigtoproc().
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
	proc_t *p = (proc_t *)arg0;
	sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);

	list_t *pollers = &pstate->sigfd_list;
	for (signalfd_poller_t *sp = list_head(pollers); sp != NULL;
	    sp = list_next(pollers, sp)) {
		mutex_enter(&sp->sp_lock);
		if (sigismember(&sp->sp_mask, sig)) {
			signalfd_poller_wake(sp, POLLRDNORM | POLLIN);
		}
		mutex_exit(&sp->sp_lock);
	}
}

/*
 * Get the sigfd_proc_state_t for a given process, allocating one if
 * necessary.
 *
 * Must be called with p_lock held, which may be dropped and reacquired during
 * the allocation.
 */
static sigfd_proc_state_t *
signalfd_proc_pstate(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	sigfd_proc_state_t *pstate = p->p_sigfd;
	if (pstate == NULL) {
		mutex_exit(&p->p_lock);
		pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
		list_create(&pstate->sigfd_list,
		    sizeof (signalfd_poller_t),
		    offsetof(signalfd_poller_t, sp_proc_node));
		pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;

		/* Check again, after blocking for the alloc. */
		mutex_enter(&p->p_lock);
		if (p->p_sigfd == NULL) {
			p->p_sigfd = pstate;
		} else {
			/* Someone beat us to it */
			list_destroy(&pstate->sigfd_list);
			kmem_free(pstate, sizeof (*pstate));
			pstate = p->p_sigfd;
		}
	}

	return (pstate);
}

static signalfd_poller_t *
signalfd_poller_associate(signalfd_state_t *state, proc_t *p)
{
	sigfd_proc_state_t *pstate;
	list_t *pollers;
	signalfd_poller_t *sp;

	ASSERT(MUTEX_HELD(&state->sfd_lock));

	mutex_enter(&p->p_lock);

	pstate = signalfd_proc_pstate(p);
	pollers = &pstate->sigfd_list;

	/*
	 * Check if there is already a signalfd_poller_t allocated for this
	 * signalfd_state_t/proc_t pair.
	 */
	for (sp = list_head(pollers); sp != NULL;
	    sp = list_next(pollers, sp)) {
		if (sp->sp_state == state) {
			mutex_exit(&p->p_lock);
			return (sp);
		}
	}

	/*
	 * No existing poller found, so allocate one.  Since sfd_lock remains
	 * held, there is no risk of some other operation racing with us to
	 * create such a poller.
	 */
	mutex_exit(&p->p_lock);

	sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
	mutex_init(&sp->sp_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sp->sp_cv, NULL, CV_DEFAULT, NULL);
	sigorset(&sp->sp_mask, &state->sfd_mask);
	sp->sp_state = state;
	sp->sp_proc = p;

	mutex_enter(&p->p_lock);

	/*
	 * Fetch the pstate again, since it could have been freed or
	 * reallocated while p_lock was dropped.
	 */
	pstate = signalfd_proc_pstate(p);

	list_insert_tail(&pstate->sigfd_list, sp);
	list_insert_tail(&state->sfd_pollers, sp);
	mutex_exit(&p->p_lock);

	return (sp);
}

static void
signalfd_pollers_dissociate(signalfd_state_t *state)
{
	ASSERT(MUTEX_HELD(&state->sfd_lock));

	mutex_enter(&pidlock);

	signalfd_poller_t *sp;
	list_t *pollers = &state->sfd_pollers;
	for (sp = list_head(pollers); sp != NULL;
	    sp = list_next(pollers, sp)) {
		proc_t *p = sp->sp_proc;

		if (p == NULL) {
			continue;
		}

		/*
		 * Even if the process in question is racing us to clean-up in
		 * proc_exit(), it will be unable to exit (and free itself)
		 * since we hold pidlock.  This prevents us from otherwise
		 * attempting to lock a p_lock which was freed.
		 */
		mutex_enter(&p->p_lock);
		if (sp->sp_proc == NULL) {
			mutex_exit(&p->p_lock);
			continue;
		}
		VERIFY3P(sp->sp_proc, ==, p);
		VERIFY3P(sp->sp_state, ==, state);
		VERIFY3P(p->p_sigfd, !=, NULL);

		sigfd_proc_state_t *pstate = p->p_sigfd;
		list_remove(&pstate->sigfd_list, sp);
		sp->sp_proc = NULL;

		/* Wake any lingering pollers referencing the pollhead */
		mutex_enter(&sp->sp_lock);
		signalfd_poller_wake(sp, POLLERR);
		mutex_exit(&sp->sp_lock);

		if (list_is_empty(&pstate->sigfd_list)) {
			/*
			 * If this poller was the last associated against the
			 * process, then clean up its state as well.
			 */
			signalfd_proc_clean(p);
		}
		mutex_exit(&p->p_lock);
	}
	mutex_exit(&pidlock);
}

static void
signalfd_pollers_free(signalfd_state_t *state)
{
	ASSERT(MUTEX_HELD(&state->sfd_lock));

	signalfd_poller_t *sp;
	while ((sp = list_remove_head(&state->sfd_pollers)) != NULL) {
		ASSERT3P(sp->sp_proc, ==, NULL);

		mutex_enter(&sp->sp_lock);
		while (sp->sp_pending) {
			cv_wait(&sp->sp_cv, &sp->sp_lock);
		}

		/*
		 * With the poller dissociated from its polling process, and
		 * any lingering events delivered, the pollhead should be
		 * empty.
		 */
		ASSERT3P(sp->sp_pollhead.ph_list, ==, NULL);
		mutex_exit(&sp->sp_lock);

		cv_destroy(&sp->sp_cv);
		mutex_destroy(&sp->sp_lock);
		kmem_free(sp, sizeof (*sp));
	}
}

/*
 * Callback for cleaning up signalfd state from a process during proc_exit().
 */
static void
signalfd_exit_helper(void)
{
	proc_t *p = curproc;

	mutex_enter(&p->p_lock);

	sigfd_proc_state_t *pstate = p->p_sigfd;
	if (pstate == NULL) {
		mutex_exit(&p->p_lock);
		return;
	}

	signalfd_poller_t *sp;
	while ((sp = list_remove_head(&pstate->sigfd_list)) != NULL) {
		/*
		 * Having been removed from the sigfd_list, make it clear that
		 * this signalfd_poller_t is dissociated from the process.
		 */
		sp->sp_proc = NULL;

		/* Wake any lingering pollers referencing the pollhead */
		mutex_enter(&sp->sp_lock);
		signalfd_poller_wake(sp, POLLERR);
		mutex_exit(&sp->sp_lock);
	}
	signalfd_proc_clean(p);
	mutex_exit(&p->p_lock);
}

_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	if (getminor(*devp) != SIGNALFDMNRN_SIGNALFD) {
		return (ENXIO);
	}

	const minor_t minor = (minor_t)id_allocff_nosleep(signalfd_minors);
	if (minor == (minor_t)-1) {
		return (ENOMEM);
	}

	if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
		id_free(signalfd_minors, minor);
		return (ENODEV);
	}

	signalfd_state_t *state =
	    ddi_get_soft_state(signalfd_softstate, minor);
	mutex_init(&state->sfd_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&state->sfd_pollers, sizeof (signalfd_poller_t),
	    offsetof(signalfd_poller_t, sp_state_node));
	state->sfd_minor = minor;

	const major_t major = getemajor(*devp);
	*devp = makedevice(major, minor);

	return (0);
}

/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The should_block parameter controls whether we wait for a signal or return
 * immediately if none is pending.  We use the thread's t_sigwait member in
 * the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal or an errno if not.
 */
static int
signalfd_consume_signal(k_sigset_t set, uio_t *uio, bool should_block)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	int ret = 0;

	/*
	 * Identify signals of interest so they can be processed, even if
	 * other parts of the machinery would be poised to ignore them.
	 */
	t->t_sigwait = set;

	mutex_enter(&p->p_lock);

	/* Set thread signal mask to unmask those in the specified set. */
	schedctl_finish_sigblock(t);
	const k_sigset_t oldmask = t->t_hold;
	sigdiffset(&t->t_hold, &t->t_sigwait);

	if (should_block) {
		do {
			ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock,
			    NULL, 0);
		} while (ret > 0);
	} else {
		mutex_exit(&p->p_lock);
		if (issig(FORREAL) == 0) {
			ret = -1;
		}
		mutex_enter(&p->p_lock);
	}

	/*
	 * Restore thread's signal mask to its previous value.
	 * Set t_sig_check so post_syscall sees new t_hold mask.
	 */
	t->t_hold = oldmask;
	t->t_sig_check = 1;

	if (ret == -1) {
		/* no signals pending */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EAGAIN);
	}

	/* Do not bother with signal if it is not in request set. */
	if (lwp->lwp_cursig == 0 ||
	    !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
		/*
		 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
		 * This happens if some other thread in this process called
		 * forkall() or exit().
		 */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EINTR);
	}

	/* Convert signal info into external, datamodel independent, struct. */
	signalfd_siginfo_t ssi;
	bzero(&ssi, sizeof (ssi));
	if (lwp->lwp_curinfo != NULL) {
		k_siginfo_t *infop = &lwp->lwp_curinfo->sq_info;

		ssi.ssi_signo = infop->si_signo;
		ssi.ssi_errno = infop->si_errno;
		ssi.ssi_code = infop->si_code;
		ssi.ssi_pid = infop->si_pid;
		ssi.ssi_uid = infop->si_uid;
		ssi.ssi_fd = infop->si_fd;
		ssi.ssi_band = infop->si_band;
		ssi.ssi_trapno = infop->si_trapno;
		ssi.ssi_status = infop->si_status;
		ssi.ssi_utime = infop->si_utime;
		ssi.ssi_stime = infop->si_stime;
		ssi.ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

		DTRACE_PROC2(signal__clear, int, 0, ksiginfo_t *, infop);
	} else {
		/* Convert to the format expected by the probe. */
		k_siginfo_t info = {
			.si_signo = lwp->lwp_cursig,
			.si_code = SI_NOINFO,
		};

		ssi.ssi_signo = info.si_signo;
		ssi.ssi_code = info.si_code;

		DTRACE_PROC2(signal__clear, int, 0, ksiginfo_t *, &info);
	}

	lwp->lwp_ru.nsignals++;
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	mutex_exit(&p->p_lock);

	ret = uiomove(&ssi, sizeof (ssi), UIO_READ, uio);
	sigemptyset(&t->t_sigwait);
	return (ret);
}
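
/*
 * From the consumer's perspective, one read(2) may return several packed
 * signalfd_siginfo_t records: the loop below consumes signals until the set
 * runs dry or uio_resid can no longer hold a full record.  An illustrative
 * userspace drain (error handling elided; handle() stands in for consumer
 * logic):
 *
 *	signalfd_siginfo_t buf[8];
 *	ssize_t n = read(fd, buf, sizeof (buf));
 *
 *	for (size_t i = 0; i < n / sizeof (buf[0]); i++)
 *		handle(buf[i].ssi_signo);
 */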

/*
 * This is similar to sigtimedwait.  Based on the fd mode, we may wait until a
 * signal within our specified set is posted.  We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	signalfd_state_t *state;
	k_sigset_t set;
	bool should_block = true, got_one = false;
	int res;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	if (uio->uio_resid < sizeof (signalfd_siginfo_t)) {
		return (EINVAL);
	}

	if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
		should_block = false;
	}

	mutex_enter(&state->sfd_lock);
	set = state->sfd_mask;
	mutex_exit(&state->sfd_lock);

	if (sigisempty(&set))
		return (set_errno(EINVAL));

	do {
		res = signalfd_consume_signal(set, uio, should_block);

		if (res == 0) {
			/*
			 * After consuming one signal, do not block while
			 * trying to consume more.
			 */
			got_one = true;
			should_block = false;

			/*
			 * Refresh the matching signal set in case it was
			 * updated during the wait.
			 */
			mutex_enter(&state->sfd_lock);
			set = state->sfd_mask;
			mutex_exit(&state->sfd_lock);
			if (sigisempty(&set))
				break;
		}
	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

	if (got_one)
		res = 0;

	return (res);
}

/*
 * If k_sigset_t's were a single word, we would do:
 *	return (((p->p_sig | t->t_sig) & set) & fillset);
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
	return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
	    set.__sigbits[0]) |
	    ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
	    set.__sigbits[1]) |
	    (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
	    set.__sigbits[2]) & FILLSET2));
}

static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	signalfd_state_t *state;
	short revents = 0;
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	mutex_enter(&state->sfd_lock);
	if (signalfd_sig_pending(p, t, state->sfd_mask) != 0) {
		revents |= POLLRDNORM | POLLIN;
	}

	*reventsp = revents & events;
	if ((*reventsp == 0 && !anyyet) || (events & POLLET) != 0) {
		signalfd_poller_t *sp;

		sp = signalfd_poller_associate(state, p);
		*phpp = &sp->sp_pollhead;
	}
	mutex_exit(&state->sfd_lock);

	return (0);
}

static void
signalfd_set_mask(signalfd_state_t *state, const sigset_t *umask)
{
	k_sigset_t kmask;

	sigutok(umask, &kmask);

	mutex_enter(&state->sfd_lock);
	state->sfd_mask = kmask;
	list_t *pollers = &state->sfd_pollers;
	for (signalfd_poller_t *sp = list_head(pollers); sp != NULL;
	    sp = list_next(pollers, sp)) {
		mutex_enter(&sp->sp_lock);
		sp->sp_mask = kmask;
		mutex_exit(&sp->sp_lock);
	}
	mutex_exit(&state->sfd_lock);
}

_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	signalfd_state_t *state;
	sigset_t mask;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	switch (cmd) {
	case SIGNALFDIOC_MASK:
		if (ddi_copyin((caddr_t)arg, &mask, sizeof (mask), md) != 0) {
			return (EFAULT);
		}
		signalfd_set_mask(state, &mask);
		return (0);

	default:
		break;
	}

	return (ENOTTY);
}
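
/*
 * The match mask may be changed at any time through the ioctl above.  An
 * illustrative userspace re-arm (SIGUSR1 is just an arbitrary addition):
 *
 *	sigaddset(&mask, SIGUSR1);
 *	ioctl(fd, SIGNALFDIOC_MASK, &mask);
 *
 * signalfd_set_mask() copies the new mask into each associated
 * signalfd_poller_t's sp_mask, which is why signalfd_pollwake_cb() can
 * consult only poller-local state when a signal arrives.
 */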

_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state;
	const minor_t minor = getminor(dev);

	state = ddi_get_soft_state(signalfd_softstate, minor);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * With this signalfd instance being closed, sfd_lock is a formality,
	 * as nothing else should be reaching for it to add pollers at this
	 * point.
	 */
	mutex_enter(&state->sfd_lock);

	/* Dissociate any pollers from their respective processes */
	signalfd_pollers_dissociate(state);

	/* ... and free all those (now-dissociated) pollers */
	signalfd_pollers_free(state);
	ASSERT(list_is_empty(&state->sfd_pollers));
	mutex_exit(&state->sfd_lock);

	mutex_destroy(&state->sfd_lock);
	ddi_soft_state_free(signalfd_softstate, minor);
	id_free(signalfd_minors, minor);

	return (0);
}

static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH || signalfd_devi != NULL) {
		return (DDI_FAILURE);
	}

	signalfd_minors = id_space_create("signalfd_minors", 1,
	    L_MAXMIN32 + 1);
	if (signalfd_minors == NULL) {
		cmn_err(CE_WARN, "signalfd couldn't create id space");
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_init(&signalfd_softstate,
	    sizeof (signalfd_state_t), 0) != 0) {
		cmn_err(CE_WARN, "signalfd failed to create soft state");
		id_space_destroy(signalfd_minors);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, 0) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "signalfd couldn't create minor node");
		ddi_soft_state_fini(&signalfd_softstate);
		id_space_destroy(signalfd_minors);
		return (DDI_FAILURE);
	}

	sigfd_exit_helper = signalfd_exit_helper;

	signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
	    0, INT_MAX, TASKQ_PREPOPULATE);

	ddi_report_dev(devi);
	signalfd_devi = devi;

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	/*
	 * With all of the instances gone, it is safe to both destroy the
	 * waker taskq (which must be empty) and tear down the exit helper
	 * (which must be unreachable with no proc_t`p_sigfd associations).
	 */
	taskq_destroy(signalfd_wakeq);
	sigfd_exit_helper = NULL;

	id_space_destroy(signalfd_minors);
	ddi_soft_state_fini(&signalfd_softstate);
	ddi_remove_minor_node(signalfd_devi, NULL);
	signalfd_devi = NULL;

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)signalfd_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops signalfd_cb_ops = {
	signalfd_open,		/* open */
	signalfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	signalfd_read,		/* read */
	nodev,			/* write */
	signalfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	signalfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops signalfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	signalfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	signalfd_attach,	/* attach */
	signalfd_detach,	/* detach */
	nodev,			/* reset */
	&signalfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"signalfd support",	/* name of module */
	&signalfd_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}