/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2017 Joyent, Inc.
 */

/*
 * Support for the signalfd facility, a Linux-borne facility for
 * file descriptor-based synchronous signal consumption.
 *
 * As described on the signalfd(3C) man page, the general idea behind these
 * file descriptors is that they can be used to synchronously consume signals
 * via the read(2) syscall.  While that capability already exists with the
 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
 * descriptor based: it is able to use the event facilities (poll(2),
 * /dev/poll, event ports) to notify interested parties when consumable
 * signals arrive.
 *
 * The signalfd lifecycle begins when a process opens /dev/signalfd.  A minor
 * will be allocated for it along with an associated signalfd_state_t struct.
 * It is there where the mask of desired signals resides.
 *
 * Reading from the signalfd is straightforward and mimics the kernel
 * behavior for sigtimedwait().  Signals continue to live on either the
 * proc's p_sig, or thread's t_sig, member.  During a read operation, those
 * which match the mask are consumed so they are no longer pending.
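 *
 * For illustration only, here is a minimal userland sketch of that read
 * behavior.  This is a hedged example rather than a statement of the libc
 * wrapper's implementation; it assumes the signalfd(3C) entry point and
 * omits all error handling:
 *
 *	sigset_t mask;
 *	signalfd_siginfo_t info;
 *	int fd;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGUSR1);
 *	(void) sigprocmask(SIG_BLOCK, &mask, NULL);
 *	fd = signalfd(-1, &mask, 0);
 *	(void) read(fd, &info, sizeof (info));
 *
 * The sigprocmask(2) call blocks default delivery so that a pending SIGUSR1
 * is consumed by the read rather than by normal signal dispatch; the read
 * then fills in a signalfd_siginfo_t describing the consumed signal.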
 *
 * The poll side is more complex.  Every time a signal is delivered, all of
 * the signalfds on the process need to be examined in order to pollwake
 * threads waiting for signal arrival.
 *
 * When a thread polling on a signalfd requires a pollhead, several steps
 * must be taken to safely ensure the proper result.  A sigfd_proc_state_t
 * is created for the calling process if it does not yet exist.  It is there
 * where a list of sigfd_poll_waiter_t structures reside which associate
 * pollheads to signalfd_state_t entries.  The sigfd_proc_state_t list is
 * walked to find a sigfd_poll_waiter_t matching the signalfd_state_t which
 * corresponds to the polled resource.  If one is found, it is reused.
 * Otherwise a new one is created, incrementing the refcount on the
 * signalfd_state_t, and it is added to the sigfd_poll_waiter_t list.
 *
 * The complications imposed by fork(2) are why the pollhead is stored in
 * the associated sigfd_poll_waiter_t instead of directly in the
 * signalfd_state_t.  More than one process can hold a reference to the
 * signalfd at a time but arriving signals should wake only process-local
 * pollers.  Additionally, signalfd_close is called only when the last
 * referencing fd is closed, hiding occurrences of preceding threads which
 * released their references.  This necessitates reference counting on the
 * signalfd_state_t so it is able to persist after close until all poll
 * references have been cleaned up.  Doing so ensures that blocked pollers
 * which hold references to the signalfd_state_t will be able to do clean-up
 * after the descriptor itself has been closed.
 *
 * When a signal arrives in a process polling on signalfd,
 * signalfd_pollwake_cb is called via the pointer in sigfd_proc_state_t.  It
 * will walk over the sigfd_poll_waiter_t entries present in the list,
 * searching for any associated with a signalfd_state_t with a matching
 * signal mask.  The approach of keeping the poller list in p_sigfd was
 * chosen because a process is likely to use few signalfds relative to its
 * total file descriptors.  It reduces the work required for each received
 * signal.
 *
 * When matching sigfd_poll_waiter_t entries are encountered in the poller
 * list during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq
 * to perform the pollwake.  This is due to a lock ordering conflict between
 * signalfd_poll and signalfd_pollwake_cb.  The former acquires
 * pollcache_t`pc_lock before proc_t`p_lock.  The latter (via sigtoproc)
 * reverses the order.  Deferring the pollwake into a taskq means it can be
 * performed without proc_t`p_lock held, avoiding the deadlock.
 *
 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the
 * list will clear out on its own.  Any per-process state which remains at
 * process exit will be cleaned up by the exit helper
 * (signalfd_exit_helper).
 *
 * The structures associated with signalfd state are designed to operate
 * correctly across fork, but there is one caveat that applies.  Using
 * fork-shared signalfd descriptors in conjunction with fork-shared caching
 * poll descriptors (such as /dev/poll or event ports) will result in missed
 * poll wake-ups.  This is caused by the pollhead identity of signalfd
 * descriptors being dependent on the process they are polled from.  Because
 * it has a thread-local cache, poll(2) is unaffected by this limitation.
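 *
 * As a hedged illustration of that caveat (hypothetical userland code, not
 * part of this driver), the pattern below is the kind that can miss
 * wake-ups, since the event port caches a pollhead belonging to the parent
 * while the child's polling resolves to a different pollhead identity:
 *
 *	int port = port_create();
 *	int sfd = signalfd(-1, &mask, 0);
 *	port_event_t ev;
 *
 *	(void) port_associate(port, PORT_SOURCE_FD, sfd, POLLIN, NULL);
 *	if (fork() == 0) {
 *		(void) port_get(port, &ev, NULL);
 *	}
 *
 * In the child, port_get() may never observe POLLIN for sfd even when a
 * masked signal is pending there.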
 *
 * Lock ordering:
 *
 * 1. signalfd_lock
 * 2. signalfd_state_t`sfd_lock
 *
 * 1. proc_t`p_lock (to walk p_sigfd)
 * 2. signalfd_state_t`sfd_lock
 * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>
#include <sys/disp.h>
#include <sys/taskq_impl.h>

typedef struct signalfd_state signalfd_state_t;

struct signalfd_state {
        list_node_t sfd_list;           /* node in global list */
        kmutex_t sfd_lock;              /* protects fields below */
        uint_t sfd_count;               /* ref count */
        boolean_t sfd_valid;            /* valid while open */
        k_sigset_t sfd_set;             /* signals for this fd */
};

typedef struct sigfd_poll_waiter {
        list_node_t spw_list;
        signalfd_state_t *spw_state;
        pollhead_t spw_pollhd;
        taskq_ent_t spw_taskent;
        short spw_pollev;
} sigfd_poll_waiter_t;

/*
 * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
 * and signalfd_state (including the sfd_list field of members).
 */
static kmutex_t signalfd_lock;
static dev_info_t *signalfd_devi;       /* device info */
static id_space_t *signalfd_minor;      /* minor number arena */
static void *signalfd_softstate;        /* softstate pointer */
static list_t signalfd_state;           /* global list of state */
static taskq_t *signalfd_wakeq;         /* pollwake event taskq */

static void
signalfd_state_enter_locked(signalfd_state_t *state)
{
        ASSERT(MUTEX_HELD(&state->sfd_lock));
        ASSERT(state->sfd_count > 0);
        VERIFY(state->sfd_valid == B_TRUE);

        state->sfd_count++;
}

static void
signalfd_state_release(signalfd_state_t *state, boolean_t force_invalidate)
{
        mutex_enter(&state->sfd_lock);

        if (force_invalidate) {
                state->sfd_valid = B_FALSE;
        }

        ASSERT(state->sfd_count > 0);
        if (state->sfd_count == 1) {
                VERIFY(state->sfd_valid == B_FALSE);
                mutex_exit(&state->sfd_lock);
                if (force_invalidate) {
                        /*
                         * The invalidation performed in signalfd_close is
                         * done while signalfd_lock is held.
                         */
                        ASSERT(MUTEX_HELD(&signalfd_lock));
                        list_remove(&signalfd_state, state);
                } else {
                        ASSERT(MUTEX_NOT_HELD(&signalfd_lock));
                        mutex_enter(&signalfd_lock);
                        list_remove(&signalfd_state, state);
                        mutex_exit(&signalfd_lock);
                }
                kmem_free(state, sizeof (*state));
                return;
        }
        state->sfd_count--;
        mutex_exit(&state->sfd_lock);
}
static sigfd_poll_waiter_t *
signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state)
{
        list_t *lst = &pstate->sigfd_list;
        sigfd_poll_waiter_t *pw;

        for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
                if (pw->spw_state == state)
                        break;
        }

        if (pw == NULL) {
                pw = kmem_zalloc(sizeof (*pw), KM_SLEEP);

                mutex_enter(&state->sfd_lock);
                signalfd_state_enter_locked(state);
                pw->spw_state = state;
                mutex_exit(&state->sfd_lock);
                list_insert_head(lst, pw);
        }
        return (pw);
}

static sigfd_poll_waiter_t *
signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state)
{
        list_t *lst = &pstate->sigfd_list;
        sigfd_poll_waiter_t *pw;

        for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
                if (pw->spw_state == state) {
                        break;
                }
        }

        if (pw != NULL) {
                list_remove(lst, pw);
                pw->spw_state = NULL;
                signalfd_state_release(state, B_FALSE);
        }

        return (pw);
}

static void
signalfd_wake_list_cleanup(proc_t *p)
{
        sigfd_proc_state_t *pstate = p->p_sigfd;
        sigfd_poll_waiter_t *pw;
        list_t *lst;

        ASSERT(MUTEX_HELD(&p->p_lock));
        ASSERT(pstate != NULL);

        lst = &pstate->sigfd_list;
        while ((pw = list_remove_head(lst)) != NULL) {
                signalfd_state_t *state = pw->spw_state;

                pw->spw_state = NULL;
                signalfd_state_release(state, B_FALSE);

                pollwakeup(&pw->spw_pollhd, POLLERR);
                pollhead_clean(&pw->spw_pollhd);
                kmem_free(pw, sizeof (*pw));
        }
        list_destroy(lst);

        p->p_sigfd = NULL;
        kmem_free(pstate, sizeof (*pstate));
}

static void
signalfd_exit_helper(void)
{
        proc_t *p = curproc;

        mutex_enter(&p->p_lock);
        signalfd_wake_list_cleanup(p);
        mutex_exit(&p->p_lock);
}

/*
 * Perform pollwake for a sigfd_poll_waiter_t entry.
 * Thanks to the strict and conflicting lock orders required for signalfd_poll
 * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock),
 * this is relegated to a taskq to avoid deadlock.
 */
static void
signalfd_wake_task(void *arg)
{
        sigfd_poll_waiter_t *pw = arg;
        signalfd_state_t *state = pw->spw_state;

        pw->spw_state = NULL;
        signalfd_state_release(state, B_FALSE);
        pollwakeup(&pw->spw_pollhd, pw->spw_pollev);
        pollhead_clean(&pw->spw_pollhd);
        kmem_free(pw, sizeof (*pw));
}
/*
 * Called every time a signal is delivered to the process so that we can
 * see if any signal stream needs a pollwakeup.  We maintain a list of
 * signal state elements so that we don't have to look at every file
 * descriptor on the process.  If necessary, a further optimization would be
 * to maintain a signal set mask that is a union of all of the sets in the
 * list so that we don't even traverse the list if the signal is not in one
 * of the elements.  However, since the list is likely to be very short,
 * this is not currently being done.  A more complex data structure might
 * also be used, but it is unclear what that would be, since each signal set
 * needs to be checked for a match.
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
        proc_t *p = (proc_t *)arg0;
        sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
        list_t *lst;
        sigfd_poll_waiter_t *pw;

        ASSERT(MUTEX_HELD(&p->p_lock));
        ASSERT(pstate != NULL);

        lst = &pstate->sigfd_list;
        pw = list_head(lst);
        while (pw != NULL) {
                signalfd_state_t *state = pw->spw_state;
                sigfd_poll_waiter_t *next;

                mutex_enter(&state->sfd_lock);
                if (!state->sfd_valid) {
                        pw->spw_pollev = POLLERR;
                } else if (sigismember(&state->sfd_set, sig)) {
                        pw->spw_pollev = POLLRDNORM | POLLIN;
                } else {
                        mutex_exit(&state->sfd_lock);
                        pw = list_next(lst, pw);
                        continue;
                }
                mutex_exit(&state->sfd_lock);

                /*
                 * Pull the sigfd_poll_waiter_t out of the list and dispatch
                 * it to perform a pollwake.  This cannot be done
                 * synchronously since signalfd_poll and signalfd_pollwake_cb
                 * have conflicting lock orders which can deadlock.
                 */
                next = list_next(lst, pw);
                list_remove(lst, pw);
                taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, pw, 0,
                    &pw->spw_taskent);
                pw = next;
        }
}

_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        signalfd_state_t *state, **sstate;
        major_t major = getemajor(*devp);
        minor_t minor = getminor(*devp);

        if (minor != SIGNALFDMNRN_SIGNALFD)
                return (ENXIO);

        mutex_enter(&signalfd_lock);

        minor = (minor_t)id_allocff(signalfd_minor);
        if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
                id_free(signalfd_minor, minor);
                mutex_exit(&signalfd_lock);
                return (ENODEV);
        }

        state = kmem_zalloc(sizeof (*state), KM_SLEEP);
        state->sfd_valid = B_TRUE;
        state->sfd_count = 1;
        list_insert_head(&signalfd_state, (void *)state);

        sstate = ddi_get_soft_state(signalfd_softstate, minor);
        *sstate = state;
        *devp = makedevice(major, minor);

        mutex_exit(&signalfd_lock);

        return (0);
}
/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The block parameter is used to control whether we wait for a signal or
 * return immediately if no signal is pending.  We use the thread's t_sigwait
 * member in the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal, or an errno if not.
 */
static int
consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
{
        k_sigset_t oldmask;
        kthread_t *t = curthread;
        klwp_t *lwp = ttolwp(t);
        proc_t *p = ttoproc(t);
        timespec_t now;
        timespec_t *rqtp = NULL;        /* null means blocking */
        int timecheck = 0;
        int ret = 0;
        k_siginfo_t info, *infop;
        signalfd_siginfo_t ssi, *ssp = &ssi;

        if (block == B_FALSE) {
                timecheck = timechanged;
                gethrestime(&now);
                rqtp = &now;    /* non-blocking check for pending signals */
        }

        t->t_sigwait = set;

        mutex_enter(&p->p_lock);
        /*
         * Set the thread's signal mask to unmask those signals in the
         * specified set.
         */
        schedctl_finish_sigblock(t);
        oldmask = t->t_hold;
        sigdiffset(&t->t_hold, &t->t_sigwait);

        /*
         * Based on rqtp, wait indefinitely until we take a signal in our set
         * or return immediately if there are no signals pending from our set.
         */
        while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
            timecheck)) > 0)
                continue;

        /* Restore thread's signal mask to its previous value. */
        t->t_hold = oldmask;
        t->t_sig_check = 1;     /* so post_syscall sees new t_hold mask */

        if (ret == -1) {
                /* no signals pending */
                mutex_exit(&p->p_lock);
                sigemptyset(&t->t_sigwait);
                return (EAGAIN);
        }

        /* Don't bother with the signal if it is not in the request set. */
        if (lwp->lwp_cursig == 0 ||
            !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
                mutex_exit(&p->p_lock);
                /*
                 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
                 * This happens if some other thread in this process called
                 * forkall() or exit().
                 */
                sigemptyset(&t->t_sigwait);
                return (EINTR);
        }

        if (lwp->lwp_curinfo) {
                infop = &lwp->lwp_curinfo->sq_info;
        } else {
                infop = &info;
                bzero(infop, sizeof (info));
                infop->si_signo = lwp->lwp_cursig;
                infop->si_code = SI_NOINFO;
        }

        lwp->lwp_ru.nsignals++;

        DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
        lwp->lwp_cursig = 0;
        lwp->lwp_extsig = 0;
        mutex_exit(&p->p_lock);

        /* Convert the k_siginfo into an external, datamodel-independent struct. */
        bzero(ssp, sizeof (*ssp));
        ssp->ssi_signo = infop->si_signo;
        ssp->ssi_errno = infop->si_errno;
        ssp->ssi_code = infop->si_code;
        ssp->ssi_pid = infop->si_pid;
        ssp->ssi_uid = infop->si_uid;
        ssp->ssi_fd = infop->si_fd;
        ssp->ssi_band = infop->si_band;
        ssp->ssi_trapno = infop->si_trapno;
        ssp->ssi_status = infop->si_status;
        ssp->ssi_utime = infop->si_utime;
        ssp->ssi_stime = infop->si_stime;
        ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

        ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);

        if (lwp->lwp_curinfo) {
                siginfofree(lwp->lwp_curinfo);
                lwp->lwp_curinfo = NULL;
        }
        sigemptyset(&t->t_sigwait);
        return (ret);
}

/*
 * This is similar to sigtimedwait.  Based on the fd mode, we may wait until
 * a signal within our specified set is posted.  We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
        signalfd_state_t *state, **sstate;
        minor_t minor = getminor(dev);
        boolean_t block = B_TRUE;
        k_sigset_t set;
        boolean_t got_one = B_FALSE;
        int res;

        if (uio->uio_resid < sizeof (signalfd_siginfo_t))
                return (EINVAL);

        sstate = ddi_get_soft_state(signalfd_softstate, minor);
        state = *sstate;

        if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
                block = B_FALSE;

        mutex_enter(&state->sfd_lock);
        set = state->sfd_set;
        mutex_exit(&state->sfd_lock);

        if (sigisempty(&set))
                return (set_errno(EINVAL));

        do {
                res = consume_signal(set, uio, block);

                if (res == 0) {
                        /*
                         * After consuming one signal, do not block while
                         * trying to consume more.
                         */
                        got_one = B_TRUE;
                        block = B_FALSE;

                        /*
                         * Refresh the matching signal set in case it was
                         * updated during the wait.
                         */
                        mutex_enter(&state->sfd_lock);
                        set = state->sfd_set;
                        mutex_exit(&state->sfd_lock);
                        if (sigisempty(&set))
                                break;
                }
        } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

        if (got_one)
                res = 0;

        return (res);
}
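/*
 * For illustration, a hedged userland sketch (hypothetical code, error
 * handling omitted) of the multi-signal consumption described above: a
 * buffer sized for several signalfd_siginfo_t records lets a single read(2)
 * drain multiple pending signals, with only the first one potentially
 * blocking:
 *
 *	signalfd_siginfo_t buf[8];
 *	ssize_t n;
 *	int consumed;
 *
 *	n = read(fd, buf, sizeof (buf));
 *	if (n > 0)
 *		consumed = n / sizeof (signalfd_siginfo_t);
 */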
/*
 * If k_sigset_t's were a single word, we would do:
 *	return (((p->p_sig | t->t_sig) & set) & fillset);
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
        return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
            set.__sigbits[0]) |
            ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
            set.__sigbits[1]) |
            (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
            set.__sigbits[2]) & FILLSET2));
}

static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        signalfd_state_t *state, **sstate;
        minor_t minor = getminor(dev);
        kthread_t *t = curthread;
        proc_t *p = ttoproc(t);
        short revents = 0;

        sstate = ddi_get_soft_state(signalfd_softstate, minor);
        state = *sstate;

        mutex_enter(&state->sfd_lock);

        if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
                revents |= POLLRDNORM | POLLIN;

        mutex_exit(&state->sfd_lock);

        *reventsp = revents & events;
        if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
                sigfd_proc_state_t *pstate;
                sigfd_poll_waiter_t *pw;

                /*
                 * Enable pollwakeup handling.
                 */
                mutex_enter(&p->p_lock);
                if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) {
                        mutex_exit(&p->p_lock);
                        pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
                        list_create(&pstate->sigfd_list,
                            sizeof (sigfd_poll_waiter_t),
                            offsetof(sigfd_poll_waiter_t, spw_list));
                        pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;

                        /* Check again, after blocking for the alloc. */
                        mutex_enter(&p->p_lock);
                        if (p->p_sigfd == NULL) {
                                p->p_sigfd = pstate;
                        } else {
                                /* someone beat us to it */
                                list_destroy(&pstate->sigfd_list);
                                kmem_free(pstate, sizeof (*pstate));
                                pstate = p->p_sigfd;
                        }
                }

                pw = signalfd_wake_list_add(pstate, state);
                *phpp = &pw->spw_pollhd;
                mutex_exit(&p->p_lock);
        }

        return (0);
}
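/*
 * The mask of consumable signals for a descriptor is set with the
 * SIGNALFDIOC_MASK ioctl handled below.  As a hedged userland sketch
 * (hypothetical code; the signalfd(3C) wrapper in libc is expected to issue
 * something equivalent, and error handling is omitted):
 *
 *	sigset_t mask;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGCHLD);
 *	(void) ioctl(fd, SIGNALFDIOC_MASK, &mask);
 */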
_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
        signalfd_state_t *state, **sstate;
        minor_t minor = getminor(dev);
        sigset_t mask;

        sstate = ddi_get_soft_state(signalfd_softstate, minor);
        state = *sstate;

        switch (cmd) {
        case SIGNALFDIOC_MASK:
                if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
                    md) != 0)
                        return (set_errno(EFAULT));

                mutex_enter(&state->sfd_lock);
                sigutok(&mask, &state->sfd_set);
                mutex_exit(&state->sfd_lock);

                return (0);

        default:
                break;
        }

        return (ENOTTY);
}

_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
        signalfd_state_t *state, **sstate;
        sigfd_poll_waiter_t *pw = NULL;
        minor_t minor = getminor(dev);
        proc_t *p = curproc;

        sstate = ddi_get_soft_state(signalfd_softstate, minor);
        state = *sstate;

        /* Make sure state is removed from this proc's pollwake list. */
        mutex_enter(&p->p_lock);
        if (p->p_sigfd != NULL) {
                sigfd_proc_state_t *pstate = p->p_sigfd;

                pw = signalfd_wake_list_rm(pstate, state);
                if (list_is_empty(&pstate->sigfd_list)) {
                        signalfd_wake_list_cleanup(p);
                }
        }
        mutex_exit(&p->p_lock);

        if (pw != NULL) {
                pollwakeup(&pw->spw_pollhd, POLLERR);
                pollhead_clean(&pw->spw_pollhd);
                kmem_free(pw, sizeof (*pw));
        }

        mutex_enter(&signalfd_lock);

        *sstate = NULL;
        ddi_soft_state_free(signalfd_softstate, minor);
        id_free(signalfd_minor, minor);

        signalfd_state_release(state, B_TRUE);

        mutex_exit(&signalfd_lock);

        return (0);
}

static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        if (cmd != DDI_ATTACH || signalfd_devi != NULL)
                return (DDI_FAILURE);

        mutex_enter(&signalfd_lock);

        signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
        if (signalfd_minor == NULL) {
                cmn_err(CE_WARN, "signalfd couldn't create id space");
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }

        if (ddi_soft_state_init(&signalfd_softstate,
            sizeof (signalfd_state_t *), 0) != 0) {
                cmn_err(CE_WARN, "signalfd failed to create soft state");
                id_space_destroy(signalfd_minor);
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }

        if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
            SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
                cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
                ddi_soft_state_fini(&signalfd_softstate);
                id_space_destroy(signalfd_minor);
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }

        ddi_report_dev(devi);
        signalfd_devi = devi;

        sigfd_exit_helper = signalfd_exit_helper;

        list_create(&signalfd_state, sizeof (signalfd_state_t),
            offsetof(signalfd_state_t, sfd_list));

        signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
            0, INT_MAX, TASKQ_PREPOPULATE);

        mutex_exit(&signalfd_lock);

        return (DDI_SUCCESS);
}
_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        switch (cmd) {
        case DDI_DETACH:
                break;

        default:
                return (DDI_FAILURE);
        }

        mutex_enter(&signalfd_lock);

        if (!list_is_empty(&signalfd_state)) {
                /*
                 * There are dangling poll waiters holding signalfd_state_t
                 * entries on the global list.  Detach is not possible until
                 * they purge themselves.
                 */
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }
        list_destroy(&signalfd_state);

        /*
         * With no remaining entries in the signalfd_state list, the wake
         * taskq should be empty with no possibility for new entries.
         */
        taskq_destroy(signalfd_wakeq);

        id_space_destroy(signalfd_minor);

        ddi_remove_minor_node(signalfd_devi, NULL);
        signalfd_devi = NULL;
        sigfd_exit_helper = NULL;

        ddi_soft_state_fini(&signalfd_softstate);
        mutex_exit(&signalfd_lock);

        return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = (void *)signalfd_devi;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }
        return (error);
}

static struct cb_ops signalfd_cb_ops = {
        signalfd_open,          /* open */
        signalfd_close,         /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        signalfd_read,          /* read */
        nodev,                  /* write */
        signalfd_ioctl,         /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        signalfd_poll,          /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab */
        D_NEW | D_MP            /* Driver compatibility flag */
};

static struct dev_ops signalfd_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        signalfd_info,          /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        signalfd_attach,        /* attach */
        signalfd_detach,        /* detach */
        nodev,                  /* reset */
        &signalfd_cb_ops,       /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed, /* quiesce */
};

static struct modldrv modldrv = {
        &mod_driverops,         /* module type (this is a pseudo driver) */
        "signalfd support",     /* name of module */
        &signalfd_ops,          /* driver ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}