/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2017 Joyent, Inc.
 * Copyright 2023 Oxide Computer Company
 */

/*
 * Support for the signalfd facility, a Linux-borne facility for
 * file descriptor-based synchronous signal consumption.
 *
 * As described on the signalfd(3C) man page, the general idea behind these
 * file descriptors is that they can be used to synchronously consume signals
 * via the read(2) syscall. While that capability already exists with the
 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
 * descriptor based: it is able to use the event facilities (poll(2),
 * /dev/poll, event ports) to notify interested parties when consumable
 * signals arrive.
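 *
 * As a sketch of that usage from the consumer side (via the signalfd(3C)
 * wrapper, with error handling elided), one might block a signal with
 * sigprocmask(2) and then consume it synchronously:
 *
 *	sigset_t mask;
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGUSR1);
 *	(void) sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *	int fd = signalfd(-1, &mask, 0);
 *	signalfd_siginfo_t ssi;
 *	(void) read(fd, &ssi, sizeof (ssi));
 *
 * The resulting fd can also be handed to poll(2), /dev/poll, or event ports
 * to learn when signals in the mask arrive.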
 *
 * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
 * is allocated for it, along with an associated signalfd_state_t struct. It
 * is there where the mask of desired signals resides.
 *
 * Reading from the signalfd is straightforward and mimics the kernel behavior
 * for sigtimedwait(). Signals continue to live on either the proc's p_sig or
 * the thread's t_sig member. During a read operation, those which match the
 * mask are consumed so they are no longer pending.
 *
 * The poll side is more complex. Every time a signal is delivered, all of the
 * signalfds on the process need to be examined in order to pollwake threads
 * waiting for signal arrival.
 *
 * When a thread polling on a signalfd requires a pollhead, several steps must
 * be taken to safely ensure the proper result. A sigfd_proc_state_t is
 * created for the calling process if it does not yet exist. It is there where
 * a list of signalfd_poller_t structures resides, associating pollheads with
 * signalfd_state_t entries. The sigfd_proc_state_t list is walked to find any
 * signalfd_poller_t which is both associated with the polling process and
 * corresponds to the signalfd resource being polled. If none matching those
 * conditions is found, then a new one with the appropriate associations is
 * created.
 *
 * The complications imposed by fork(2) are why the pollhead is stored in the
 * associated signalfd_poller_t instead of directly in the signalfd_state_t.
 * More than one process can hold a reference to the signalfd at a time, but
 * arriving signals should wake only process-local pollers. Additionally,
 * signalfd_close is called only when the last referencing fd is closed, hiding
 * occurrences of preceding threads which released their references. This
 * necessitates a pollhead for each signalfd/process pair when being polled.
 * Doing so ensures that those pollheads will live long enough for the greater
 * poll machinery to act upon them without risk of use-after-free. When a
 * signalfd is closed, existing signalfd_poller_t instances are dissociated
 * from their respective processes, causing pollwake() calls for any blocked
 * pollers.
 *
 * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
 * is called via the pointer in sigfd_proc_state_t. It will walk over the
 * signalfd_poller_t entries present in the list, searching for any possessing
 * a signal mask which matches the incoming signal. (Changes to the signal
 * mask held in signalfd_state_t are propagated to the signalfd_poller_t
 * instances to avoid the need for additional locks during the callback.) The
 * approach of keeping the poller list in p_sigfd was chosen because a process
 * is likely to use few signalfds relative to its total file descriptors. It
 * reduces the work required for each received signal.
 *
 * When matching signalfd_poller_t entries are encountered in the poller list
 * during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to
 * perform the pollwake. This is due to a lock ordering conflict between
 * signalfd_poll and signalfd_pollwake_cb: the former acquires
 * pollcache_t`pc_lock before proc_t`p_lock, while the latter (via sigtoproc)
 * reverses the order. Deferring the pollwake into a taskq means it can be
 * performed without proc_t`p_lock held, avoiding the deadlock.
 *
 * Poller entries in sigfd_proc_state_t`sigfd_list are cleaned up under two
 * different circumstances. When a signalfd instance is being closed, it will
 * dissociate all of its remaining signalfd_poller_t instances from their
 * polling processes. When a process which has polled on still-open signalfd
 * instances exits, the exit helper (signalfd_exit_helper) is called, and it
 * dissociates all signalfd_poller_t instances tied to the exiting process.
 *
 * The structures associated with signalfd state are designed to operate
 * correctly across fork, but there is one caveat that applies. Using
 * fork-shared signalfd descriptors in conjunction with fork-shared caching
 * poll descriptors (such as /dev/poll or event ports) will result in missed
 * poll wake-ups. This is caused by the pollhead identity of signalfd
 * descriptors being dependent on the process they are polled from. Because it
 * has a thread-local cache, poll(2) is unaffected by this limitation.
 *
 * Lock ordering:
 *
 * Calling signalfd_poll:
 * 1. pollcache_t`pc_lock
 * 2. signalfd_state_t`sfd_lock
 * 3. proc_t`p_lock
 *
 * Signal delivery, waking a pollhead:
 * 1. proc_t`p_lock
 * 2. signalfd_poller_t`sp_lock
 *
 * Process exit, cleaning up signalfd pollers:
 * 1. proc_t`p_lock
 * 2. signalfd_poller_t`sp_lock
 *
 * Waking a pollhead, from taskq:
 * 1. signalfd_poller_t`sp_lock
 * ... Disjoint from signalfd_poller_t`sp_lock hold ...
 * 1. pollcache_t`pc_lock
 *
 * Closing signalfd, dissociating pollers:
 * 1. signalfd_state_t`sfd_lock
 * 2. pidlock
 * 3. proc_t`p_lock
 *
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>
#include <sys/disp.h>
#include <sys/taskq_impl.h>
#include <sys/condvar.h>
#include <sys/stdbool.h>

/* Per-instance signalfd device state: */
typedef struct signalfd_state {
	kmutex_t	sfd_lock;	/* protects fields below */
	list_t		sfd_pollers;
	k_sigset_t	sfd_mask;	/* signal mask for this instance */
	minor_t		sfd_minor;	/* dev minor, fixed at creation */
} signalfd_state_t;

typedef struct signalfd_poller {
	/*
	 * List node referenced by the containing signalfd_state_t.
	 * Protected by signalfd_state_t`sfd_lock.
	 */
	list_node_t	sp_state_node;

	/*
	 * List node referenced by the containing sigfd_proc_state_t.
	 * Protected by proc_t`p_lock.
	 */
	list_node_t	sp_proc_node;

	pollhead_t	sp_pollhead;

	/*
	 * The signalfd_state_t with which this poller is associated.
	 * It remains fixed after its initialization at creation time.
	 */
	signalfd_state_t *sp_state;

	/*
	 * The proc_t with which this poller is associated.
	 * It is initialized under the protection of proc_t`p_lock when this
	 * poller is created. It is NULLed out, again under the protection of
	 * proc_t`p_lock, when the poller is dissociated from the process.
	 */
	proc_t		*sp_proc;

	kmutex_t	sp_lock;	/* protects fields below */
	kcondvar_t	sp_cv;		/* CV for cleaning up */
	short		sp_pollev;	/* Event(s) pending delivery */
	bool		sp_pending;	/* pollwakeup() via taskq in progress */
	taskq_ent_t	sp_taskent;	/* pollwakeup() dispatch taskq */
	k_sigset_t	sp_mask;	/* signal match mask */
} signalfd_poller_t;

static dev_info_t	*signalfd_devi;		/* device info */
static id_space_t	*signalfd_minors;	/* minor number arena */
static void		*signalfd_softstate;	/* softstate pointer */
static taskq_t		*signalfd_wakeq;	/* pollwake event taskq */

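/*
 * Tear down the sigfd_proc_state_t for a process once its poller list has
 * been emptied. The caller must hold p_lock.
 */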
static void
signalfd_proc_clean(proc_t *p)
{
	sigfd_proc_state_t *pstate = p->p_sigfd;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);
	VERIFY(list_is_empty(&pstate->sigfd_list));

	p->p_sigfd = NULL;
	list_destroy(&pstate->sigfd_list);
	kmem_free(pstate, sizeof (*pstate));
}

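/*
 * Taskq callback: deliver pending poll event(s) for a poller via
 * pollwakeup(), looping to pick up any events queued while the locks were
 * dropped.
 */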
static void
signalfd_wake_task(void *arg)
{
	signalfd_poller_t *sp = arg;

	mutex_enter(&sp->sp_lock);
	VERIFY(sp->sp_pollev != 0);
	VERIFY(sp->sp_pending);
	do {
		const short pollev = sp->sp_pollev;
		const bool is_err = (pollev & POLLERR) != 0;
		sp->sp_pollev = 0;
		mutex_exit(&sp->sp_lock);

		/*
		 * Actions against the pollhead and associated pollcache(s) are
		 * taken without signalfd_poller_t`sp_lock held, since the
		 * chain of dependencies through pollcache_t`pc_lock and
		 * signalfd_state_t`sfd_lock forms a potential for deadlock.
		 */
		pollwakeup(&sp->sp_pollhead, pollev);
		if (is_err) {
			pollhead_clean(&sp->sp_pollhead);
		}

		mutex_enter(&sp->sp_lock);
		/*
		 * Once pollhead/pollcache actions are complete, check for
		 * newly queued events which could have appeared in the
		 * meantime. We can bail immediately if POLLERR was being
		 * delivered, since the underlying resource is undergoing
		 * clean-up.
		 */
		if (is_err) {
			break;
		}
	} while (sp->sp_pollev != 0);

	/*
	 * Indicate that wake task processing is complete.
	 *
	 * Wake any thread waiting for event delivery to complete if this
	 * poller is being torn down.
	 */
	sp->sp_pending = false;
	cv_signal(&sp->sp_cv);
	mutex_exit(&sp->sp_lock);
}

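/*
 * Record pending poll event(s) on a poller and dispatch the wake task if one
 * is not already in flight. The caller must hold sp_lock.
 */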
static void
signalfd_poller_wake(signalfd_poller_t *sp, short ev)
{
	ASSERT(MUTEX_HELD(&sp->sp_lock));

	sp->sp_pollev |= ev;
	if (!sp->sp_pending) {
		sp->sp_pending = true;
		taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, sp, 0,
		    &sp->sp_taskent);
	}
}

/*
 * Notification callback associated with processes which are being polled for
 * signalfd events. Called by sigtoproc().
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
	proc_t *p = (proc_t *)arg0;
	sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);

	list_t *pollers = &pstate->sigfd_list;
	for (signalfd_poller_t *sp = list_head(pollers); sp != NULL;
	    sp = list_next(pollers, sp)) {
		mutex_enter(&sp->sp_lock);
		if (sigismember(&sp->sp_mask, sig)) {
			signalfd_poller_wake(sp, POLLRDNORM | POLLIN);
		}
		mutex_exit(&sp->sp_lock);
	}
}

/*
 * Get the sigfd_proc_state_t for a given process, allocating one if necessary.
 *
 * Must be called with p_lock held, which may be dropped and reacquired during
 * the allocation.
 */
static sigfd_proc_state_t *
signalfd_proc_pstate(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	sigfd_proc_state_t *pstate = p->p_sigfd;
	if (pstate == NULL) {
		mutex_exit(&p->p_lock);
		pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
		list_create(&pstate->sigfd_list,
		    sizeof (signalfd_poller_t),
		    offsetof(signalfd_poller_t, sp_proc_node));
		pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;

		/* Check again, after blocking for the alloc. */
		mutex_enter(&p->p_lock);
		if (p->p_sigfd == NULL) {
			p->p_sigfd = pstate;
		} else {
			/* Someone beat us to it */
			list_destroy(&pstate->sigfd_list);
			kmem_free(pstate, sizeof (*pstate));
			pstate = p->p_sigfd;
		}
	}

	return (pstate);
}

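/*
 * Find the signalfd_poller_t tying this signalfd_state_t to the polling
 * process, creating and registering one if no such poller exists yet.
 */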
static signalfd_poller_t *
signalfd_poller_associate(signalfd_state_t *state, proc_t *p)
{
	sigfd_proc_state_t *pstate;
	list_t *pollers;
	signalfd_poller_t *sp;

	ASSERT(MUTEX_HELD(&state->sfd_lock));

	mutex_enter(&p->p_lock);

	pstate = signalfd_proc_pstate(p);
	pollers = &pstate->sigfd_list;

	/*
	 * Check if there is already a signalfd_poller_t allocated for this
	 * signalfd_state_t/proc_t pair.
	 */
	for (sp = list_head(pollers); sp != NULL; sp = list_next(pollers, sp)) {
		if (sp->sp_state == state) {
			mutex_exit(&p->p_lock);
			return (sp);
		}
	}

	/*
	 * No existing poller found, so allocate one. Since sfd_lock remains
	 * held, there is no risk of some other operation racing with us to
	 * create such a poller.
	 */
	mutex_exit(&p->p_lock);

	sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
	mutex_init(&sp->sp_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sp->sp_cv, NULL, CV_DEFAULT, NULL);
	sigorset(&sp->sp_mask, &state->sfd_mask);
	sp->sp_state = state;
	sp->sp_proc = p;

	mutex_enter(&p->p_lock);
	/*
	 * Fetch the pstate again, since it could have been freed or
	 * reallocated while p_lock was dropped.
	 */
	pstate = signalfd_proc_pstate(p);

	list_insert_tail(&pstate->sigfd_list, sp);
	list_insert_tail(&state->sfd_pollers, sp);
	mutex_exit(&p->p_lock);

	return (sp);
}

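/*
 * Dissociate all of a signalfd instance's pollers from their polling
 * processes, issuing POLLERR wake-ups as they are detached.
 */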
static void
signalfd_pollers_dissociate(signalfd_state_t *state)
{
	ASSERT(MUTEX_HELD(&state->sfd_lock));

	mutex_enter(&pidlock);

	signalfd_poller_t *sp;
	list_t *pollers = &state->sfd_pollers;
	for (sp = list_head(pollers); sp != NULL; sp = list_next(pollers, sp)) {
		proc_t *p = sp->sp_proc;

		if (p == NULL) {
			continue;
		}

		/*
		 * Even if the process in question is racing us to clean up in
		 * proc_exit(), it will be unable to exit (and free itself)
		 * since we hold pidlock. This prevents us from otherwise
		 * attempting to lock a p_lock which was freed.
		 */
		mutex_enter(&p->p_lock);
		if (sp->sp_proc == NULL) {
			mutex_exit(&p->p_lock);
			continue;
		}
		VERIFY3P(sp->sp_proc, ==, p);
		VERIFY3P(sp->sp_state, ==, state);
		VERIFY3P(p->p_sigfd, !=, NULL);

		sigfd_proc_state_t *pstate = p->p_sigfd;
		list_remove(&pstate->sigfd_list, sp);
		sp->sp_proc = NULL;

		/* Wake any lingering pollers referencing the pollhead */
		mutex_enter(&sp->sp_lock);
		signalfd_poller_wake(sp, POLLERR);
		mutex_exit(&sp->sp_lock);

		if (list_is_empty(&pstate->sigfd_list)) {
			/*
			 * If this poller was the last associated against the
			 * process, then clean up its state as well.
			 */
			signalfd_proc_clean(p);
		}
		mutex_exit(&p->p_lock);
	}
	mutex_exit(&pidlock);
}

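/*
 * Free all of a signalfd instance's (now-dissociated) pollers, waiting for
 * any in-flight wake tasks to complete first.
 */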
static void
signalfd_pollers_free(signalfd_state_t *state)
{
	ASSERT(MUTEX_HELD(&state->sfd_lock));

	signalfd_poller_t *sp;
	while ((sp = list_remove_head(&state->sfd_pollers)) != NULL) {
		ASSERT3P(sp->sp_proc, ==, NULL);

		mutex_enter(&sp->sp_lock);
		while (sp->sp_pending) {
			cv_wait(&sp->sp_cv, &sp->sp_lock);
		}
		mutex_exit(&sp->sp_lock);

		/*
		 * With the poller dissociated from its polling process, and
		 * any lingering events delivered, the pollhead should be
		 * empty.
		 */
		ASSERT3P(sp->sp_pollhead.ph_list, ==, NULL);

		cv_destroy(&sp->sp_cv);
		mutex_destroy(&sp->sp_lock);
		kmem_free(sp, sizeof (*sp));
	}
}

/*
 * Callback for cleaning up signalfd state from a process during proc_exit().
 */
static void
signalfd_exit_helper(void)
{
	proc_t *p = curproc;

	mutex_enter(&p->p_lock);

	sigfd_proc_state_t *pstate = p->p_sigfd;
	if (pstate == NULL) {
		mutex_exit(&p->p_lock);
		return;
	}

	signalfd_poller_t *sp;
	while ((sp = list_remove_head(&pstate->sigfd_list)) != NULL) {
		/*
		 * Having been removed from the sigfd_list, make it clear that
		 * this signalfd_poller_t is dissociated from the process.
		 */
		sp->sp_proc = NULL;

		/* Wake any lingering pollers referencing the pollhead */
		mutex_enter(&sp->sp_lock);
		signalfd_poller_wake(sp, POLLERR);
		mutex_exit(&sp->sp_lock);
	}
	signalfd_proc_clean(p);
	mutex_exit(&p->p_lock);
}

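/*
 * Open a new signalfd instance: allocate a minor number and the
 * signalfd_state_t which tracks the instance's signal mask and pollers.
 */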
_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	if (getminor(*devp) != SIGNALFDMNRN_SIGNALFD) {
		return (ENXIO);
	}

	const minor_t minor = (minor_t)id_allocff_nosleep(signalfd_minors);
	if (minor == (minor_t)-1) {
		return (ENOMEM);
	}

	if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
		id_free(signalfd_minors, minor);
		return (ENODEV);
	}

	signalfd_state_t *state = ddi_get_soft_state(signalfd_softstate, minor);
	mutex_init(&state->sfd_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&state->sfd_pollers, sizeof (signalfd_poller_t),
	    offsetof(signalfd_poller_t, sp_state_node));
	state->sfd_minor = minor;

	const major_t major = getemajor(*devp);
	*devp = makedevice(major, minor);

	return (0);
}

/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The should_block parameter is used to control whether we wait for a signal
 * or return immediately if no signal is pending. We use the thread's
 * t_sigwait member in the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal or an errno if not.
 */
static int
signalfd_consume_signal(k_sigset_t set, uio_t *uio, bool should_block)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	int ret = 0;

	/*
	 * Identify signals of interest so they can be processed, even if
	 * other parts of the machinery would be poised to ignore them.
	 */
	t->t_sigwait = set;

	mutex_enter(&p->p_lock);

	/* Set thread signal mask to unmask those in the specified set. */
	schedctl_finish_sigblock(t);
	const k_sigset_t oldmask = t->t_hold;
	sigdiffset(&t->t_hold, &t->t_sigwait);

	if (should_block) {
		do {
			ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock,
			    NULL, 0);
		} while (ret > 0);
	} else {
		mutex_exit(&p->p_lock);
		if (issig(FORREAL) == 0) {
			ret = -1;
		}
		mutex_enter(&p->p_lock);
	}

	/*
	 * Restore the thread's signal mask to its previous value.
	 * Set t_sig_check so post_syscall sees the new t_hold mask.
	 */
	t->t_hold = oldmask;
	t->t_sig_check = 1;

	if (ret == -1) {
		/* no signals pending */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EAGAIN);
	}

	/* Do not bother with the signal if it is not in the request set. */
	if (lwp->lwp_cursig == 0 ||
	    !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
		/*
		 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
		 * This happens if some other thread in this process called
		 * forkall() or exit().
		 */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EINTR);
	}

	/* Convert signal info into an external, datamodel-independent struct. */
	signalfd_siginfo_t ssi;
	bzero(&ssi, sizeof (ssi));
	if (lwp->lwp_curinfo != NULL) {
		k_siginfo_t *infop = &lwp->lwp_curinfo->sq_info;

		ssi.ssi_signo = infop->si_signo;
		ssi.ssi_errno = infop->si_errno;
		ssi.ssi_code = infop->si_code;
		ssi.ssi_pid = infop->si_pid;
		ssi.ssi_uid = infop->si_uid;
		ssi.ssi_fd = infop->si_fd;
		ssi.ssi_band = infop->si_band;
		ssi.ssi_trapno = infop->si_trapno;
		ssi.ssi_status = infop->si_status;
		ssi.ssi_utime = infop->si_utime;
		ssi.ssi_stime = infop->si_stime;
		ssi.ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

		DTRACE_PROC2(signal__clear, int, 0, ksiginfo_t *, infop);
	} else {
		/* Convert to the format expected by the probe. */
		k_siginfo_t info = {
			.si_signo = lwp->lwp_cursig,
			.si_code = SI_NOINFO,
		};

		ssi.ssi_signo = info.si_signo;
		ssi.ssi_code = info.si_code;

		DTRACE_PROC2(signal__clear, int, 0, ksiginfo_t *, &info);
	}

	lwp->lwp_ru.nsignals++;
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	mutex_exit(&p->p_lock);

	ret = uiomove(&ssi, sizeof (ssi), UIO_READ, uio);
	sigemptyset(&t->t_sigwait);
	return (ret);
}

/*
 * This is similar to sigtimedwait(). Based on the fd mode, we may wait until
 * a signal within our specified set is posted. We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	signalfd_state_t *state;
	k_sigset_t set;
	bool should_block = true, got_one = false;
	int res;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	if (uio->uio_resid < sizeof (signalfd_siginfo_t)) {
		return (EINVAL);
	}

	if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
		should_block = false;
	}

	mutex_enter(&state->sfd_lock);
	set = state->sfd_mask;
	mutex_exit(&state->sfd_lock);

	if (sigisempty(&set))
		return (set_errno(EINVAL));

	do {
		res = signalfd_consume_signal(set, uio, should_block);

		if (res == 0) {
			/*
			 * After consuming one signal, do not block while
			 * trying to consume more.
			 */
			got_one = true;
			should_block = false;

			/*
			 * Refresh the matching signal set in case it was
			 * updated during the wait.
			 */
			mutex_enter(&state->sfd_lock);
			set = state->sfd_mask;
			mutex_exit(&state->sfd_lock);
			if (sigisempty(&set))
				break;
		}
	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

	if (got_one)
		res = 0;

	return (res);
}

/*
 * If k_sigset_t's were a single word, we would do:
 *	return (((p->p_sig | t->t_sig) & set) & fillset);
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
	return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
	    set.__sigbits[0]) |
	    ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
	    set.__sigbits[1]) |
	    (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
	    set.__sigbits[2]) & FILLSET2));
}

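/*
 * Poll entry point: report whether signals matching this instance's mask are
 * pending against the polling process, associating a per-process pollhead
 * when the caller requires one.
 */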
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	signalfd_state_t *state;
	short revents = 0;
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	mutex_enter(&state->sfd_lock);
	if (signalfd_sig_pending(p, t, state->sfd_mask) != 0) {
		revents |= POLLRDNORM | POLLIN;
	}

	*reventsp = revents & events;
	if ((*reventsp == 0 && !anyyet) || (events & POLLET) != 0) {
		signalfd_poller_t *sp;

		sp = signalfd_poller_associate(state, p);
		*phpp = &sp->sp_pollhead;
	}
	mutex_exit(&state->sfd_lock);

	return (0);
}

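/*
 * Update the signal mask for this signalfd instance, propagating the change
 * to all of its associated pollers.
 */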
static void
signalfd_set_mask(signalfd_state_t *state, const sigset_t *umask)
{
	k_sigset_t kmask;

	sigutok(umask, &kmask);

	mutex_enter(&state->sfd_lock);
	state->sfd_mask = kmask;
	list_t *pollers = &state->sfd_pollers;
	for (signalfd_poller_t *sp = list_head(pollers); sp != NULL;
	    sp = list_next(pollers, sp)) {
		mutex_enter(&sp->sp_lock);
		sp->sp_mask = kmask;
		mutex_exit(&sp->sp_lock);
	}
	mutex_exit(&state->sfd_lock);
}

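/*
 * Handle SIGNALFDIOC_MASK: copy a new signal mask in from userspace and
 * apply it to this instance.
 */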
_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	signalfd_state_t *state;
	sigset_t mask;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	switch (cmd) {
	case SIGNALFDIOC_MASK:
		if (ddi_copyin((caddr_t)arg, &mask, sizeof (mask), md) != 0) {
			return (EFAULT);
		}
		signalfd_set_mask(state, &mask);
		return (0);

	default:
		break;
	}

	return (ENOTTY);
}

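/*
 * Close entry point, called when the last fd referencing this instance is
 * closed: dissociate and free all pollers, then tear down instance state.
 */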
_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state;
	const minor_t minor = getminor(dev);

	state = ddi_get_soft_state(signalfd_softstate, minor);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * With this signalfd instance being closed, sfd_lock is a formality,
	 * as nothing else should be reaching for it to add pollers at this
	 * point.
	 */
	mutex_enter(&state->sfd_lock);

	/* Dissociate any pollers from their respective processes */
	signalfd_pollers_dissociate(state);

	/* ... and free all those (now-dissociated) pollers */
	signalfd_pollers_free(state);
	ASSERT(list_is_empty(&state->sfd_pollers));
	mutex_exit(&state->sfd_lock);

	mutex_destroy(&state->sfd_lock);
	ddi_soft_state_free(signalfd_softstate, minor);
	id_free(signalfd_minors, minor);

	return (0);
}

static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH || signalfd_devi != NULL) {
		return (DDI_FAILURE);
	}

	signalfd_minors = id_space_create("signalfd_minors", 1, L_MAXMIN32 + 1);
	if (signalfd_minors == NULL) {
		cmn_err(CE_WARN, "signalfd couldn't create id space");
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_init(&signalfd_softstate,
	    sizeof (signalfd_state_t), 0) != 0) {
		cmn_err(CE_WARN, "signalfd failed to create soft state");
		id_space_destroy(signalfd_minors);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, 0) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "signalfd couldn't create minor node");
		ddi_soft_state_fini(&signalfd_softstate);
		id_space_destroy(signalfd_minors);
		return (DDI_FAILURE);
	}

	sigfd_exit_helper = signalfd_exit_helper;

	signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
	    0, INT_MAX, TASKQ_PREPOPULATE);

	ddi_report_dev(devi);
	signalfd_devi = devi;

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	/*
	 * With all of the instances gone, it is safe to both destroy the
	 * waker taskq (which must be empty) and tear down the exit helper
	 * (which must be unreachable with no proc_t`p_sigfd associations).
	 */
	taskq_destroy(signalfd_wakeq);
	sigfd_exit_helper = NULL;

	id_space_destroy(signalfd_minors);
	ddi_soft_state_fini(&signalfd_softstate);
	ddi_remove_minor_node(signalfd_devi, NULL);
	signalfd_devi = NULL;

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)signalfd_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops signalfd_cb_ops = {
	signalfd_open,		/* open */
	signalfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	signalfd_read,		/* read */
	nodev,			/* write */
	signalfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	signalfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops signalfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	signalfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	signalfd_attach,	/* attach */
	signalfd_detach,	/* detach */
	nodev,			/* reset */
	&signalfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"signalfd support",	/* name of module */
	&signalfd_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}