/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2015 Joyent, Inc.
 */

/*
 * Support for the signalfd facility, a Linux-borne facility for
 * file descriptor-based synchronous signal consumption.
 *
 * As described on the signalfd(3C) man page, the general idea behind these
 * file descriptors is that they can be used to synchronously consume signals
 * via the read(2) syscall. That capability already exists with the
 * sigwaitinfo(3C) function but the key advantage of signalfd is that, because
 * it is file descriptor based, poll(2) can be used to determine when signals
 * are available to be consumed.
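 *
 * For example, a consumer might use the facility roughly as follows (a
 * minimal userland sketch for illustration only; error handling and the
 * surrounding program are omitted). The signals of interest are blocked
 * so that they are not delivered in the usual way, a sigfd is created
 * for the set, and poll(2)/read(2) then wait for and consume them:
 *
 *	sigset_t mask;
 *	signalfd_siginfo_t ssi;
 *	struct pollfd pfd;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGINT);
 *	(void) sigprocmask(SIG_BLOCK, &mask, NULL);
 *	pfd.fd = signalfd(-1, &mask, 0);
 *	pfd.events = POLLIN;
 *	(void) poll(&pfd, 1, -1);
 *	(void) read(pfd.fd, &ssi, sizeof (ssi));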
 *
 * The general implementation uses signalfd_state to hold both the signal set
 * and poll head for an open file descriptor. Because a process can be using
 * different sigfds with different signal sets, each signalfd_state poll head
 * can be thought of as an independent signal stream and the thread(s) waiting
 * on that stream will get poll notification when any signal in the
 * corresponding set is received.
 *
 * The sigfd_proc_state_t struct lives on the proc_t and maintains per-proc
 * state for function callbacks and data when the proc needs to do work during
 * signal delivery for pollwakeup.
 *
 * The read side of the implementation is straightforward and mimics the
 * kernel behavior for sigtimedwait(). Signals continue to live on either
 * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so
 * that it is no longer pending.
 *
 * The poll side is more complex since all of the sigfds on the process need
 * to be examined every time a signal is delivered to the process in order to
 * pollwake any thread waiting in poll for that signal.
 *
 * Because it is likely that a process will only be using one, or a few, sigfds,
 * but many total file descriptors, we maintain a list of sigfds which need
 * pollwakeup. The list lives on the proc's p_sigfd struct. In this way only
 * zero, or a few, of the state structs will need to be examined every time a
 * signal is delivered to the process, instead of having to examine all of the
 * file descriptors to find the state structs. When a state struct with a
 * matching signal set is found then pollwakeup is called.
 *
 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
 * will clear out on its own. There is an exit helper (signalfd_exit_helper)
 * which cleans up any remaining per-proc state when the process exits.
 *
 * The main complexity with signalfd is the interaction of forking and polling.
 * This interaction is complex because now two processes have a fd that
 * references the same dev_t (and its associated signalfd_state), but signals
 * go to only one of those processes. Also, we don't know when one of the
 * processes closes its fd because our 'close' entry point is only called when
 * the last fd is closed (which could be by either process).
 *
 * Because the state struct is referenced by both file descriptors, and the
 * state struct represents a signal stream needing a pollwakeup, if both
 * processes were polling then both processes would get a pollwakeup when a
 * signal arrives for either process (that is, the pollhead is associated with
 * our dev_t so when a signal arrives the pollwakeup wakes up all waiters).
 *
 * Fortunately this is not a common problem in practice, but the implementation
 * attempts to mitigate unexpected behavior. The typical behavior is that the
 * parent has been polling the signalfd (which is why it was open in the first
 * place) and the parent might have a pending signalfd_state (with the
 * pollhead) on its per-process sigfd_list. After the fork the child will
 * simply close that fd (among others) as part of the typical fork/close/exec
 * pattern. Because the child will never poll that fd, it will never get any
 * state onto its own sigfd_list (the child starts with a null list). The
 * intention is that the child sees no pollwakeup activity for signals unless
 * it explicitly reinvokes poll on the sigfd.
 *
 * As background, there are two primary polling cases to consider when the
 * parent process forks:
 * 1) If any thread is blocked in poll(2) then both the parent and child will
 *    return from the poll syscall with EINTR. This means that if either
 *    process wants to re-poll on a sigfd then it needs to re-run poll and
 *    would come back into the signalfd_poll entry point. The parent would
 *    already have the dev_t's state on its sigfd_list and the child would not
 *    have anything there unless it called poll again on its fd.
 * 2) If the process is using /dev/poll(7D) then the polling info is being
 *    cached by the poll device and the process might not currently be blocked
 *    on anything polling related. A subsequent DP_POLL ioctl will not invoke
 *    our signalfd_poll entry point again. Because the parent still has its
 *    sigfd_list set up, an incoming signal will hit our signalfd_pollwake_cb
 *    entry point, which in turn calls pollwakeup, and /dev/poll will do the
 *    right thing on DP_POLL. The child will not have a sigfd_list yet so the
 *    signal will not cause a pollwakeup. The dp code does its own handling for
 *    cleaning up its cache.
 *
 * This leaves only one odd corner case. If the parent and child both use
 * the dup-ed sigfd to poll then when a signal is delivered to either process
 * there is no way to determine which one should get the pollwakeup (since
 * both processes will be queued on the same signal stream poll head). What
 * happens in this case is that both processes will return from poll, but only
 * one of them will actually have a signal to read. The other will return
 * from read with EAGAIN, or block. This case is actually similar to the
 * situation within a single process which got two different sigfd's with the
 * same mask (or poll on two fd's that are dup-ed). Both would return from poll
 * when a signal arrives but only one read would consume the signal and the
 * other read would fail or block. Applications which poll on shared fd's
 * cannot assume that a subsequent read will actually obtain data.
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>

typedef struct signalfd_state signalfd_state_t;

struct signalfd_state {
	kmutex_t sfd_lock;			/* lock protecting state */
	pollhead_t sfd_pollhd;			/* poll head */
	k_sigset_t sfd_set;			/* signals for this fd */
	signalfd_state_t *sfd_next;		/* next state on global list */
};

/*
 * Internal global variables.
 */
static kmutex_t		signalfd_lock;		/* lock protecting state */
static dev_info_t	*signalfd_devi;		/* device info */
static id_space_t	*signalfd_minor;	/* minor number arena */
static void		*signalfd_softstate;	/* softstate pointer */
static signalfd_state_t	*signalfd_state;	/* global list of state */

/*
 * If we don't already have an entry in the proc's list for this state, add one.
 */
static void
signalfd_wake_list_add(signalfd_state_t *state)
{
	proc_t *p = curproc;
	list_t *lst;
	sigfd_wake_list_t *wlp;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(p->p_sigfd != NULL);

	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
	for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
		if (wlp->sigfd_wl_state == state)
			break;
	}

	if (wlp == NULL) {
		wlp = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP);
		wlp->sigfd_wl_state = state;
		list_insert_head(lst, wlp);
	}
}

static void
signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp)
{
	list_remove(lst, wlp);
	kmem_free(wlp, sizeof (sigfd_wake_list_t));
}

static void
signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state)
{
	sigfd_wake_list_t *wlp;
	list_t *lst;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_sigfd == NULL)
		return;

	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
	for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
		if (wlp->sigfd_wl_state == state) {
			signalfd_wake_rm(lst, wlp);
			break;
		}
	}

	if (list_is_empty(lst)) {
		((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
		list_destroy(lst);
		kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
		p->p_sigfd = NULL;
	}
}

static void
signalfd_wake_list_cleanup(proc_t *p)
{
	sigfd_wake_list_t *wlp;
	list_t *lst;

	ASSERT(MUTEX_HELD(&p->p_lock));

	((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;

	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
	while (!list_is_empty(lst)) {
		wlp = (sigfd_wake_list_t *)list_remove_head(lst);
		kmem_free(wlp, sizeof (sigfd_wake_list_t));
	}
}

static void
signalfd_exit_helper(void)
{
	proc_t *p = curproc;
	list_t *lst;

	/* This being non-null is the only way we can get here */
	ASSERT(p->p_sigfd != NULL);

	mutex_enter(&p->p_lock);
	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;

	signalfd_wake_list_cleanup(p);
	list_destroy(lst);
	kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
	p->p_sigfd = NULL;
	mutex_exit(&p->p_lock);
}

/*
 * Called every time a signal is delivered to the process so that we can
 * see if any signal stream needs a pollwakeup. We maintain a list of
 * signal state elements so that we don't have to look at every file descriptor
 * on the process. If necessary, a further optimization would be to maintain a
 * signal set mask that is a union of all of the sets in the list so that
 * we don't even traverse the list if the signal is not in one of the elements.
 * However, since the list is likely to be very short, this is not currently
 * being done. A more complex data structure might also be used, but it is
 * unclear what that would be since each signal set needs to be checked for a
 * match.
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
	proc_t *p = (proc_t *)arg0;
	list_t *lst;
	sigfd_wake_list_t *wlp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_sigfd == NULL)
		return;

	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
	wlp = list_head(lst);
	while (wlp != NULL) {
		signalfd_state_t *state = wlp->sigfd_wl_state;

		mutex_enter(&state->sfd_lock);

		if (sigismember(&state->sfd_set, sig) &&
		    state->sfd_pollhd.ph_list != NULL) {
			sigfd_wake_list_t *tmp = wlp;

			/* remove it from the list */
			wlp = list_next(lst, wlp);
			signalfd_wake_rm(lst, tmp);

			mutex_exit(&state->sfd_lock);
			pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN);
		} else {
			mutex_exit(&state->sfd_lock);
			wlp = list_next(lst, wlp);
		}
	}
}
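
/*
 * As a sketch of the union-set optimization contemplated in the comment
 * above (not implemented; sigfd_union is a hypothetical member that
 * sigfd_proc_state_t does not have today), the walk in
 * signalfd_pollwake_cb() could be guarded with:
 *
 *	k_sigset_t *un = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_union;
 *
 *	if (!sigismember(un, sig))
 *		return;
 *
 * where sigfd_union would be recomputed as the union of every sfd_set on
 * the list whenever the list or one of its signal sets changes.
 */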

_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state;
	major_t major = getemajor(*devp);
	minor_t minor = getminor(*devp);

	if (minor != SIGNALFDMNRN_SIGNALFD)
		return (ENXIO);

	mutex_enter(&signalfd_lock);

	minor = (minor_t)id_allocff(signalfd_minor);

	if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
		id_free(signalfd_minor, minor);
		mutex_exit(&signalfd_lock);
		return (ENODEV);
	}

	state = ddi_get_soft_state(signalfd_softstate, minor);
	*devp = makedevice(major, minor);

	state->sfd_next = signalfd_state;
	signalfd_state = state;

	mutex_exit(&signalfd_lock);

	return (0);
}

/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The block parameter is used to control whether we wait for a signal or
 * return immediately if no signal is pending. We use the thread's t_sigwait
 * member in the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal or an errno if not.
 */
static int
consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
{
	k_sigset_t oldmask;
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	timespec_t now;
	timespec_t *rqtp = NULL;	/* null means blocking */
	int timecheck = 0;
	int ret = 0;
	k_siginfo_t info, *infop;
	signalfd_siginfo_t ssi, *ssp = &ssi;

	if (block == B_FALSE) {
		timecheck = timechanged;
		gethrestime(&now);
		rqtp = &now;	/* non-blocking check for pending signals */
	}

	t->t_sigwait = set;

	mutex_enter(&p->p_lock);
	/*
	 * Set the thread's signal mask to unmask those signals in the
	 * specified set.
	 */
	schedctl_finish_sigblock(t);
	oldmask = t->t_hold;
	sigdiffset(&t->t_hold, &t->t_sigwait);

	/*
	 * Based on rqtp, wait indefinitely until we take a signal in our set
	 * or return immediately if there are no signals pending from our set.
	 */
	while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
	    timecheck)) > 0)
		continue;

	/* Restore thread's signal mask to its previous value. */
	t->t_hold = oldmask;
	t->t_sig_check = 1;	/* so post_syscall sees new t_hold mask */

	if (ret == -1) {
		/* no signals pending */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EAGAIN);
	}

	/* Don't bother with signal if it is not in request set. */
	if (lwp->lwp_cursig == 0 ||
	    !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
		mutex_exit(&p->p_lock);
		/*
		 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
		 * This happens if some other thread in this process called
		 * forkall() or exit().
		 */
		sigemptyset(&t->t_sigwait);
		return (EINTR);
	}

	if (lwp->lwp_curinfo) {
		infop = &lwp->lwp_curinfo->sq_info;
	} else {
		infop = &info;
		bzero(infop, sizeof (info));
		infop->si_signo = lwp->lwp_cursig;
		infop->si_code = SI_NOINFO;
	}

	lwp->lwp_ru.nsignals++;

	DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	mutex_exit(&p->p_lock);

	/* Convert k_siginfo into the external, datamodel-independent struct. */
	bzero(ssp, sizeof (*ssp));
	ssp->ssi_signo = infop->si_signo;
	ssp->ssi_errno = infop->si_errno;
	ssp->ssi_code = infop->si_code;
	ssp->ssi_pid = infop->si_pid;
	ssp->ssi_uid = infop->si_uid;
	ssp->ssi_fd = infop->si_fd;
	ssp->ssi_band = infop->si_band;
	ssp->ssi_trapno = infop->si_trapno;
	ssp->ssi_status = infop->si_status;
	ssp->ssi_utime = infop->si_utime;
	ssp->ssi_stime = infop->si_stime;
	ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

	ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);

	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	sigemptyset(&t->t_sigwait);
	return (ret);
}

/*
 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
 * signal within our specified set is posted. We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	signalfd_state_t *state;
	minor_t minor = getminor(dev);
	boolean_t block = B_TRUE;
	k_sigset_t set;
	boolean_t got_one = B_FALSE;
	int res;

	if (uio->uio_resid < sizeof (signalfd_siginfo_t))
		return (EINVAL);

	state = ddi_get_soft_state(signalfd_softstate, minor);

	if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
		block = B_FALSE;

	mutex_enter(&state->sfd_lock);
	set = state->sfd_set;
	mutex_exit(&state->sfd_lock);

	if (sigisempty(&set))
		return (set_errno(EINVAL));

	do {
		/* Consume from the snapshot of sfd_set taken above. */
		res = consume_signal(set, uio, block);
		if (res == 0)
			got_one = B_TRUE;

		/*
		 * After consuming one signal we won't block trying to consume
		 * further signals.
		 */
		block = B_FALSE;
	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

	if (got_one)
		res = 0;

	return (res);
}

/*
 * If k_sigset_t's were a single word, we would do:
 *      return (((p->p_sig | t->t_sig) & set) & fillset);
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
	return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
	    set.__sigbits[0]) |
	    ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
	    set.__sigbits[1]) |
	    (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
	    set.__sigbits[2]) & FILLSET2));
}

_NOTE(ARGSUSED(4))
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	signalfd_state_t *state;
	minor_t minor = getminor(dev);
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	short revents = 0;

	state = ddi_get_soft_state(signalfd_softstate, minor);

	mutex_enter(&state->sfd_lock);

	if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
		revents |= POLLRDNORM | POLLIN;

	mutex_exit(&state->sfd_lock);

	if (!(*reventsp = revents & events) && !anyyet) {
		*phpp = &state->sfd_pollhd;

		/*
		 * Enable pollwakeup handling.
		 */
		if (p->p_sigfd == NULL) {
			sigfd_proc_state_t *pstate;

			pstate = kmem_zalloc(sizeof (sigfd_proc_state_t),
			    KM_SLEEP);
			list_create(&pstate->sigfd_list,
			    sizeof (sigfd_wake_list_t),
			    offsetof(sigfd_wake_list_t, sigfd_wl_lst));

			mutex_enter(&p->p_lock);
			/* check again now that we're locked */
			if (p->p_sigfd == NULL) {
				p->p_sigfd = pstate;
			} else {
				/* someone beat us to it */
				list_destroy(&pstate->sigfd_list);
				kmem_free(pstate, sizeof (sigfd_proc_state_t));
			}
			mutex_exit(&p->p_lock);
		}

		mutex_enter(&p->p_lock);
		if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb ==
		    NULL) {
			((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb =
			    signalfd_pollwake_cb;
		}
		signalfd_wake_list_add(state);
		mutex_exit(&p->p_lock);
	}

	return (0);
}

_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	signalfd_state_t *state;
	minor_t minor = getminor(dev);
	sigset_t mask;

	state = ddi_get_soft_state(signalfd_softstate, minor);

	switch (cmd) {
	case SIGNALFDIOC_MASK:
		if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
		    md) != 0)
			return (set_errno(EFAULT));

		mutex_enter(&state->sfd_lock);
		sigutok(&mask, &state->sfd_set);
		mutex_exit(&state->sfd_lock);

		return (0);

	default:
		break;
	}

	return (ENOTTY);
}
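
/*
 * For illustration, the new mask presumably reaches this ioctl from the
 * userland signalfd(3C) wrapper roughly as follows (a hypothetical sketch
 * of the caller, not a quote of libc):
 *
 *	sigset_t mask;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGUSR1);
 *	(void) ioctl(fd, SIGNALFDIOC_MASK, &mask);
 *
 * sigutok() then converts the user sigset_t into the k_sigset_t form held
 * in sfd_set.
 */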

_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state, **sp;
	minor_t minor = getminor(dev);
	proc_t *p = curproc;

	state = ddi_get_soft_state(signalfd_softstate, minor);

	if (state->sfd_pollhd.ph_list != NULL) {
		pollwakeup(&state->sfd_pollhd, POLLERR);
		pollhead_clean(&state->sfd_pollhd);
	}

	/* Make sure our state is removed from our proc's pollwake list. */
	mutex_enter(&p->p_lock);
	signalfd_wake_list_rm(p, state);
	mutex_exit(&p->p_lock);

	mutex_enter(&signalfd_lock);

	/* Remove our state from our global list. */
	for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next))
		VERIFY(*sp != NULL);

	*sp = (*sp)->sfd_next;

	ddi_soft_state_free(signalfd_softstate, minor);
	id_free(signalfd_minor, minor);

	mutex_exit(&signalfd_lock);

	return (0);
}

static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH || signalfd_devi != NULL)
		return (DDI_FAILURE);

	mutex_enter(&signalfd_lock);

	signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
	if (signalfd_minor == NULL) {
		cmn_err(CE_WARN, "signalfd couldn't create id space");
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_init(&signalfd_softstate,
	    sizeof (signalfd_state_t), 0) != 0) {
		cmn_err(CE_WARN, "signalfd failed to create soft state");
		id_space_destroy(signalfd_minor);
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
		ddi_soft_state_fini(&signalfd_softstate);
		id_space_destroy(signalfd_minor);
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	signalfd_devi = devi;

	sigfd_exit_helper = signalfd_exit_helper;

	mutex_exit(&signalfd_lock);

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	/* list should be empty */
	VERIFY(signalfd_state == NULL);

	mutex_enter(&signalfd_lock);
	id_space_destroy(signalfd_minor);

	ddi_remove_minor_node(signalfd_devi, NULL);
	signalfd_devi = NULL;
	sigfd_exit_helper = NULL;

	ddi_soft_state_fini(&signalfd_softstate);
	mutex_exit(&signalfd_lock);

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)signalfd_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops signalfd_cb_ops = {
	signalfd_open,		/* open */
	signalfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	signalfd_read,		/* read */
	nodev,			/* write */
	signalfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	signalfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops signalfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	signalfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	signalfd_attach,	/* attach */
	signalfd_detach,	/* detach */
	nodev,			/* reset */
	&signalfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"signalfd support",	/* name of module */
	&signalfd_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
775