xref: /titanic_51/usr/src/uts/common/io/timerfd.c (revision 6a72db4a7fa12c3e0d1c1cf91a07390739fa0fbf)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
11*6a72db4aSBryan Cantrill 
/*
 * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
 */
15*6a72db4aSBryan Cantrill 
/*
 * Support for the timerfd facility, a Linux-borne facility that allows
 * POSIX.1b timers to be created and manipulated via a file descriptor
 * interface.
 */
21*6a72db4aSBryan Cantrill 
22*6a72db4aSBryan Cantrill #include <sys/ddi.h>
23*6a72db4aSBryan Cantrill #include <sys/sunddi.h>
24*6a72db4aSBryan Cantrill #include <sys/timerfd.h>
25*6a72db4aSBryan Cantrill #include <sys/conf.h>
26*6a72db4aSBryan Cantrill #include <sys/vmem.h>
27*6a72db4aSBryan Cantrill #include <sys/sysmacros.h>
28*6a72db4aSBryan Cantrill #include <sys/filio.h>
29*6a72db4aSBryan Cantrill #include <sys/stat.h>
30*6a72db4aSBryan Cantrill #include <sys/file.h>
31*6a72db4aSBryan Cantrill #include <sys/timer.h>
32*6a72db4aSBryan Cantrill 
33*6a72db4aSBryan Cantrill struct timerfd_state;
34*6a72db4aSBryan Cantrill typedef struct timerfd_state timerfd_state_t;
35*6a72db4aSBryan Cantrill 
36*6a72db4aSBryan Cantrill struct timerfd_state {
37*6a72db4aSBryan Cantrill 	kmutex_t tfd_lock;			/* lock protecting state */
38*6a72db4aSBryan Cantrill 	kcondvar_t tfd_cv;			/* condvar */
39*6a72db4aSBryan Cantrill 	pollhead_t tfd_pollhd;			/* poll head */
40*6a72db4aSBryan Cantrill 	uint64_t tfd_fired;			/* # of times fired */
41*6a72db4aSBryan Cantrill 	itimer_t tfd_itimer;			/* underlying itimer */
42*6a72db4aSBryan Cantrill 	timerfd_state_t *tfd_next;		/* next state on global list */
43*6a72db4aSBryan Cantrill };
44*6a72db4aSBryan Cantrill 
/*
 * Internal global variables.  timerfd_lock is held by open, close, attach
 * and detach while they touch the minor arena, the soft state and the
 * global list of per-open state.
 */
static kmutex_t		timerfd_lock;		/* lock protecting state */
static dev_info_t	*timerfd_devi;		/* device info, set at attach */
static vmem_t		*timerfd_minor;		/* minor number arena */
static void		*timerfd_softstate;	/* softstate pointer */
static timerfd_state_t	*timerfd_state;		/* head of global state list */
53*6a72db4aSBryan Cantrill 
54*6a72db4aSBryan Cantrill static itimer_t *
55*6a72db4aSBryan Cantrill timerfd_itimer_lock(timerfd_state_t *state)
56*6a72db4aSBryan Cantrill {
57*6a72db4aSBryan Cantrill 	itimer_t *it = &state->tfd_itimer;
58*6a72db4aSBryan Cantrill 
59*6a72db4aSBryan Cantrill 	mutex_enter(&state->tfd_lock);
60*6a72db4aSBryan Cantrill 
61*6a72db4aSBryan Cantrill 	while (it->it_lock & ITLK_LOCKED) {
62*6a72db4aSBryan Cantrill 		it->it_blockers++;
63*6a72db4aSBryan Cantrill 		cv_wait(&it->it_cv, &state->tfd_lock);
64*6a72db4aSBryan Cantrill 		it->it_blockers--;
65*6a72db4aSBryan Cantrill 	}
66*6a72db4aSBryan Cantrill 
67*6a72db4aSBryan Cantrill 	it->it_lock |= ITLK_LOCKED;
68*6a72db4aSBryan Cantrill 
69*6a72db4aSBryan Cantrill 	mutex_exit(&state->tfd_lock);
70*6a72db4aSBryan Cantrill 
71*6a72db4aSBryan Cantrill 	return (it);
72*6a72db4aSBryan Cantrill }
73*6a72db4aSBryan Cantrill 
74*6a72db4aSBryan Cantrill static void
75*6a72db4aSBryan Cantrill timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it)
76*6a72db4aSBryan Cantrill {
77*6a72db4aSBryan Cantrill 	VERIFY(it == &state->tfd_itimer);
78*6a72db4aSBryan Cantrill 	VERIFY(it->it_lock & ITLK_LOCKED);
79*6a72db4aSBryan Cantrill 
80*6a72db4aSBryan Cantrill 	mutex_enter(&state->tfd_lock);
81*6a72db4aSBryan Cantrill 
82*6a72db4aSBryan Cantrill 	it->it_lock &= ~ITLK_LOCKED;
83*6a72db4aSBryan Cantrill 
84*6a72db4aSBryan Cantrill 	if (it->it_blockers)
85*6a72db4aSBryan Cantrill 		cv_signal(&it->it_cv);
86*6a72db4aSBryan Cantrill 
87*6a72db4aSBryan Cantrill 	mutex_exit(&state->tfd_lock);
88*6a72db4aSBryan Cantrill }
89*6a72db4aSBryan Cantrill 
90*6a72db4aSBryan Cantrill static void
91*6a72db4aSBryan Cantrill timerfd_fire(itimer_t *it)
92*6a72db4aSBryan Cantrill {
93*6a72db4aSBryan Cantrill 	timerfd_state_t *state = it->it_frontend;
94*6a72db4aSBryan Cantrill 	uint64_t oval;
95*6a72db4aSBryan Cantrill 
96*6a72db4aSBryan Cantrill 	mutex_enter(&state->tfd_lock);
97*6a72db4aSBryan Cantrill 	oval = state->tfd_fired++;
98*6a72db4aSBryan Cantrill 	mutex_exit(&state->tfd_lock);
99*6a72db4aSBryan Cantrill 
100*6a72db4aSBryan Cantrill 	if (oval == 0) {
101*6a72db4aSBryan Cantrill 		cv_broadcast(&state->tfd_cv);
102*6a72db4aSBryan Cantrill 		pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN);
103*6a72db4aSBryan Cantrill 	}
104*6a72db4aSBryan Cantrill }
105*6a72db4aSBryan Cantrill 
106*6a72db4aSBryan Cantrill /*ARGSUSED*/
107*6a72db4aSBryan Cantrill static int
108*6a72db4aSBryan Cantrill timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
109*6a72db4aSBryan Cantrill {
110*6a72db4aSBryan Cantrill 	timerfd_state_t *state;
111*6a72db4aSBryan Cantrill 	major_t major = getemajor(*devp);
112*6a72db4aSBryan Cantrill 	minor_t minor = getminor(*devp);
113*6a72db4aSBryan Cantrill 
114*6a72db4aSBryan Cantrill 	if (minor != TIMERFDMNRN_TIMERFD)
115*6a72db4aSBryan Cantrill 		return (ENXIO);
116*6a72db4aSBryan Cantrill 
117*6a72db4aSBryan Cantrill 	mutex_enter(&timerfd_lock);
118*6a72db4aSBryan Cantrill 
119*6a72db4aSBryan Cantrill 	minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1,
120*6a72db4aSBryan Cantrill 	    VM_BESTFIT | VM_SLEEP);
121*6a72db4aSBryan Cantrill 
122*6a72db4aSBryan Cantrill 	if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) {
123*6a72db4aSBryan Cantrill 		vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
124*6a72db4aSBryan Cantrill 		mutex_exit(&timerfd_lock);
125*6a72db4aSBryan Cantrill 		return (NULL);
126*6a72db4aSBryan Cantrill 	}
127*6a72db4aSBryan Cantrill 
128*6a72db4aSBryan Cantrill 	state = ddi_get_soft_state(timerfd_softstate, minor);
129*6a72db4aSBryan Cantrill 	*devp = makedevice(major, minor);
130*6a72db4aSBryan Cantrill 
131*6a72db4aSBryan Cantrill 	state->tfd_next = timerfd_state;
132*6a72db4aSBryan Cantrill 	timerfd_state = state;
133*6a72db4aSBryan Cantrill 
134*6a72db4aSBryan Cantrill 	mutex_exit(&timerfd_lock);
135*6a72db4aSBryan Cantrill 
136*6a72db4aSBryan Cantrill 	return (0);
137*6a72db4aSBryan Cantrill }
138*6a72db4aSBryan Cantrill 
139*6a72db4aSBryan Cantrill /*ARGSUSED*/
140*6a72db4aSBryan Cantrill static int
141*6a72db4aSBryan Cantrill timerfd_read(dev_t dev, uio_t *uio, cred_t *cr)
142*6a72db4aSBryan Cantrill {
143*6a72db4aSBryan Cantrill 	timerfd_state_t *state;
144*6a72db4aSBryan Cantrill 	minor_t minor = getminor(dev);
145*6a72db4aSBryan Cantrill 	uint64_t val;
146*6a72db4aSBryan Cantrill 	int err;
147*6a72db4aSBryan Cantrill 
148*6a72db4aSBryan Cantrill 	if (uio->uio_resid < sizeof (val))
149*6a72db4aSBryan Cantrill 		return (EINVAL);
150*6a72db4aSBryan Cantrill 
151*6a72db4aSBryan Cantrill 	state = ddi_get_soft_state(timerfd_softstate, minor);
152*6a72db4aSBryan Cantrill 
153*6a72db4aSBryan Cantrill 	mutex_enter(&state->tfd_lock);
154*6a72db4aSBryan Cantrill 
155*6a72db4aSBryan Cantrill 	while (state->tfd_fired == 0) {
156*6a72db4aSBryan Cantrill 		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
157*6a72db4aSBryan Cantrill 			mutex_exit(&state->tfd_lock);
158*6a72db4aSBryan Cantrill 			return (EAGAIN);
159*6a72db4aSBryan Cantrill 		}
160*6a72db4aSBryan Cantrill 
161*6a72db4aSBryan Cantrill 		if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) {
162*6a72db4aSBryan Cantrill 			mutex_exit(&state->tfd_lock);
163*6a72db4aSBryan Cantrill 			return (EINTR);
164*6a72db4aSBryan Cantrill 		}
165*6a72db4aSBryan Cantrill 	}
166*6a72db4aSBryan Cantrill 
167*6a72db4aSBryan Cantrill 	/*
168*6a72db4aSBryan Cantrill 	 * Our tfd_fired is non-zero; slurp its value and then clear it.
169*6a72db4aSBryan Cantrill 	 */
170*6a72db4aSBryan Cantrill 	val = state->tfd_fired;
171*6a72db4aSBryan Cantrill 	state->tfd_fired = 0;
172*6a72db4aSBryan Cantrill 	mutex_exit(&state->tfd_lock);
173*6a72db4aSBryan Cantrill 
174*6a72db4aSBryan Cantrill 	err = uiomove(&val, sizeof (val), UIO_READ, uio);
175*6a72db4aSBryan Cantrill 
176*6a72db4aSBryan Cantrill 	return (err);
177*6a72db4aSBryan Cantrill }
178*6a72db4aSBryan Cantrill 
179*6a72db4aSBryan Cantrill /*ARGSUSED*/
180*6a72db4aSBryan Cantrill static int
181*6a72db4aSBryan Cantrill timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
182*6a72db4aSBryan Cantrill     struct pollhead **phpp)
183*6a72db4aSBryan Cantrill {
184*6a72db4aSBryan Cantrill 	timerfd_state_t *state;
185*6a72db4aSBryan Cantrill 	minor_t minor = getminor(dev);
186*6a72db4aSBryan Cantrill 	short revents = 0;
187*6a72db4aSBryan Cantrill 
188*6a72db4aSBryan Cantrill 	state = ddi_get_soft_state(timerfd_softstate, minor);
189*6a72db4aSBryan Cantrill 
190*6a72db4aSBryan Cantrill 	mutex_enter(&state->tfd_lock);
191*6a72db4aSBryan Cantrill 
192*6a72db4aSBryan Cantrill 	if (state->tfd_fired > 0)
193*6a72db4aSBryan Cantrill 		revents |= POLLRDNORM | POLLIN;
194*6a72db4aSBryan Cantrill 
195*6a72db4aSBryan Cantrill 	if (!(*reventsp = revents & events) && !anyyet)
196*6a72db4aSBryan Cantrill 		*phpp = &state->tfd_pollhd;
197*6a72db4aSBryan Cantrill 
198*6a72db4aSBryan Cantrill 	mutex_exit(&state->tfd_lock);
199*6a72db4aSBryan Cantrill 
200*6a72db4aSBryan Cantrill 	return (0);
201*6a72db4aSBryan Cantrill }
202*6a72db4aSBryan Cantrill 
203*6a72db4aSBryan Cantrill static int
204*6a72db4aSBryan Cantrill timerfd_copyin(uintptr_t addr, itimerspec_t *dest)
205*6a72db4aSBryan Cantrill {
206*6a72db4aSBryan Cantrill 	if (get_udatamodel() == DATAMODEL_NATIVE) {
207*6a72db4aSBryan Cantrill 		if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0)
208*6a72db4aSBryan Cantrill 			return (EFAULT);
209*6a72db4aSBryan Cantrill 	} else {
210*6a72db4aSBryan Cantrill 		itimerspec32_t dest32;
211*6a72db4aSBryan Cantrill 
212*6a72db4aSBryan Cantrill 		if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0)
213*6a72db4aSBryan Cantrill 			return (EFAULT);
214*6a72db4aSBryan Cantrill 
215*6a72db4aSBryan Cantrill 		ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32);
216*6a72db4aSBryan Cantrill 	}
217*6a72db4aSBryan Cantrill 
218*6a72db4aSBryan Cantrill 	if (itimerspecfix(&dest->it_value) ||
219*6a72db4aSBryan Cantrill 	    (itimerspecfix(&dest->it_interval) &&
220*6a72db4aSBryan Cantrill 	    timerspecisset(&dest->it_value))) {
221*6a72db4aSBryan Cantrill 		return (EINVAL);
222*6a72db4aSBryan Cantrill 	}
223*6a72db4aSBryan Cantrill 
224*6a72db4aSBryan Cantrill 	return (0);
225*6a72db4aSBryan Cantrill }
226*6a72db4aSBryan Cantrill 
227*6a72db4aSBryan Cantrill static int
228*6a72db4aSBryan Cantrill timerfd_copyout(itimerspec_t *src, uintptr_t addr)
229*6a72db4aSBryan Cantrill {
230*6a72db4aSBryan Cantrill 	if (get_udatamodel() == DATAMODEL_NATIVE) {
231*6a72db4aSBryan Cantrill 		if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0)
232*6a72db4aSBryan Cantrill 			return (EFAULT);
233*6a72db4aSBryan Cantrill 	} else {
234*6a72db4aSBryan Cantrill 		itimerspec32_t src32;
235*6a72db4aSBryan Cantrill 
236*6a72db4aSBryan Cantrill 		if (ITIMERSPEC_OVERFLOW(src))
237*6a72db4aSBryan Cantrill 			return (EOVERFLOW);
238*6a72db4aSBryan Cantrill 
239*6a72db4aSBryan Cantrill 		ITIMERSPEC_TO_ITIMERSPEC32(&src32, src);
240*6a72db4aSBryan Cantrill 
241*6a72db4aSBryan Cantrill 		if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0)
242*6a72db4aSBryan Cantrill 			return (EFAULT);
243*6a72db4aSBryan Cantrill 	}
244*6a72db4aSBryan Cantrill 
245*6a72db4aSBryan Cantrill 	return (0);
246*6a72db4aSBryan Cantrill }
247*6a72db4aSBryan Cantrill 
248*6a72db4aSBryan Cantrill /*ARGSUSED*/
249*6a72db4aSBryan Cantrill static int
250*6a72db4aSBryan Cantrill timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
251*6a72db4aSBryan Cantrill {
252*6a72db4aSBryan Cantrill 	itimerspec_t when, oval;
253*6a72db4aSBryan Cantrill 	timerfd_state_t *state;
254*6a72db4aSBryan Cantrill 	minor_t minor = getminor(dev);
255*6a72db4aSBryan Cantrill 	int err;
256*6a72db4aSBryan Cantrill 	itimer_t *it;
257*6a72db4aSBryan Cantrill 
258*6a72db4aSBryan Cantrill 	state = ddi_get_soft_state(timerfd_softstate, minor);
259*6a72db4aSBryan Cantrill 
260*6a72db4aSBryan Cantrill 	switch (cmd) {
261*6a72db4aSBryan Cantrill 	case TIMERFDIOC_CREATE: {
262*6a72db4aSBryan Cantrill 		if (arg == TIMERFD_MONOTONIC)
263*6a72db4aSBryan Cantrill 			arg = CLOCK_MONOTONIC;
264*6a72db4aSBryan Cantrill 
265*6a72db4aSBryan Cantrill 		it = timerfd_itimer_lock(state);
266*6a72db4aSBryan Cantrill 
267*6a72db4aSBryan Cantrill 		if (it->it_backend != NULL) {
268*6a72db4aSBryan Cantrill 			timerfd_itimer_unlock(state, it);
269*6a72db4aSBryan Cantrill 			return (EEXIST);
270*6a72db4aSBryan Cantrill 		}
271*6a72db4aSBryan Cantrill 
272*6a72db4aSBryan Cantrill 		if ((it->it_backend = clock_get_backend(arg)) == NULL) {
273*6a72db4aSBryan Cantrill 			timerfd_itimer_unlock(state, it);
274*6a72db4aSBryan Cantrill 			return (EINVAL);
275*6a72db4aSBryan Cantrill 		}
276*6a72db4aSBryan Cantrill 
277*6a72db4aSBryan Cantrill 		/*
278*6a72db4aSBryan Cantrill 		 * We need to provide a proc structure only for purposes
279*6a72db4aSBryan Cantrill 		 * of locking CLOCK_REALTIME-based timers -- it is safe to
280*6a72db4aSBryan Cantrill 		 * provide p0 here.
281*6a72db4aSBryan Cantrill 		 */
282*6a72db4aSBryan Cantrill 		it->it_proc = &p0;
283*6a72db4aSBryan Cantrill 
284*6a72db4aSBryan Cantrill 		err = it->it_backend->clk_timer_create(it, timerfd_fire);
285*6a72db4aSBryan Cantrill 
286*6a72db4aSBryan Cantrill 		if (err != 0) {
287*6a72db4aSBryan Cantrill 			it->it_backend = NULL;
288*6a72db4aSBryan Cantrill 			timerfd_itimer_unlock(state, it);
289*6a72db4aSBryan Cantrill 			return (err);
290*6a72db4aSBryan Cantrill 		}
291*6a72db4aSBryan Cantrill 
292*6a72db4aSBryan Cantrill 		it->it_frontend = state;
293*6a72db4aSBryan Cantrill 		timerfd_itimer_unlock(state, it);
294*6a72db4aSBryan Cantrill 
295*6a72db4aSBryan Cantrill 		return (0);
296*6a72db4aSBryan Cantrill 	}
297*6a72db4aSBryan Cantrill 
298*6a72db4aSBryan Cantrill 	case TIMERFDIOC_GETTIME: {
299*6a72db4aSBryan Cantrill 		it = timerfd_itimer_lock(state);
300*6a72db4aSBryan Cantrill 
301*6a72db4aSBryan Cantrill 		if (it->it_backend == NULL) {
302*6a72db4aSBryan Cantrill 			timerfd_itimer_unlock(state, it);
303*6a72db4aSBryan Cantrill 			return (ENODEV);
304*6a72db4aSBryan Cantrill 		}
305*6a72db4aSBryan Cantrill 
306*6a72db4aSBryan Cantrill 		err = it->it_backend->clk_timer_gettime(it, &when);
307*6a72db4aSBryan Cantrill 		timerfd_itimer_unlock(state, it);
308*6a72db4aSBryan Cantrill 
309*6a72db4aSBryan Cantrill 		if (err != 0)
310*6a72db4aSBryan Cantrill 			return (err);
311*6a72db4aSBryan Cantrill 
312*6a72db4aSBryan Cantrill 		if ((err = timerfd_copyout(&when, arg)) != 0)
313*6a72db4aSBryan Cantrill 			return (err);
314*6a72db4aSBryan Cantrill 
315*6a72db4aSBryan Cantrill 		return (0);
316*6a72db4aSBryan Cantrill 	}
317*6a72db4aSBryan Cantrill 
318*6a72db4aSBryan Cantrill 	case TIMERFDIOC_SETTIME: {
319*6a72db4aSBryan Cantrill 		timerfd_settime_t st;
320*6a72db4aSBryan Cantrill 
321*6a72db4aSBryan Cantrill 		if (copyin((void *)arg, &st, sizeof (st)) != 0)
322*6a72db4aSBryan Cantrill 			return (EFAULT);
323*6a72db4aSBryan Cantrill 
324*6a72db4aSBryan Cantrill 		if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0)
325*6a72db4aSBryan Cantrill 			return (err);
326*6a72db4aSBryan Cantrill 
327*6a72db4aSBryan Cantrill 		it = timerfd_itimer_lock(state);
328*6a72db4aSBryan Cantrill 
329*6a72db4aSBryan Cantrill 		if (it->it_backend == NULL) {
330*6a72db4aSBryan Cantrill 			timerfd_itimer_unlock(state, it);
331*6a72db4aSBryan Cantrill 			return (ENODEV);
332*6a72db4aSBryan Cantrill 		}
333*6a72db4aSBryan Cantrill 
334*6a72db4aSBryan Cantrill 		if (st.tfd_settime_ovalue != NULL) {
335*6a72db4aSBryan Cantrill 			err = it->it_backend->clk_timer_gettime(it, &oval);
336*6a72db4aSBryan Cantrill 
337*6a72db4aSBryan Cantrill 			if (err != 0) {
338*6a72db4aSBryan Cantrill 				timerfd_itimer_unlock(state, it);
339*6a72db4aSBryan Cantrill 				return (err);
340*6a72db4aSBryan Cantrill 			}
341*6a72db4aSBryan Cantrill 		}
342*6a72db4aSBryan Cantrill 
343*6a72db4aSBryan Cantrill 		/*
344*6a72db4aSBryan Cantrill 		 * Before we set the time, we're going to clear tfd_fired.
345*6a72db4aSBryan Cantrill 		 * This can potentially race with the (old) timer firing, but
346*6a72db4aSBryan Cantrill 		 * the window is deceptively difficult to close:  if we were
347*6a72db4aSBryan Cantrill 		 * to simply clear tfd_fired after the call to the backend
348*6a72db4aSBryan Cantrill 		 * returned, we would run the risk of plowing a firing of the
349*6a72db4aSBryan Cantrill 		 * new timer.  Ultimately, the race can only be resolved by
350*6a72db4aSBryan Cantrill 		 * the backend, which would likely need to be extended with a
351*6a72db4aSBryan Cantrill 		 * function to call back into when the timer is between states
352*6a72db4aSBryan Cantrill 		 * (that is, after the timer can no longer fire with the old
353*6a72db4aSBryan Cantrill 		 * timer value, but before it can fire with the new one).
354*6a72db4aSBryan Cantrill 		 * This is straightforward enough for backends that set a
355*6a72db4aSBryan Cantrill 		 * timer's value by deleting the old one and adding the new
356*6a72db4aSBryan Cantrill 		 * one, but for those that modify the timer value in place
357*6a72db4aSBryan Cantrill 		 * (e.g., cyclics), the required serialization is necessarily
358*6a72db4aSBryan Cantrill 		 * delicate:  the function would have to be callable from
359*6a72db4aSBryan Cantrill 		 * arbitrary interrupt context.  While implementing all of
360*6a72db4aSBryan Cantrill 		 * this is possible, it does not (for the moment) seem worth
361*6a72db4aSBryan Cantrill 		 * it: if the timer is firing at essentially the same moment
362*6a72db4aSBryan Cantrill 		 * that it's being reprogrammed, there is a higher-level race
363*6a72db4aSBryan Cantrill 		 * with respect to timerfd usage that the progam itself will
364*6a72db4aSBryan Cantrill 		 * have to properly resolve -- and it seems reasonable to
365*6a72db4aSBryan Cantrill 		 * simply allow the program to resolve it in this case.
366*6a72db4aSBryan Cantrill 		 */
367*6a72db4aSBryan Cantrill 		mutex_enter(&state->tfd_lock);
368*6a72db4aSBryan Cantrill 		state->tfd_fired = 0;
369*6a72db4aSBryan Cantrill 		mutex_exit(&state->tfd_lock);
370*6a72db4aSBryan Cantrill 
371*6a72db4aSBryan Cantrill 		err = it->it_backend->clk_timer_settime(it,
372*6a72db4aSBryan Cantrill 		    st.tfd_settime_flags & TFD_TIMER_ABSTIME ?
373*6a72db4aSBryan Cantrill 		    TIMER_ABSTIME : TIMER_RELTIME, &when);
374*6a72db4aSBryan Cantrill 		timerfd_itimer_unlock(state, it);
375*6a72db4aSBryan Cantrill 
376*6a72db4aSBryan Cantrill 		if (err != 0 || st.tfd_settime_ovalue == NULL)
377*6a72db4aSBryan Cantrill 			return (err);
378*6a72db4aSBryan Cantrill 
379*6a72db4aSBryan Cantrill 		if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0)
380*6a72db4aSBryan Cantrill 			return (err);
381*6a72db4aSBryan Cantrill 
382*6a72db4aSBryan Cantrill 		return (0);
383*6a72db4aSBryan Cantrill 	}
384*6a72db4aSBryan Cantrill 
385*6a72db4aSBryan Cantrill 	default:
386*6a72db4aSBryan Cantrill 		break;
387*6a72db4aSBryan Cantrill 	}
388*6a72db4aSBryan Cantrill 
389*6a72db4aSBryan Cantrill 	return (ENOTTY);
390*6a72db4aSBryan Cantrill }
391*6a72db4aSBryan Cantrill 
392*6a72db4aSBryan Cantrill /*ARGSUSED*/
393*6a72db4aSBryan Cantrill static int
394*6a72db4aSBryan Cantrill timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
395*6a72db4aSBryan Cantrill {
396*6a72db4aSBryan Cantrill 	timerfd_state_t *state, **sp;
397*6a72db4aSBryan Cantrill 	itimer_t *it;
398*6a72db4aSBryan Cantrill 	minor_t minor = getminor(dev);
399*6a72db4aSBryan Cantrill 
400*6a72db4aSBryan Cantrill 	state = ddi_get_soft_state(timerfd_softstate, minor);
401*6a72db4aSBryan Cantrill 
402*6a72db4aSBryan Cantrill 	if (state->tfd_pollhd.ph_list != NULL) {
403*6a72db4aSBryan Cantrill 		pollwakeup(&state->tfd_pollhd, POLLERR);
404*6a72db4aSBryan Cantrill 		pollhead_clean(&state->tfd_pollhd);
405*6a72db4aSBryan Cantrill 	}
406*6a72db4aSBryan Cantrill 
407*6a72db4aSBryan Cantrill 	/*
408*6a72db4aSBryan Cantrill 	 * No one can get to this timer; we don't need to lock it -- we can
409*6a72db4aSBryan Cantrill 	 * just call on the backend to delete it.
410*6a72db4aSBryan Cantrill 	 */
411*6a72db4aSBryan Cantrill 	it = &state->tfd_itimer;
412*6a72db4aSBryan Cantrill 
413*6a72db4aSBryan Cantrill 	if (it->it_backend != NULL)
414*6a72db4aSBryan Cantrill 		it->it_backend->clk_timer_delete(it);
415*6a72db4aSBryan Cantrill 
416*6a72db4aSBryan Cantrill 	mutex_enter(&timerfd_lock);
417*6a72db4aSBryan Cantrill 
418*6a72db4aSBryan Cantrill 	/*
419*6a72db4aSBryan Cantrill 	 * Remove our state from our global list.
420*6a72db4aSBryan Cantrill 	 */
421*6a72db4aSBryan Cantrill 	for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next))
422*6a72db4aSBryan Cantrill 		VERIFY(*sp != NULL);
423*6a72db4aSBryan Cantrill 
424*6a72db4aSBryan Cantrill 	*sp = (*sp)->tfd_next;
425*6a72db4aSBryan Cantrill 
426*6a72db4aSBryan Cantrill 	ddi_soft_state_free(timerfd_softstate, minor);
427*6a72db4aSBryan Cantrill 	vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
428*6a72db4aSBryan Cantrill 
429*6a72db4aSBryan Cantrill 	mutex_exit(&timerfd_lock);
430*6a72db4aSBryan Cantrill 
431*6a72db4aSBryan Cantrill 	return (0);
432*6a72db4aSBryan Cantrill }
433*6a72db4aSBryan Cantrill 
434*6a72db4aSBryan Cantrill static int
435*6a72db4aSBryan Cantrill timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
436*6a72db4aSBryan Cantrill {
437*6a72db4aSBryan Cantrill 	switch (cmd) {
438*6a72db4aSBryan Cantrill 	case DDI_ATTACH:
439*6a72db4aSBryan Cantrill 		break;
440*6a72db4aSBryan Cantrill 
441*6a72db4aSBryan Cantrill 	case DDI_RESUME:
442*6a72db4aSBryan Cantrill 		return (DDI_SUCCESS);
443*6a72db4aSBryan Cantrill 
444*6a72db4aSBryan Cantrill 	default:
445*6a72db4aSBryan Cantrill 		return (DDI_FAILURE);
446*6a72db4aSBryan Cantrill 	}
447*6a72db4aSBryan Cantrill 
448*6a72db4aSBryan Cantrill 	mutex_enter(&timerfd_lock);
449*6a72db4aSBryan Cantrill 
450*6a72db4aSBryan Cantrill 	if (ddi_soft_state_init(&timerfd_softstate,
451*6a72db4aSBryan Cantrill 	    sizeof (timerfd_state_t), 0) != 0) {
452*6a72db4aSBryan Cantrill 		cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state");
453*6a72db4aSBryan Cantrill 		mutex_exit(&timerfd_lock);
454*6a72db4aSBryan Cantrill 		return (DDI_FAILURE);
455*6a72db4aSBryan Cantrill 	}
456*6a72db4aSBryan Cantrill 
457*6a72db4aSBryan Cantrill 	if (ddi_create_minor_node(devi, "timerfd", S_IFCHR,
458*6a72db4aSBryan Cantrill 	    TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
459*6a72db4aSBryan Cantrill 		cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node");
460*6a72db4aSBryan Cantrill 		ddi_soft_state_fini(&timerfd_softstate);
461*6a72db4aSBryan Cantrill 		mutex_exit(&timerfd_lock);
462*6a72db4aSBryan Cantrill 		return (DDI_FAILURE);
463*6a72db4aSBryan Cantrill 	}
464*6a72db4aSBryan Cantrill 
465*6a72db4aSBryan Cantrill 	ddi_report_dev(devi);
466*6a72db4aSBryan Cantrill 	timerfd_devi = devi;
467*6a72db4aSBryan Cantrill 
468*6a72db4aSBryan Cantrill 	timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE,
469*6a72db4aSBryan Cantrill 	    UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
470*6a72db4aSBryan Cantrill 	    VM_SLEEP | VMC_IDENTIFIER);
471*6a72db4aSBryan Cantrill 
472*6a72db4aSBryan Cantrill 	mutex_exit(&timerfd_lock);
473*6a72db4aSBryan Cantrill 
474*6a72db4aSBryan Cantrill 	return (DDI_SUCCESS);
475*6a72db4aSBryan Cantrill }
476*6a72db4aSBryan Cantrill 
477*6a72db4aSBryan Cantrill /*ARGSUSED*/
478*6a72db4aSBryan Cantrill static int
479*6a72db4aSBryan Cantrill timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
480*6a72db4aSBryan Cantrill {
481*6a72db4aSBryan Cantrill 	switch (cmd) {
482*6a72db4aSBryan Cantrill 	case DDI_DETACH:
483*6a72db4aSBryan Cantrill 		break;
484*6a72db4aSBryan Cantrill 
485*6a72db4aSBryan Cantrill 	case DDI_SUSPEND:
486*6a72db4aSBryan Cantrill 		return (DDI_SUCCESS);
487*6a72db4aSBryan Cantrill 
488*6a72db4aSBryan Cantrill 	default:
489*6a72db4aSBryan Cantrill 		return (DDI_FAILURE);
490*6a72db4aSBryan Cantrill 	}
491*6a72db4aSBryan Cantrill 
492*6a72db4aSBryan Cantrill 	mutex_enter(&timerfd_lock);
493*6a72db4aSBryan Cantrill 	vmem_destroy(timerfd_minor);
494*6a72db4aSBryan Cantrill 
495*6a72db4aSBryan Cantrill 	ddi_remove_minor_node(timerfd_devi, NULL);
496*6a72db4aSBryan Cantrill 	timerfd_devi = NULL;
497*6a72db4aSBryan Cantrill 
498*6a72db4aSBryan Cantrill 	ddi_soft_state_fini(&timerfd_softstate);
499*6a72db4aSBryan Cantrill 	mutex_exit(&timerfd_lock);
500*6a72db4aSBryan Cantrill 
501*6a72db4aSBryan Cantrill 	return (DDI_SUCCESS);
502*6a72db4aSBryan Cantrill }
503*6a72db4aSBryan Cantrill 
504*6a72db4aSBryan Cantrill /*ARGSUSED*/
505*6a72db4aSBryan Cantrill static int
506*6a72db4aSBryan Cantrill timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
507*6a72db4aSBryan Cantrill {
508*6a72db4aSBryan Cantrill 	int error;
509*6a72db4aSBryan Cantrill 
510*6a72db4aSBryan Cantrill 	switch (infocmd) {
511*6a72db4aSBryan Cantrill 	case DDI_INFO_DEVT2DEVINFO:
512*6a72db4aSBryan Cantrill 		*result = (void *)timerfd_devi;
513*6a72db4aSBryan Cantrill 		error = DDI_SUCCESS;
514*6a72db4aSBryan Cantrill 		break;
515*6a72db4aSBryan Cantrill 	case DDI_INFO_DEVT2INSTANCE:
516*6a72db4aSBryan Cantrill 		*result = (void *)0;
517*6a72db4aSBryan Cantrill 		error = DDI_SUCCESS;
518*6a72db4aSBryan Cantrill 		break;
519*6a72db4aSBryan Cantrill 	default:
520*6a72db4aSBryan Cantrill 		error = DDI_FAILURE;
521*6a72db4aSBryan Cantrill 	}
522*6a72db4aSBryan Cantrill 	return (error);
523*6a72db4aSBryan Cantrill }
524*6a72db4aSBryan Cantrill 
/*
 * Character/block entry points for the driver.  Only open, close, read,
 * ioctl and poll are implemented by this file; the remaining slots are
 * filled with the nulldev/nodev stubs.
 */
static struct cb_ops timerfd_cb_ops = {
	timerfd_open,		/* open */
	timerfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	timerfd_read,		/* read */
	nodev,			/* write */
	timerfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	timerfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
542*6a72db4aSBryan Cantrill 
/*
 * Device operations for the driver:  wires up the getinfo, attach and
 * detach routines defined in this file along with timerfd_cb_ops.
 */
static struct dev_ops timerfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	timerfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	timerfd_attach,		/* attach */
	timerfd_detach,		/* detach */
	nodev,			/* reset */
	&timerfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};
557*6a72db4aSBryan Cantrill 
/*
 * Module linkage information for the driver, referenced from modlinkage.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"timerfd support",	/* name of module */
	&timerfd_ops,		/* driver ops */
};
563*6a72db4aSBryan Cantrill 
/*
 * Linkage structure handed to mod_install(), mod_info() and mod_remove()
 * by _init(), _info() and _fini() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1,		/* linkage revision */
	(void *)&modldrv,	/* the one linkage element: our driver */
	NULL			/* terminator */
};
569*6a72db4aSBryan Cantrill 
570*6a72db4aSBryan Cantrill int
571*6a72db4aSBryan Cantrill _init(void)
572*6a72db4aSBryan Cantrill {
573*6a72db4aSBryan Cantrill 	return (mod_install(&modlinkage));
574*6a72db4aSBryan Cantrill }
575*6a72db4aSBryan Cantrill 
576*6a72db4aSBryan Cantrill int
577*6a72db4aSBryan Cantrill _info(struct modinfo *modinfop)
578*6a72db4aSBryan Cantrill {
579*6a72db4aSBryan Cantrill 	return (mod_info(&modlinkage, modinfop));
580*6a72db4aSBryan Cantrill }
581*6a72db4aSBryan Cantrill 
582*6a72db4aSBryan Cantrill int
583*6a72db4aSBryan Cantrill _fini(void)
584*6a72db4aSBryan Cantrill {
585*6a72db4aSBryan Cantrill 	return (mod_remove(&modlinkage));
586*6a72db4aSBryan Cantrill }
587