xref: /titanic_52/usr/src/uts/common/io/timerfd.c (revision 7fd791373689a6af05e27efec3b1ab556e02aa23)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
14  */
15 
16 /*
17  * Support for the timerfd facility, a Linux-borne facility that allows
18  * POSIX.1b timers to be created and manipulated via a file descriptor
19  * interface.
20  */
21 
22 #include <sys/ddi.h>
23 #include <sys/sunddi.h>
24 #include <sys/timerfd.h>
25 #include <sys/conf.h>
26 #include <sys/vmem.h>
27 #include <sys/sysmacros.h>
28 #include <sys/filio.h>
29 #include <sys/stat.h>
30 #include <sys/file.h>
31 #include <sys/timer.h>
32 
33 struct timerfd_state;
34 typedef struct timerfd_state timerfd_state_t;
35 
36 struct timerfd_state {
37 	kmutex_t tfd_lock;			/* lock protecting state */
38 	kcondvar_t tfd_cv;			/* condvar */
39 	pollhead_t tfd_pollhd;			/* poll head */
40 	uint64_t tfd_fired;			/* # of times fired */
41 	itimer_t tfd_itimer;			/* underlying itimer */
42 	timerfd_state_t *tfd_next;		/* next state on global list */
43 };
44 
/*
 * Internal global variables.
 *
 * timerfd_lock is held by open, close, attach and detach while they
 * manipulate the minor number arena, the soft state table and the global
 * list of per-instance state.
 */
static kmutex_t		timerfd_lock;		/* lock protecting state */
static dev_info_t	*timerfd_devi;		/* device info (set in attach) */
static vmem_t		*timerfd_minor;		/* minor number arena */
static void		*timerfd_softstate;	/* softstate pointer */
static timerfd_state_t	*timerfd_state;		/* global list of state */
53 
54 static itimer_t *
55 timerfd_itimer_lock(timerfd_state_t *state)
56 {
57 	itimer_t *it = &state->tfd_itimer;
58 
59 	mutex_enter(&state->tfd_lock);
60 
61 	while (it->it_lock & ITLK_LOCKED) {
62 		it->it_blockers++;
63 		cv_wait(&it->it_cv, &state->tfd_lock);
64 		it->it_blockers--;
65 	}
66 
67 	it->it_lock |= ITLK_LOCKED;
68 
69 	mutex_exit(&state->tfd_lock);
70 
71 	return (it);
72 }
73 
74 static void
75 timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it)
76 {
77 	VERIFY(it == &state->tfd_itimer);
78 	VERIFY(it->it_lock & ITLK_LOCKED);
79 
80 	mutex_enter(&state->tfd_lock);
81 
82 	it->it_lock &= ~ITLK_LOCKED;
83 
84 	if (it->it_blockers)
85 		cv_signal(&it->it_cv);
86 
87 	mutex_exit(&state->tfd_lock);
88 }
89 
90 static void
91 timerfd_fire(itimer_t *it)
92 {
93 	timerfd_state_t *state = it->it_frontend;
94 	uint64_t oval;
95 
96 	mutex_enter(&state->tfd_lock);
97 	oval = state->tfd_fired++;
98 	mutex_exit(&state->tfd_lock);
99 
100 	if (oval == 0) {
101 		cv_broadcast(&state->tfd_cv);
102 		pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN);
103 	}
104 }
105 
106 /*ARGSUSED*/
107 static int
108 timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
109 {
110 	timerfd_state_t *state;
111 	major_t major = getemajor(*devp);
112 	minor_t minor = getminor(*devp);
113 
114 	if (minor != TIMERFDMNRN_TIMERFD)
115 		return (ENXIO);
116 
117 	mutex_enter(&timerfd_lock);
118 
119 	minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1,
120 	    VM_BESTFIT | VM_SLEEP);
121 
122 	if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) {
123 		vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
124 		mutex_exit(&timerfd_lock);
125 		return (NULL);
126 	}
127 
128 	state = ddi_get_soft_state(timerfd_softstate, minor);
129 	*devp = makedevice(major, minor);
130 
131 	state->tfd_next = timerfd_state;
132 	timerfd_state = state;
133 
134 	mutex_exit(&timerfd_lock);
135 
136 	return (0);
137 }
138 
139 /*ARGSUSED*/
140 static int
141 timerfd_read(dev_t dev, uio_t *uio, cred_t *cr)
142 {
143 	timerfd_state_t *state;
144 	minor_t minor = getminor(dev);
145 	uint64_t val;
146 	int err;
147 
148 	if (uio->uio_resid < sizeof (val))
149 		return (EINVAL);
150 
151 	state = ddi_get_soft_state(timerfd_softstate, minor);
152 
153 	mutex_enter(&state->tfd_lock);
154 
155 	while (state->tfd_fired == 0) {
156 		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
157 			mutex_exit(&state->tfd_lock);
158 			return (EAGAIN);
159 		}
160 
161 		if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) {
162 			mutex_exit(&state->tfd_lock);
163 			return (EINTR);
164 		}
165 	}
166 
167 	/*
168 	 * Our tfd_fired is non-zero; slurp its value and then clear it.
169 	 */
170 	val = state->tfd_fired;
171 	state->tfd_fired = 0;
172 	mutex_exit(&state->tfd_lock);
173 
174 	err = uiomove(&val, sizeof (val), UIO_READ, uio);
175 
176 	return (err);
177 }
178 
179 /*ARGSUSED*/
180 static int
181 timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
182     struct pollhead **phpp)
183 {
184 	timerfd_state_t *state;
185 	minor_t minor = getminor(dev);
186 	short revents = 0;
187 
188 	state = ddi_get_soft_state(timerfd_softstate, minor);
189 
190 	mutex_enter(&state->tfd_lock);
191 
192 	if (state->tfd_fired > 0)
193 		revents |= POLLRDNORM | POLLIN;
194 
195 	if (!(*reventsp = revents & events) && !anyyet)
196 		*phpp = &state->tfd_pollhd;
197 
198 	mutex_exit(&state->tfd_lock);
199 
200 	return (0);
201 }
202 
203 static int
204 timerfd_copyin(uintptr_t addr, itimerspec_t *dest)
205 {
206 	if (get_udatamodel() == DATAMODEL_NATIVE) {
207 		if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0)
208 			return (EFAULT);
209 	} else {
210 		itimerspec32_t dest32;
211 
212 		if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0)
213 			return (EFAULT);
214 
215 		ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32);
216 	}
217 
218 	if (itimerspecfix(&dest->it_value) ||
219 	    (itimerspecfix(&dest->it_interval) &&
220 	    timerspecisset(&dest->it_value))) {
221 		return (EINVAL);
222 	}
223 
224 	return (0);
225 }
226 
227 static int
228 timerfd_copyout(itimerspec_t *src, uintptr_t addr)
229 {
230 	if (get_udatamodel() == DATAMODEL_NATIVE) {
231 		if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0)
232 			return (EFAULT);
233 	} else {
234 		itimerspec32_t src32;
235 
236 		if (ITIMERSPEC_OVERFLOW(src))
237 			return (EOVERFLOW);
238 
239 		ITIMERSPEC_TO_ITIMERSPEC32(&src32, src);
240 
241 		if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0)
242 			return (EFAULT);
243 	}
244 
245 	return (0);
246 }
247 
248 /*ARGSUSED*/
249 static int
250 timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
251 {
252 	itimerspec_t when, oval;
253 	timerfd_state_t *state;
254 	minor_t minor = getminor(dev);
255 	int err;
256 	itimer_t *it;
257 
258 	state = ddi_get_soft_state(timerfd_softstate, minor);
259 
260 	switch (cmd) {
261 	case TIMERFDIOC_CREATE: {
262 		if (arg == TIMERFD_MONOTONIC)
263 			arg = CLOCK_MONOTONIC;
264 
265 		it = timerfd_itimer_lock(state);
266 
267 		if (it->it_backend != NULL) {
268 			timerfd_itimer_unlock(state, it);
269 			return (EEXIST);
270 		}
271 
272 		if ((it->it_backend = clock_get_backend(arg)) == NULL) {
273 			timerfd_itimer_unlock(state, it);
274 			return (EINVAL);
275 		}
276 
277 		/*
278 		 * We need to provide a proc structure only for purposes
279 		 * of locking CLOCK_REALTIME-based timers -- it is safe to
280 		 * provide p0 here.
281 		 */
282 		it->it_proc = &p0;
283 
284 		err = it->it_backend->clk_timer_create(it, timerfd_fire);
285 
286 		if (err != 0) {
287 			it->it_backend = NULL;
288 			timerfd_itimer_unlock(state, it);
289 			return (err);
290 		}
291 
292 		it->it_frontend = state;
293 		timerfd_itimer_unlock(state, it);
294 
295 		return (0);
296 	}
297 
298 	case TIMERFDIOC_GETTIME: {
299 		it = timerfd_itimer_lock(state);
300 
301 		if (it->it_backend == NULL) {
302 			timerfd_itimer_unlock(state, it);
303 			return (ENODEV);
304 		}
305 
306 		err = it->it_backend->clk_timer_gettime(it, &when);
307 		timerfd_itimer_unlock(state, it);
308 
309 		if (err != 0)
310 			return (err);
311 
312 		if ((err = timerfd_copyout(&when, arg)) != 0)
313 			return (err);
314 
315 		return (0);
316 	}
317 
318 	case TIMERFDIOC_SETTIME: {
319 		timerfd_settime_t st;
320 
321 		if (copyin((void *)arg, &st, sizeof (st)) != 0)
322 			return (EFAULT);
323 
324 		if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0)
325 			return (err);
326 
327 		it = timerfd_itimer_lock(state);
328 
329 		if (it->it_backend == NULL) {
330 			timerfd_itimer_unlock(state, it);
331 			return (ENODEV);
332 		}
333 
334 		if (st.tfd_settime_ovalue != NULL) {
335 			err = it->it_backend->clk_timer_gettime(it, &oval);
336 
337 			if (err != 0) {
338 				timerfd_itimer_unlock(state, it);
339 				return (err);
340 			}
341 		}
342 
343 		/*
344 		 * Before we set the time, we're going to clear tfd_fired.
345 		 * This can potentially race with the (old) timer firing, but
346 		 * the window is deceptively difficult to close:  if we were
347 		 * to simply clear tfd_fired after the call to the backend
348 		 * returned, we would run the risk of plowing a firing of the
349 		 * new timer.  Ultimately, the race can only be resolved by
350 		 * the backend, which would likely need to be extended with a
351 		 * function to call back into when the timer is between states
352 		 * (that is, after the timer can no longer fire with the old
353 		 * timer value, but before it can fire with the new one).
354 		 * This is straightforward enough for backends that set a
355 		 * timer's value by deleting the old one and adding the new
356 		 * one, but for those that modify the timer value in place
357 		 * (e.g., cyclics), the required serialization is necessarily
358 		 * delicate:  the function would have to be callable from
359 		 * arbitrary interrupt context.  While implementing all of
360 		 * this is possible, it does not (for the moment) seem worth
361 		 * it: if the timer is firing at essentially the same moment
362 		 * that it's being reprogrammed, there is a higher-level race
363 		 * with respect to timerfd usage that the progam itself will
364 		 * have to properly resolve -- and it seems reasonable to
365 		 * simply allow the program to resolve it in this case.
366 		 */
367 		mutex_enter(&state->tfd_lock);
368 		state->tfd_fired = 0;
369 		mutex_exit(&state->tfd_lock);
370 
371 		err = it->it_backend->clk_timer_settime(it,
372 		    st.tfd_settime_flags & TFD_TIMER_ABSTIME ?
373 		    TIMER_ABSTIME : TIMER_RELTIME, &when);
374 		timerfd_itimer_unlock(state, it);
375 
376 		if (err != 0 || st.tfd_settime_ovalue == NULL)
377 			return (err);
378 
379 		if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0)
380 			return (err);
381 
382 		return (0);
383 	}
384 
385 	default:
386 		break;
387 	}
388 
389 	return (ENOTTY);
390 }
391 
392 /*ARGSUSED*/
393 static int
394 timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
395 {
396 	timerfd_state_t *state, **sp;
397 	itimer_t *it;
398 	minor_t minor = getminor(dev);
399 
400 	state = ddi_get_soft_state(timerfd_softstate, minor);
401 
402 	if (state->tfd_pollhd.ph_list != NULL) {
403 		pollwakeup(&state->tfd_pollhd, POLLERR);
404 		pollhead_clean(&state->tfd_pollhd);
405 	}
406 
407 	/*
408 	 * No one can get to this timer; we don't need to lock it -- we can
409 	 * just call on the backend to delete it.
410 	 */
411 	it = &state->tfd_itimer;
412 
413 	if (it->it_backend != NULL)
414 		it->it_backend->clk_timer_delete(it);
415 
416 	mutex_enter(&timerfd_lock);
417 
418 	/*
419 	 * Remove our state from our global list.
420 	 */
421 	for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next))
422 		VERIFY(*sp != NULL);
423 
424 	*sp = (*sp)->tfd_next;
425 
426 	ddi_soft_state_free(timerfd_softstate, minor);
427 	vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
428 
429 	mutex_exit(&timerfd_lock);
430 
431 	return (0);
432 }
433 
434 static int
435 timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
436 {
437 	switch (cmd) {
438 	case DDI_ATTACH:
439 		break;
440 
441 	case DDI_RESUME:
442 		return (DDI_SUCCESS);
443 
444 	default:
445 		return (DDI_FAILURE);
446 	}
447 
448 	mutex_enter(&timerfd_lock);
449 
450 	if (ddi_soft_state_init(&timerfd_softstate,
451 	    sizeof (timerfd_state_t), 0) != 0) {
452 		cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state");
453 		mutex_exit(&timerfd_lock);
454 		return (DDI_FAILURE);
455 	}
456 
457 	if (ddi_create_minor_node(devi, "timerfd", S_IFCHR,
458 	    TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
459 		cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node");
460 		ddi_soft_state_fini(&timerfd_softstate);
461 		mutex_exit(&timerfd_lock);
462 		return (DDI_FAILURE);
463 	}
464 
465 	ddi_report_dev(devi);
466 	timerfd_devi = devi;
467 
468 	timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE,
469 	    UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
470 	    VM_SLEEP | VMC_IDENTIFIER);
471 
472 	mutex_exit(&timerfd_lock);
473 
474 	return (DDI_SUCCESS);
475 }
476 
477 /*ARGSUSED*/
478 static int
479 timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
480 {
481 	switch (cmd) {
482 	case DDI_DETACH:
483 		break;
484 
485 	case DDI_SUSPEND:
486 		return (DDI_SUCCESS);
487 
488 	default:
489 		return (DDI_FAILURE);
490 	}
491 
492 	mutex_enter(&timerfd_lock);
493 	vmem_destroy(timerfd_minor);
494 
495 	ddi_remove_minor_node(timerfd_devi, NULL);
496 	timerfd_devi = NULL;
497 
498 	ddi_soft_state_fini(&timerfd_softstate);
499 	mutex_exit(&timerfd_lock);
500 
501 	return (DDI_SUCCESS);
502 }
503 
504 /*ARGSUSED*/
505 static int
506 timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
507 {
508 	int error;
509 
510 	switch (infocmd) {
511 	case DDI_INFO_DEVT2DEVINFO:
512 		*result = (void *)timerfd_devi;
513 		error = DDI_SUCCESS;
514 		break;
515 	case DDI_INFO_DEVT2INSTANCE:
516 		*result = (void *)0;
517 		error = DDI_SUCCESS;
518 		break;
519 	default:
520 		error = DDI_FAILURE;
521 	}
522 	return (error);
523 }
524 
/*
 * Character device entry points.  Only open, close, read, ioctl and poll
 * are implemented by this driver; every other slot is filled with one of
 * the nulldev/nodev stubs.
 */
static struct cb_ops timerfd_cb_ops = {
	timerfd_open,		/* open */
	timerfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	timerfd_read,		/* read */
	nodev,			/* write */
	timerfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	timerfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
542 
/*
 * Device operations:  ties the info, attach and detach routines and the
 * cb_ops table above to the driver.  There are no bus operations.
 */
static struct dev_ops timerfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	timerfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	timerfd_attach,		/* attach */
	timerfd_detach,		/* detach */
	nodev,			/* reset */
	&timerfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};
557 
/*
 * Module linkage information for the driver:  its type, a descriptive
 * name, and its dev_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"timerfd support",	/* name of module */
	&timerfd_ops,		/* driver ops */
};
563 
/*
 * The linkage structure handed to mod_install(), mod_info() and
 * mod_remove() below; it carries the single modldrv above, followed by a
 * terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
569 
570 int
571 _init(void)
572 {
573 	return (mod_install(&modlinkage));
574 }
575 
576 int
577 _info(struct modinfo *modinfop)
578 {
579 	return (mod_info(&modlinkage, modinfop));
580 }
581 
582 int
583 _fini(void)
584 {
585 	return (mod_remove(&modlinkage));
586 }
587