1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 */
15
16 /*
17 * Support for the timerfd facility, a Linux-borne facility that allows
18 * POSIX.1b timers to be created and manipulated via a file descriptor
19 * interface.
20 */
21
22 #include <sys/ddi.h>
23 #include <sys/sunddi.h>
24 #include <sys/timerfd.h>
25 #include <sys/conf.h>
26 #include <sys/vmem.h>
27 #include <sys/sysmacros.h>
28 #include <sys/filio.h>
29 #include <sys/stat.h>
30 #include <sys/file.h>
31 #include <sys/timer.h>
32
33 struct timerfd_state;
34 typedef struct timerfd_state timerfd_state_t;
35
36 struct timerfd_state {
37 kmutex_t tfd_lock; /* lock protecting state */
38 kcondvar_t tfd_cv; /* condvar */
39 pollhead_t tfd_pollhd; /* poll head */
40 uint64_t tfd_fired; /* # of times fired */
41 itimer_t tfd_itimer; /* underlying itimer */
42 timerfd_state_t *tfd_next; /* next state on global list */
43 };
44
/*
 * Internal global variables.  timerfd_lock is held by open, close, attach
 * and detach while they manipulate the minor arena, the soft state and the
 * global list of per-open state.
 */
static kmutex_t timerfd_lock;		/* lock protecting global state */
static dev_info_t *timerfd_devi;	/* device info, set at attach */
static vmem_t *timerfd_minor;		/* minor number arena */
static void *timerfd_softstate;		/* softstate pointer */
static timerfd_state_t *timerfd_state;	/* head of global list of state */
53
54 static itimer_t *
timerfd_itimer_lock(timerfd_state_t * state)55 timerfd_itimer_lock(timerfd_state_t *state)
56 {
57 itimer_t *it = &state->tfd_itimer;
58
59 mutex_enter(&state->tfd_lock);
60
61 while (it->it_lock & ITLK_LOCKED) {
62 it->it_blockers++;
63 cv_wait(&it->it_cv, &state->tfd_lock);
64 it->it_blockers--;
65 }
66
67 it->it_lock |= ITLK_LOCKED;
68
69 mutex_exit(&state->tfd_lock);
70
71 return (it);
72 }
73
74 static void
timerfd_itimer_unlock(timerfd_state_t * state,itimer_t * it)75 timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it)
76 {
77 VERIFY(it == &state->tfd_itimer);
78 VERIFY(it->it_lock & ITLK_LOCKED);
79
80 mutex_enter(&state->tfd_lock);
81
82 it->it_lock &= ~ITLK_LOCKED;
83
84 if (it->it_blockers)
85 cv_signal(&it->it_cv);
86
87 mutex_exit(&state->tfd_lock);
88 }
89
90 static void
timerfd_fire(itimer_t * it)91 timerfd_fire(itimer_t *it)
92 {
93 timerfd_state_t *state = it->it_frontend;
94 uint64_t oval;
95
96 mutex_enter(&state->tfd_lock);
97 oval = state->tfd_fired++;
98 mutex_exit(&state->tfd_lock);
99
100 if (oval == 0) {
101 cv_broadcast(&state->tfd_cv);
102 pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN);
103 }
104 }
105
106 /*ARGSUSED*/
107 static int
timerfd_open(dev_t * devp,int flag,int otyp,cred_t * cred_p)108 timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
109 {
110 timerfd_state_t *state;
111 major_t major = getemajor(*devp);
112 minor_t minor = getminor(*devp);
113
114 if (minor != TIMERFDMNRN_TIMERFD)
115 return (ENXIO);
116
117 mutex_enter(&timerfd_lock);
118
119 minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1,
120 VM_BESTFIT | VM_SLEEP);
121
122 if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) {
123 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
124 mutex_exit(&timerfd_lock);
125 return (NULL);
126 }
127
128 state = ddi_get_soft_state(timerfd_softstate, minor);
129 *devp = makedevice(major, minor);
130
131 state->tfd_next = timerfd_state;
132 timerfd_state = state;
133
134 mutex_exit(&timerfd_lock);
135
136 return (0);
137 }
138
139 /*ARGSUSED*/
140 static int
timerfd_read(dev_t dev,uio_t * uio,cred_t * cr)141 timerfd_read(dev_t dev, uio_t *uio, cred_t *cr)
142 {
143 timerfd_state_t *state;
144 minor_t minor = getminor(dev);
145 uint64_t val;
146 int err;
147
148 if (uio->uio_resid < sizeof (val))
149 return (EINVAL);
150
151 state = ddi_get_soft_state(timerfd_softstate, minor);
152
153 mutex_enter(&state->tfd_lock);
154
155 while (state->tfd_fired == 0) {
156 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
157 mutex_exit(&state->tfd_lock);
158 return (EAGAIN);
159 }
160
161 if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) {
162 mutex_exit(&state->tfd_lock);
163 return (EINTR);
164 }
165 }
166
167 /*
168 * Our tfd_fired is non-zero; slurp its value and then clear it.
169 */
170 val = state->tfd_fired;
171 state->tfd_fired = 0;
172 mutex_exit(&state->tfd_lock);
173
174 err = uiomove(&val, sizeof (val), UIO_READ, uio);
175
176 return (err);
177 }
178
179 /*ARGSUSED*/
180 static int
timerfd_poll(dev_t dev,short events,int anyyet,short * reventsp,struct pollhead ** phpp)181 timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
182 struct pollhead **phpp)
183 {
184 timerfd_state_t *state;
185 minor_t minor = getminor(dev);
186 short revents = 0;
187
188 state = ddi_get_soft_state(timerfd_softstate, minor);
189
190 mutex_enter(&state->tfd_lock);
191
192 if (state->tfd_fired > 0)
193 revents |= POLLRDNORM | POLLIN;
194
195 if (!(*reventsp = revents & events) && !anyyet)
196 *phpp = &state->tfd_pollhd;
197
198 mutex_exit(&state->tfd_lock);
199
200 return (0);
201 }
202
203 static int
timerfd_copyin(uintptr_t addr,itimerspec_t * dest)204 timerfd_copyin(uintptr_t addr, itimerspec_t *dest)
205 {
206 if (get_udatamodel() == DATAMODEL_NATIVE) {
207 if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0)
208 return (EFAULT);
209 } else {
210 itimerspec32_t dest32;
211
212 if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0)
213 return (EFAULT);
214
215 ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32);
216 }
217
218 if (itimerspecfix(&dest->it_value) ||
219 (itimerspecfix(&dest->it_interval) &&
220 timerspecisset(&dest->it_value))) {
221 return (EINVAL);
222 }
223
224 return (0);
225 }
226
227 static int
timerfd_copyout(itimerspec_t * src,uintptr_t addr)228 timerfd_copyout(itimerspec_t *src, uintptr_t addr)
229 {
230 if (get_udatamodel() == DATAMODEL_NATIVE) {
231 if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0)
232 return (EFAULT);
233 } else {
234 itimerspec32_t src32;
235
236 if (ITIMERSPEC_OVERFLOW(src))
237 return (EOVERFLOW);
238
239 ITIMERSPEC_TO_ITIMERSPEC32(&src32, src);
240
241 if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0)
242 return (EFAULT);
243 }
244
245 return (0);
246 }
247
248 /*ARGSUSED*/
249 static int
timerfd_ioctl(dev_t dev,int cmd,intptr_t arg,int md,cred_t * cr,int * rv)250 timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
251 {
252 itimerspec_t when, oval;
253 timerfd_state_t *state;
254 minor_t minor = getminor(dev);
255 int err;
256 itimer_t *it;
257
258 state = ddi_get_soft_state(timerfd_softstate, minor);
259
260 switch (cmd) {
261 case TIMERFDIOC_CREATE: {
262 if (arg == TIMERFD_MONOTONIC)
263 arg = CLOCK_MONOTONIC;
264
265 it = timerfd_itimer_lock(state);
266
267 if (it->it_backend != NULL) {
268 timerfd_itimer_unlock(state, it);
269 return (EEXIST);
270 }
271
272 if ((it->it_backend = clock_get_backend(arg)) == NULL) {
273 timerfd_itimer_unlock(state, it);
274 return (EINVAL);
275 }
276
277 /*
278 * We need to provide a proc structure only for purposes
279 * of locking CLOCK_REALTIME-based timers -- it is safe to
280 * provide p0 here.
281 */
282 it->it_proc = &p0;
283
284 err = it->it_backend->clk_timer_create(it, timerfd_fire);
285
286 if (err != 0) {
287 it->it_backend = NULL;
288 timerfd_itimer_unlock(state, it);
289 return (err);
290 }
291
292 it->it_frontend = state;
293 timerfd_itimer_unlock(state, it);
294
295 return (0);
296 }
297
298 case TIMERFDIOC_GETTIME: {
299 it = timerfd_itimer_lock(state);
300
301 if (it->it_backend == NULL) {
302 timerfd_itimer_unlock(state, it);
303 return (ENODEV);
304 }
305
306 err = it->it_backend->clk_timer_gettime(it, &when);
307 timerfd_itimer_unlock(state, it);
308
309 if (err != 0)
310 return (err);
311
312 if ((err = timerfd_copyout(&when, arg)) != 0)
313 return (err);
314
315 return (0);
316 }
317
318 case TIMERFDIOC_SETTIME: {
319 timerfd_settime_t st;
320
321 if (copyin((void *)arg, &st, sizeof (st)) != 0)
322 return (EFAULT);
323
324 if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0)
325 return (err);
326
327 it = timerfd_itimer_lock(state);
328
329 if (it->it_backend == NULL) {
330 timerfd_itimer_unlock(state, it);
331 return (ENODEV);
332 }
333
334 if (st.tfd_settime_ovalue != NULL) {
335 err = it->it_backend->clk_timer_gettime(it, &oval);
336
337 if (err != 0) {
338 timerfd_itimer_unlock(state, it);
339 return (err);
340 }
341 }
342
343 /*
344 * Before we set the time, we're going to clear tfd_fired.
345 * This can potentially race with the (old) timer firing, but
346 * the window is deceptively difficult to close: if we were
347 * to simply clear tfd_fired after the call to the backend
348 * returned, we would run the risk of plowing a firing of the
349 * new timer. Ultimately, the race can only be resolved by
350 * the backend, which would likely need to be extended with a
351 * function to call back into when the timer is between states
352 * (that is, after the timer can no longer fire with the old
353 * timer value, but before it can fire with the new one).
354 * This is straightforward enough for backends that set a
355 * timer's value by deleting the old one and adding the new
356 * one, but for those that modify the timer value in place
357 * (e.g., cyclics), the required serialization is necessarily
358 * delicate: the function would have to be callable from
359 * arbitrary interrupt context. While implementing all of
360 * this is possible, it does not (for the moment) seem worth
361 * it: if the timer is firing at essentially the same moment
362 * that it's being reprogrammed, there is a higher-level race
363 * with respect to timerfd usage that the progam itself will
364 * have to properly resolve -- and it seems reasonable to
365 * simply allow the program to resolve it in this case.
366 */
367 mutex_enter(&state->tfd_lock);
368 state->tfd_fired = 0;
369 mutex_exit(&state->tfd_lock);
370
371 err = it->it_backend->clk_timer_settime(it,
372 st.tfd_settime_flags & TFD_TIMER_ABSTIME ?
373 TIMER_ABSTIME : TIMER_RELTIME, &when);
374 timerfd_itimer_unlock(state, it);
375
376 if (err != 0 || st.tfd_settime_ovalue == NULL)
377 return (err);
378
379 if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0)
380 return (err);
381
382 return (0);
383 }
384
385 default:
386 break;
387 }
388
389 return (ENOTTY);
390 }
391
392 /*ARGSUSED*/
393 static int
timerfd_close(dev_t dev,int flag,int otyp,cred_t * cred_p)394 timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
395 {
396 timerfd_state_t *state, **sp;
397 itimer_t *it;
398 minor_t minor = getminor(dev);
399
400 state = ddi_get_soft_state(timerfd_softstate, minor);
401
402 if (state->tfd_pollhd.ph_list != NULL) {
403 pollwakeup(&state->tfd_pollhd, POLLERR);
404 pollhead_clean(&state->tfd_pollhd);
405 }
406
407 /*
408 * No one can get to this timer; we don't need to lock it -- we can
409 * just call on the backend to delete it.
410 */
411 it = &state->tfd_itimer;
412
413 if (it->it_backend != NULL)
414 it->it_backend->clk_timer_delete(it);
415
416 mutex_enter(&timerfd_lock);
417
418 /*
419 * Remove our state from our global list.
420 */
421 for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next))
422 VERIFY(*sp != NULL);
423
424 *sp = (*sp)->tfd_next;
425
426 ddi_soft_state_free(timerfd_softstate, minor);
427 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
428
429 mutex_exit(&timerfd_lock);
430
431 return (0);
432 }
433
434 static int
timerfd_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)435 timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
436 {
437 switch (cmd) {
438 case DDI_ATTACH:
439 break;
440
441 case DDI_RESUME:
442 return (DDI_SUCCESS);
443
444 default:
445 return (DDI_FAILURE);
446 }
447
448 mutex_enter(&timerfd_lock);
449
450 if (ddi_soft_state_init(&timerfd_softstate,
451 sizeof (timerfd_state_t), 0) != 0) {
452 cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state");
453 mutex_exit(&timerfd_lock);
454 return (DDI_FAILURE);
455 }
456
457 if (ddi_create_minor_node(devi, "timerfd", S_IFCHR,
458 TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
459 cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node");
460 ddi_soft_state_fini(&timerfd_softstate);
461 mutex_exit(&timerfd_lock);
462 return (DDI_FAILURE);
463 }
464
465 ddi_report_dev(devi);
466 timerfd_devi = devi;
467
468 timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE,
469 UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
470 VM_SLEEP | VMC_IDENTIFIER);
471
472 mutex_exit(&timerfd_lock);
473
474 return (DDI_SUCCESS);
475 }
476
477 /*ARGSUSED*/
478 static int
timerfd_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)479 timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
480 {
481 switch (cmd) {
482 case DDI_DETACH:
483 break;
484
485 case DDI_SUSPEND:
486 return (DDI_SUCCESS);
487
488 default:
489 return (DDI_FAILURE);
490 }
491
492 mutex_enter(&timerfd_lock);
493 vmem_destroy(timerfd_minor);
494
495 ddi_remove_minor_node(timerfd_devi, NULL);
496 timerfd_devi = NULL;
497
498 ddi_soft_state_fini(&timerfd_softstate);
499 mutex_exit(&timerfd_lock);
500
501 return (DDI_SUCCESS);
502 }
503
504 /*ARGSUSED*/
505 static int
timerfd_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)506 timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
507 {
508 int error;
509
510 switch (infocmd) {
511 case DDI_INFO_DEVT2DEVINFO:
512 *result = (void *)timerfd_devi;
513 error = DDI_SUCCESS;
514 break;
515 case DDI_INFO_DEVT2INSTANCE:
516 *result = (void *)0;
517 error = DDI_SUCCESS;
518 break;
519 default:
520 error = DDI_FAILURE;
521 }
522 return (error);
523 }
524
/*
 * Character device entry points.  Only open, close, read, ioctl and poll
 * are implemented by this driver; every other slot is filled with a
 * nulldev/nodev stub.
 */
static struct cb_ops timerfd_cb_ops = {
	timerfd_open,		/* open */
	timerfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	timerfd_read,		/* read */
	nodev,			/* write */
	timerfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	timerfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};
542
/*
 * Device operations.  getinfo, attach and detach are implemented by this
 * driver, and the character entry points are reached via timerfd_cb_ops.
 */
static struct dev_ops timerfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	timerfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	timerfd_attach,		/* attach */
	timerfd_detach,		/* detach */
	nodev,			/* reset */
	&timerfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};
557
/*
 * Module linkage information describing this driver to the module system.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"timerfd support",	/* name of module */
	&timerfd_ops,		/* driver ops */
};
563
/*
 * Linkage structure handed to mod_install(), mod_info() and mod_remove();
 * its list of linkage elements holds just modldrv, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
569
570 int
_init(void)571 _init(void)
572 {
573 return (mod_install(&modlinkage));
574 }
575
576 int
_info(struct modinfo * modinfop)577 _info(struct modinfo *modinfop)
578 {
579 return (mod_info(&modlinkage, modinfop));
580 }
581
582 int
_fini(void)583 _fini(void)
584 {
585 return (mod_remove(&modlinkage));
586 }
587