/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright (c) 2015 Joyent, Inc. All rights reserved. */ /* * Support for the timerfd facility, a Linux-borne facility that allows * POSIX.1b timers to be created and manipulated via a file descriptor * interface. */ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/timerfd.h> #include <sys/conf.h> #include <sys/vmem.h> #include <sys/sysmacros.h> #include <sys/filio.h> #include <sys/stat.h> #include <sys/file.h> #include <sys/timer.h> struct timerfd_state; typedef struct timerfd_state timerfd_state_t; struct timerfd_state { kmutex_t tfd_lock; /* lock protecting state */ kcondvar_t tfd_cv; /* condvar */ pollhead_t tfd_pollhd; /* poll head */ uint64_t tfd_fired; /* # of times fired */ itimer_t tfd_itimer; /* underlying itimer */ timerfd_state_t *tfd_next; /* next state on global list */ }; /* * Internal global variables. */ static kmutex_t timerfd_lock; /* lock protecting state */ static dev_info_t *timerfd_devi; /* device info */ static vmem_t *timerfd_minor; /* minor number arena */ static void *timerfd_softstate; /* softstate pointer */ static timerfd_state_t *timerfd_state; /* global list of state */ static itimer_t * timerfd_itimer_lock(timerfd_state_t *state) { itimer_t *it = &state->tfd_itimer; mutex_enter(&state->tfd_lock); while (it->it_lock & ITLK_LOCKED) { it->it_blockers++; cv_wait(&it->it_cv, &state->tfd_lock); it->it_blockers--; } it->it_lock |= ITLK_LOCKED; mutex_exit(&state->tfd_lock); return (it); } static void timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it) { VERIFY(it == &state->tfd_itimer); VERIFY(it->it_lock & ITLK_LOCKED); mutex_enter(&state->tfd_lock); it->it_lock &= ~ITLK_LOCKED; if (it->it_blockers) cv_signal(&it->it_cv); mutex_exit(&state->tfd_lock); } static void timerfd_fire(itimer_t *it) { timerfd_state_t *state = it->it_frontend; uint64_t oval; mutex_enter(&state->tfd_lock); oval = state->tfd_fired++; mutex_exit(&state->tfd_lock); if (oval == 0) { cv_broadcast(&state->tfd_cv); pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN); } } /*ARGSUSED*/ static int timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) { timerfd_state_t *state; major_t major = getemajor(*devp); minor_t minor = getminor(*devp); if (minor != TIMERFDMNRN_TIMERFD) return (ENXIO); mutex_enter(&timerfd_lock); minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1, VM_BESTFIT | VM_SLEEP); if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) { vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1); mutex_exit(&timerfd_lock); return (ENXIO); } state = ddi_get_soft_state(timerfd_softstate, minor); *devp = makedevice(major, minor); state->tfd_next = timerfd_state; timerfd_state = state; mutex_exit(&timerfd_lock); return (0); } /*ARGSUSED*/ static int timerfd_read(dev_t dev, uio_t *uio, cred_t *cr) { timerfd_state_t *state; minor_t minor = getminor(dev); uint64_t val; int err; if (uio->uio_resid < sizeof (val)) return (EINVAL); state = ddi_get_soft_state(timerfd_softstate, minor); mutex_enter(&state->tfd_lock); while (state->tfd_fired == 0) { if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { mutex_exit(&state->tfd_lock); return (EAGAIN); } if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) { mutex_exit(&state->tfd_lock); return (EINTR); } } /* * Our tfd_fired is non-zero; slurp its value and then clear it. */ val = state->tfd_fired; state->tfd_fired = 0; mutex_exit(&state->tfd_lock); err = uiomove(&val, sizeof (val), UIO_READ, uio); return (err); } /*ARGSUSED*/ static int timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) { timerfd_state_t *state; minor_t minor = getminor(dev); short revents = 0; state = ddi_get_soft_state(timerfd_softstate, minor); mutex_enter(&state->tfd_lock); if (state->tfd_fired > 0) revents |= POLLRDNORM | POLLIN; if (!(*reventsp = revents & events) && !anyyet) *phpp = &state->tfd_pollhd; mutex_exit(&state->tfd_lock); return (0); } static int timerfd_copyin(uintptr_t addr, itimerspec_t *dest) { if (get_udatamodel() == DATAMODEL_NATIVE) { if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0) return (EFAULT); } else { itimerspec32_t dest32; if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0) return (EFAULT); ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32); } if (itimerspecfix(&dest->it_value) || (itimerspecfix(&dest->it_interval) && timerspecisset(&dest->it_value))) { return (EINVAL); } return (0); } static int timerfd_copyout(itimerspec_t *src, uintptr_t addr) { if (get_udatamodel() == DATAMODEL_NATIVE) { if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0) return (EFAULT); } else { itimerspec32_t src32; if (ITIMERSPEC_OVERFLOW(src)) return (EOVERFLOW); ITIMERSPEC_TO_ITIMERSPEC32(&src32, src); if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0) return (EFAULT); } return (0); } /*ARGSUSED*/ static int timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { itimerspec_t when, oval; timerfd_state_t *state; minor_t minor = getminor(dev); int err; itimer_t *it; state = ddi_get_soft_state(timerfd_softstate, minor); switch (cmd) { case TIMERFDIOC_CREATE: { if (arg == TIMERFD_MONOTONIC) arg = CLOCK_MONOTONIC; it = timerfd_itimer_lock(state); if (it->it_backend != NULL) { timerfd_itimer_unlock(state, it); return (EEXIST); } if ((it->it_backend = clock_get_backend(arg)) == NULL) { timerfd_itimer_unlock(state, it); return (EINVAL); } /* * We need to provide a proc structure only for purposes * of locking CLOCK_REALTIME-based timers -- it is safe to * provide p0 here. */ it->it_proc = &p0; err = it->it_backend->clk_timer_create(it, timerfd_fire); if (err != 0) { it->it_backend = NULL; timerfd_itimer_unlock(state, it); return (err); } it->it_frontend = state; timerfd_itimer_unlock(state, it); return (0); } case TIMERFDIOC_GETTIME: { it = timerfd_itimer_lock(state); if (it->it_backend == NULL) { timerfd_itimer_unlock(state, it); return (ENODEV); } err = it->it_backend->clk_timer_gettime(it, &when); timerfd_itimer_unlock(state, it); if (err != 0) return (err); if ((err = timerfd_copyout(&when, arg)) != 0) return (err); return (0); } case TIMERFDIOC_SETTIME: { timerfd_settime_t st; if (copyin((void *)arg, &st, sizeof (st)) != 0) return (EFAULT); if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0) return (err); it = timerfd_itimer_lock(state); if (it->it_backend == NULL) { timerfd_itimer_unlock(state, it); return (ENODEV); } if (st.tfd_settime_ovalue != 0) { err = it->it_backend->clk_timer_gettime(it, &oval); if (err != 0) { timerfd_itimer_unlock(state, it); return (err); } } /* * Before we set the time, we're going to clear tfd_fired. * This can potentially race with the (old) timer firing, but * the window is deceptively difficult to close: if we were * to simply clear tfd_fired after the call to the backend * returned, we would run the risk of plowing a firing of the * new timer. Ultimately, the race can only be resolved by * the backend, which would likely need to be extended with a * function to call back into when the timer is between states * (that is, after the timer can no longer fire with the old * timer value, but before it can fire with the new one). * This is straightforward enough for backends that set a * timer's value by deleting the old one and adding the new * one, but for those that modify the timer value in place * (e.g., cyclics), the required serialization is necessarily * delicate: the function would have to be callable from * arbitrary interrupt context. While implementing all of * this is possible, it does not (for the moment) seem worth * it: if the timer is firing at essentially the same moment * that it's being reprogrammed, there is a higher-level race * with respect to timerfd usage that the progam itself will * have to properly resolve -- and it seems reasonable to * simply allow the program to resolve it in this case. */ mutex_enter(&state->tfd_lock); state->tfd_fired = 0; mutex_exit(&state->tfd_lock); err = it->it_backend->clk_timer_settime(it, st.tfd_settime_flags & TFD_TIMER_ABSTIME ? TIMER_ABSTIME : TIMER_RELTIME, &when); timerfd_itimer_unlock(state, it); if (err != 0 || st.tfd_settime_ovalue == 0) return (err); if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0) return (err); return (0); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) { timerfd_state_t *state, **sp; itimer_t *it; minor_t minor = getminor(dev); state = ddi_get_soft_state(timerfd_softstate, minor); if (state->tfd_pollhd.ph_list != NULL) { pollwakeup(&state->tfd_pollhd, POLLERR); pollhead_clean(&state->tfd_pollhd); } /* * No one can get to this timer; we don't need to lock it -- we can * just call on the backend to delete it. */ it = &state->tfd_itimer; if (it->it_backend != NULL) it->it_backend->clk_timer_delete(it); mutex_enter(&timerfd_lock); /* * Remove our state from our global list. */ for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next)) VERIFY(*sp != NULL); *sp = (*sp)->tfd_next; ddi_soft_state_free(timerfd_softstate, minor); vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1); mutex_exit(&timerfd_lock); return (0); } static int timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { switch (cmd) { case DDI_ATTACH: break; case DDI_RESUME: return (DDI_SUCCESS); default: return (DDI_FAILURE); } mutex_enter(&timerfd_lock); if (ddi_soft_state_init(&timerfd_softstate, sizeof (timerfd_state_t), 0) != 0) { cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state"); mutex_exit(&timerfd_lock); return (DDI_FAILURE); } if (ddi_create_minor_node(devi, "timerfd", S_IFCHR, TIMERFDMNRN_TIMERFD, DDI_PSEUDO, 0) == DDI_FAILURE) { cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node"); ddi_soft_state_fini(&timerfd_softstate); mutex_exit(&timerfd_lock); return (DDI_FAILURE); } ddi_report_dev(devi); timerfd_devi = devi; timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE, UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); mutex_exit(&timerfd_lock); return (DDI_SUCCESS); } /*ARGSUSED*/ static int timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } mutex_enter(&timerfd_lock); vmem_destroy(timerfd_minor); ddi_remove_minor_node(timerfd_devi, NULL); timerfd_devi = NULL; ddi_soft_state_fini(&timerfd_softstate); mutex_exit(&timerfd_lock); return (DDI_SUCCESS); } /*ARGSUSED*/ static int timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { int error; switch (infocmd) { case DDI_INFO_DEVT2DEVINFO: *result = (void *)timerfd_devi; error = DDI_SUCCESS; break; case DDI_INFO_DEVT2INSTANCE: *result = (void *)0; error = DDI_SUCCESS; break; default: error = DDI_FAILURE; } return (error); } static struct cb_ops timerfd_cb_ops = { timerfd_open, /* open */ timerfd_close, /* close */ nulldev, /* strategy */ nulldev, /* print */ nodev, /* dump */ timerfd_read, /* read */ nodev, /* write */ timerfd_ioctl, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ timerfd_poll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_NEW | D_MP /* Driver compatibility flag */ }; static struct dev_ops timerfd_ops = { DEVO_REV, /* devo_rev */ 0, /* refcnt */ timerfd_info, /* get_dev_info */ nulldev, /* identify */ nulldev, /* probe */ timerfd_attach, /* attach */ timerfd_detach, /* detach */ nodev, /* reset */ &timerfd_cb_ops, /* driver operations */ NULL, /* bus operations */ nodev, /* dev power */ ddi_quiesce_not_needed, /* quiesce */ }; static struct modldrv modldrv = { &mod_driverops, /* module type (this is a pseudo driver) */ "timerfd support", /* name of module */ &timerfd_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } int _fini(void) { return (mod_remove(&modlinkage)); }