1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright (c) 2015 Joyent, Inc. All rights reserved. 14 */ 15 16 /* 17 * Support for the timerfd facility, a Linux-borne facility that allows 18 * POSIX.1b timers to be created and manipulated via a file descriptor 19 * interface. 20 */ 21 22 #include <sys/ddi.h> 23 #include <sys/sunddi.h> 24 #include <sys/timerfd.h> 25 #include <sys/conf.h> 26 #include <sys/vmem.h> 27 #include <sys/sysmacros.h> 28 #include <sys/filio.h> 29 #include <sys/stat.h> 30 #include <sys/file.h> 31 #include <sys/timer.h> 32 33 struct timerfd_state; 34 typedef struct timerfd_state timerfd_state_t; 35 36 struct timerfd_state { 37 kmutex_t tfd_lock; /* lock protecting state */ 38 kcondvar_t tfd_cv; /* condvar */ 39 pollhead_t tfd_pollhd; /* poll head */ 40 uint64_t tfd_fired; /* # of times fired */ 41 itimer_t tfd_itimer; /* underlying itimer */ 42 timerfd_state_t *tfd_next; /* next state on global list */ 43 }; 44 45 /* 46 * Internal global variables. 47 */ 48 static kmutex_t timerfd_lock; /* lock protecting state */ 49 static dev_info_t *timerfd_devi; /* device info */ 50 static vmem_t *timerfd_minor; /* minor number arena */ 51 static void *timerfd_softstate; /* softstate pointer */ 52 static timerfd_state_t *timerfd_state; /* global list of state */ 53 54 static itimer_t * 55 timerfd_itimer_lock(timerfd_state_t *state) 56 { 57 itimer_t *it = &state->tfd_itimer; 58 59 mutex_enter(&state->tfd_lock); 60 61 while (it->it_lock & ITLK_LOCKED) { 62 it->it_blockers++; 63 cv_wait(&it->it_cv, &state->tfd_lock); 64 it->it_blockers--; 65 } 66 67 it->it_lock |= ITLK_LOCKED; 68 69 mutex_exit(&state->tfd_lock); 70 71 return (it); 72 } 73 74 static void 75 timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it) 76 { 77 VERIFY(it == &state->tfd_itimer); 78 VERIFY(it->it_lock & ITLK_LOCKED); 79 80 mutex_enter(&state->tfd_lock); 81 82 it->it_lock &= ~ITLK_LOCKED; 83 84 if (it->it_blockers) 85 cv_signal(&it->it_cv); 86 87 mutex_exit(&state->tfd_lock); 88 } 89 90 static void 91 timerfd_fire(itimer_t *it) 92 { 93 timerfd_state_t *state = it->it_frontend; 94 uint64_t oval; 95 96 mutex_enter(&state->tfd_lock); 97 oval = state->tfd_fired++; 98 mutex_exit(&state->tfd_lock); 99 100 if (oval == 0) { 101 cv_broadcast(&state->tfd_cv); 102 pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN); 103 } 104 } 105 106 /*ARGSUSED*/ 107 static int 108 timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 109 { 110 timerfd_state_t *state; 111 major_t major = getemajor(*devp); 112 minor_t minor = getminor(*devp); 113 114 if (minor != TIMERFDMNRN_TIMERFD) 115 return (ENXIO); 116 117 mutex_enter(&timerfd_lock); 118 119 minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1, 120 VM_BESTFIT | VM_SLEEP); 121 122 if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) { 123 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1); 124 mutex_exit(&timerfd_lock); 125 return (NULL); 126 } 127 128 state = ddi_get_soft_state(timerfd_softstate, minor); 129 *devp = makedevice(major, minor); 130 131 state->tfd_next = timerfd_state; 132 timerfd_state = state; 133 134 mutex_exit(&timerfd_lock); 135 136 return (0); 137 } 138 139 /*ARGSUSED*/ 140 static int 141 timerfd_read(dev_t dev, uio_t *uio, cred_t *cr) 142 { 143 timerfd_state_t *state; 144 minor_t minor = getminor(dev); 145 uint64_t val; 146 int err; 147 148 if (uio->uio_resid < sizeof (val)) 149 return (EINVAL); 150 151 state = ddi_get_soft_state(timerfd_softstate, minor); 152 153 mutex_enter(&state->tfd_lock); 154 155 while (state->tfd_fired == 0) { 156 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { 157 mutex_exit(&state->tfd_lock); 158 return (EAGAIN); 159 } 160 161 if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) { 162 mutex_exit(&state->tfd_lock); 163 return (EINTR); 164 } 165 } 166 167 /* 168 * Our tfd_fired is non-zero; slurp its value and then clear it. 169 */ 170 val = state->tfd_fired; 171 state->tfd_fired = 0; 172 mutex_exit(&state->tfd_lock); 173 174 err = uiomove(&val, sizeof (val), UIO_READ, uio); 175 176 return (err); 177 } 178 179 /*ARGSUSED*/ 180 static int 181 timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp, 182 struct pollhead **phpp) 183 { 184 timerfd_state_t *state; 185 minor_t minor = getminor(dev); 186 short revents = 0; 187 188 state = ddi_get_soft_state(timerfd_softstate, minor); 189 190 mutex_enter(&state->tfd_lock); 191 192 if (state->tfd_fired > 0) 193 revents |= POLLRDNORM | POLLIN; 194 195 if (!(*reventsp = revents & events) && !anyyet) 196 *phpp = &state->tfd_pollhd; 197 198 mutex_exit(&state->tfd_lock); 199 200 return (0); 201 } 202 203 static int 204 timerfd_copyin(uintptr_t addr, itimerspec_t *dest) 205 { 206 if (get_udatamodel() == DATAMODEL_NATIVE) { 207 if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0) 208 return (EFAULT); 209 } else { 210 itimerspec32_t dest32; 211 212 if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0) 213 return (EFAULT); 214 215 ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32); 216 } 217 218 if (itimerspecfix(&dest->it_value) || 219 (itimerspecfix(&dest->it_interval) && 220 timerspecisset(&dest->it_value))) { 221 return (EINVAL); 222 } 223 224 return (0); 225 } 226 227 static int 228 timerfd_copyout(itimerspec_t *src, uintptr_t addr) 229 { 230 if (get_udatamodel() == DATAMODEL_NATIVE) { 231 if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0) 232 return (EFAULT); 233 } else { 234 itimerspec32_t src32; 235 236 if (ITIMERSPEC_OVERFLOW(src)) 237 return (EOVERFLOW); 238 239 ITIMERSPEC_TO_ITIMERSPEC32(&src32, src); 240 241 if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0) 242 return (EFAULT); 243 } 244 245 return (0); 246 } 247 248 /*ARGSUSED*/ 249 static int 250 timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) 251 { 252 itimerspec_t when, oval; 253 timerfd_state_t *state; 254 minor_t minor = getminor(dev); 255 int err; 256 itimer_t *it; 257 258 state = ddi_get_soft_state(timerfd_softstate, minor); 259 260 switch (cmd) { 261 case TIMERFDIOC_CREATE: { 262 if (arg == TIMERFD_MONOTONIC) 263 arg = CLOCK_MONOTONIC; 264 265 it = timerfd_itimer_lock(state); 266 267 if (it->it_backend != NULL) { 268 timerfd_itimer_unlock(state, it); 269 return (EEXIST); 270 } 271 272 if ((it->it_backend = clock_get_backend(arg)) == NULL) { 273 timerfd_itimer_unlock(state, it); 274 return (EINVAL); 275 } 276 277 /* 278 * We need to provide a proc structure only for purposes 279 * of locking CLOCK_REALTIME-based timers -- it is safe to 280 * provide p0 here. 281 */ 282 it->it_proc = &p0; 283 284 err = it->it_backend->clk_timer_create(it, timerfd_fire); 285 286 if (err != 0) { 287 it->it_backend = NULL; 288 timerfd_itimer_unlock(state, it); 289 return (err); 290 } 291 292 it->it_frontend = state; 293 timerfd_itimer_unlock(state, it); 294 295 return (0); 296 } 297 298 case TIMERFDIOC_GETTIME: { 299 it = timerfd_itimer_lock(state); 300 301 if (it->it_backend == NULL) { 302 timerfd_itimer_unlock(state, it); 303 return (ENODEV); 304 } 305 306 err = it->it_backend->clk_timer_gettime(it, &when); 307 timerfd_itimer_unlock(state, it); 308 309 if (err != 0) 310 return (err); 311 312 if ((err = timerfd_copyout(&when, arg)) != 0) 313 return (err); 314 315 return (0); 316 } 317 318 case TIMERFDIOC_SETTIME: { 319 timerfd_settime_t st; 320 321 if (copyin((void *)arg, &st, sizeof (st)) != 0) 322 return (EFAULT); 323 324 if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0) 325 return (err); 326 327 it = timerfd_itimer_lock(state); 328 329 if (it->it_backend == NULL) { 330 timerfd_itimer_unlock(state, it); 331 return (ENODEV); 332 } 333 334 if (st.tfd_settime_ovalue != NULL) { 335 err = it->it_backend->clk_timer_gettime(it, &oval); 336 337 if (err != 0) { 338 timerfd_itimer_unlock(state, it); 339 return (err); 340 } 341 } 342 343 /* 344 * Before we set the time, we're going to clear tfd_fired. 345 * This can potentially race with the (old) timer firing, but 346 * the window is deceptively difficult to close: if we were 347 * to simply clear tfd_fired after the call to the backend 348 * returned, we would run the risk of plowing a firing of the 349 * new timer. Ultimately, the race can only be resolved by 350 * the backend, which would likely need to be extended with a 351 * function to call back into when the timer is between states 352 * (that is, after the timer can no longer fire with the old 353 * timer value, but before it can fire with the new one). 354 * This is straightforward enough for backends that set a 355 * timer's value by deleting the old one and adding the new 356 * one, but for those that modify the timer value in place 357 * (e.g., cyclics), the required serialization is necessarily 358 * delicate: the function would have to be callable from 359 * arbitrary interrupt context. While implementing all of 360 * this is possible, it does not (for the moment) seem worth 361 * it: if the timer is firing at essentially the same moment 362 * that it's being reprogrammed, there is a higher-level race 363 * with respect to timerfd usage that the progam itself will 364 * have to properly resolve -- and it seems reasonable to 365 * simply allow the program to resolve it in this case. 366 */ 367 mutex_enter(&state->tfd_lock); 368 state->tfd_fired = 0; 369 mutex_exit(&state->tfd_lock); 370 371 err = it->it_backend->clk_timer_settime(it, 372 st.tfd_settime_flags & TFD_TIMER_ABSTIME ? 373 TIMER_ABSTIME : TIMER_RELTIME, &when); 374 timerfd_itimer_unlock(state, it); 375 376 if (err != 0 || st.tfd_settime_ovalue == NULL) 377 return (err); 378 379 if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0) 380 return (err); 381 382 return (0); 383 } 384 385 default: 386 break; 387 } 388 389 return (ENOTTY); 390 } 391 392 /*ARGSUSED*/ 393 static int 394 timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 395 { 396 timerfd_state_t *state, **sp; 397 itimer_t *it; 398 minor_t minor = getminor(dev); 399 400 state = ddi_get_soft_state(timerfd_softstate, minor); 401 402 if (state->tfd_pollhd.ph_list != NULL) { 403 pollwakeup(&state->tfd_pollhd, POLLERR); 404 pollhead_clean(&state->tfd_pollhd); 405 } 406 407 /* 408 * No one can get to this timer; we don't need to lock it -- we can 409 * just call on the backend to delete it. 410 */ 411 it = &state->tfd_itimer; 412 413 if (it->it_backend != NULL) 414 it->it_backend->clk_timer_delete(it); 415 416 mutex_enter(&timerfd_lock); 417 418 /* 419 * Remove our state from our global list. 420 */ 421 for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next)) 422 VERIFY(*sp != NULL); 423 424 *sp = (*sp)->tfd_next; 425 426 ddi_soft_state_free(timerfd_softstate, minor); 427 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1); 428 429 mutex_exit(&timerfd_lock); 430 431 return (0); 432 } 433 434 static int 435 timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 436 { 437 switch (cmd) { 438 case DDI_ATTACH: 439 break; 440 441 case DDI_RESUME: 442 return (DDI_SUCCESS); 443 444 default: 445 return (DDI_FAILURE); 446 } 447 448 mutex_enter(&timerfd_lock); 449 450 if (ddi_soft_state_init(&timerfd_softstate, 451 sizeof (timerfd_state_t), 0) != 0) { 452 cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state"); 453 mutex_exit(&timerfd_lock); 454 return (DDI_FAILURE); 455 } 456 457 if (ddi_create_minor_node(devi, "timerfd", S_IFCHR, 458 TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) { 459 cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node"); 460 ddi_soft_state_fini(&timerfd_softstate); 461 mutex_exit(&timerfd_lock); 462 return (DDI_FAILURE); 463 } 464 465 ddi_report_dev(devi); 466 timerfd_devi = devi; 467 468 timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE, 469 UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0, 470 VM_SLEEP | VMC_IDENTIFIER); 471 472 mutex_exit(&timerfd_lock); 473 474 return (DDI_SUCCESS); 475 } 476 477 /*ARGSUSED*/ 478 static int 479 timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 480 { 481 switch (cmd) { 482 case DDI_DETACH: 483 break; 484 485 case DDI_SUSPEND: 486 return (DDI_SUCCESS); 487 488 default: 489 return (DDI_FAILURE); 490 } 491 492 mutex_enter(&timerfd_lock); 493 vmem_destroy(timerfd_minor); 494 495 ddi_remove_minor_node(timerfd_devi, NULL); 496 timerfd_devi = NULL; 497 498 ddi_soft_state_fini(&timerfd_softstate); 499 mutex_exit(&timerfd_lock); 500 501 return (DDI_SUCCESS); 502 } 503 504 /*ARGSUSED*/ 505 static int 506 timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 507 { 508 int error; 509 510 switch (infocmd) { 511 case DDI_INFO_DEVT2DEVINFO: 512 *result = (void *)timerfd_devi; 513 error = DDI_SUCCESS; 514 break; 515 case DDI_INFO_DEVT2INSTANCE: 516 *result = (void *)0; 517 error = DDI_SUCCESS; 518 break; 519 default: 520 error = DDI_FAILURE; 521 } 522 return (error); 523 } 524 525 static struct cb_ops timerfd_cb_ops = { 526 timerfd_open, /* open */ 527 timerfd_close, /* close */ 528 nulldev, /* strategy */ 529 nulldev, /* print */ 530 nodev, /* dump */ 531 timerfd_read, /* read */ 532 nodev, /* write */ 533 timerfd_ioctl, /* ioctl */ 534 nodev, /* devmap */ 535 nodev, /* mmap */ 536 nodev, /* segmap */ 537 timerfd_poll, /* poll */ 538 ddi_prop_op, /* cb_prop_op */ 539 0, /* streamtab */ 540 D_NEW | D_MP /* Driver compatibility flag */ 541 }; 542 543 static struct dev_ops timerfd_ops = { 544 DEVO_REV, /* devo_rev */ 545 0, /* refcnt */ 546 timerfd_info, /* get_dev_info */ 547 nulldev, /* identify */ 548 nulldev, /* probe */ 549 timerfd_attach, /* attach */ 550 timerfd_detach, /* detach */ 551 nodev, /* reset */ 552 &timerfd_cb_ops, /* driver operations */ 553 NULL, /* bus operations */ 554 nodev, /* dev power */ 555 ddi_quiesce_not_needed, /* quiesce */ 556 }; 557 558 static struct modldrv modldrv = { 559 &mod_driverops, /* module type (this is a pseudo driver) */ 560 "timerfd support", /* name of module */ 561 &timerfd_ops, /* driver ops */ 562 }; 563 564 static struct modlinkage modlinkage = { 565 MODREV_1, 566 (void *)&modldrv, 567 NULL 568 }; 569 570 int 571 _init(void) 572 { 573 return (mod_install(&modlinkage)); 574 } 575 576 int 577 _info(struct modinfo *modinfop) 578 { 579 return (mod_info(&modlinkage, modinfop)); 580 } 581 582 int 583 _fini(void) 584 { 585 return (mod_remove(&modlinkage)); 586 } 587