1 /* 2 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 #pragma ident "%Z%%M% %I% %E% SMI" 7 8 /* 9 * Copyright (c) 1982, 1986 Regents of the University of California. 10 * All rights reserved. The Berkeley software License Agreement 11 * specifies the terms and conditions for redistribution. 12 */ 13 14 #include <sys/param.h> 15 #include <sys/user.h> 16 #include <sys/vnode.h> 17 #include <sys/proc.h> 18 #include <sys/time.h> 19 #include <sys/systm.h> 20 #include <sys/kmem.h> 21 #include <sys/cmn_err.h> 22 #include <sys/cpuvar.h> 23 #include <sys/timer.h> 24 #include <sys/debug.h> 25 #include <sys/sysmacros.h> 26 #include <sys/cyclic.h> 27 28 static void realitexpire(void *); 29 static void realprofexpire(void *); 30 static void timeval_advance(struct timeval *, struct timeval *); 31 32 kmutex_t tod_lock; /* protects time-of-day stuff */ 33 34 /* 35 * Constant to define the minimum interval value of the ITIMER_REALPROF timer. 36 * Value is in microseconds; defaults to 500 usecs. Setting this value 37 * significantly lower may allow for denial-of-service attacks. 38 */ 39 int itimer_realprof_minimum = 500; 40 41 /* 42 * macro to compare a timeval to a timestruc 43 */ 44 45 #define TVTSCMP(tvp, tsp, cmp) \ 46 /* CSTYLED */ \ 47 ((tvp)->tv_sec cmp (tsp)->tv_sec || \ 48 ((tvp)->tv_sec == (tsp)->tv_sec && \ 49 /* CSTYLED */ \ 50 (tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec)) 51 52 /* 53 * Time of day and interval timer support. 54 * 55 * These routines provide the kernel entry points to get and set 56 * the time-of-day and per-process interval timers. Subroutines 57 * here provide support for adding and subtracting timeval structures 58 * and decrementing interval timers, optionally reloading the interval 59 * timers when they expire. 60 */ 61 62 /* 63 * SunOS function to generate monotonically increasing time values. 
64 */ 65 void 66 uniqtime(struct timeval *tv) 67 { 68 static struct timeval last; 69 timestruc_t ts; 70 time_t sec; 71 int usec, nsec; 72 73 /* 74 * protect modification of last 75 */ 76 mutex_enter(&tod_lock); 77 gethrestime(&ts); 78 79 /* 80 * Fast algorithm to convert nsec to usec -- see hrt2ts() 81 * in common/os/timers.c for a full description. 82 */ 83 nsec = ts.tv_nsec; 84 usec = nsec + (nsec >> 2); 85 usec = nsec + (usec >> 1); 86 usec = nsec + (usec >> 2); 87 usec = nsec + (usec >> 4); 88 usec = nsec - (usec >> 3); 89 usec = nsec + (usec >> 2); 90 usec = nsec + (usec >> 3); 91 usec = nsec + (usec >> 4); 92 usec = nsec + (usec >> 1); 93 usec = nsec + (usec >> 6); 94 usec = usec >> 10; 95 sec = ts.tv_sec; 96 97 /* 98 * Try to keep timestamps unique, but don't be obsessive about 99 * it in the face of large differences. 100 */ 101 if ((sec <= last.tv_sec) && /* same or lower seconds, and */ 102 ((sec != last.tv_sec) || /* either different second or */ 103 (usec <= last.tv_usec)) && /* lower microsecond, and */ 104 ((last.tv_sec - sec) <= 5)) { /* not way back in time */ 105 sec = last.tv_sec; 106 usec = last.tv_usec + 1; 107 if (usec >= MICROSEC) { 108 usec -= MICROSEC; 109 sec++; 110 } 111 } 112 last.tv_sec = sec; 113 last.tv_usec = usec; 114 mutex_exit(&tod_lock); 115 116 tv->tv_sec = sec; 117 tv->tv_usec = usec; 118 } 119 120 /* 121 * Timestamps are exported from the kernel in several places. 122 * Such timestamps are commonly used for either uniqueness or for 123 * sequencing - truncation to 32-bits is fine for uniqueness, 124 * but sequencing is going to take more work as we get closer to 2038! 
125 */ 126 void 127 uniqtime32(struct timeval32 *tv32p) 128 { 129 struct timeval tv; 130 131 uniqtime(&tv); 132 TIMEVAL_TO_TIMEVAL32(tv32p, &tv); 133 } 134 135 int 136 gettimeofday(struct timeval *tp) 137 { 138 struct timeval atv; 139 140 if (tp) { 141 uniqtime(&atv); 142 if (get_udatamodel() == DATAMODEL_NATIVE) { 143 if (copyout(&atv, tp, sizeof (atv))) 144 return (set_errno(EFAULT)); 145 } else { 146 struct timeval32 tv32; 147 148 if (TIMEVAL_OVERFLOW(&atv)) 149 return (set_errno(EOVERFLOW)); 150 TIMEVAL_TO_TIMEVAL32(&tv32, &atv); 151 152 if (copyout(&tv32, tp, sizeof (tv32))) 153 return (set_errno(EFAULT)); 154 } 155 } 156 return (0); 157 } 158 159 int 160 getitimer(uint_t which, struct itimerval *itv) 161 { 162 int error; 163 164 if (get_udatamodel() == DATAMODEL_NATIVE) 165 error = xgetitimer(which, itv, 0); 166 else { 167 struct itimerval kitv; 168 169 if ((error = xgetitimer(which, &kitv, 1)) == 0) { 170 if (ITIMERVAL_OVERFLOW(&kitv)) { 171 error = EOVERFLOW; 172 } else { 173 struct itimerval32 itv32; 174 175 ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv); 176 if (copyout(&itv32, itv, sizeof (itv32)) != 0) 177 error = EFAULT; 178 } 179 } 180 } 181 182 return (error ? 
(set_errno(error)) : 0); 183 } 184 185 int 186 xgetitimer(uint_t which, struct itimerval *itv, int iskaddr) 187 { 188 struct proc *p = curproc; 189 struct timeval now; 190 struct itimerval aitv; 191 hrtime_t ts, first, interval, remain; 192 193 mutex_enter(&p->p_lock); 194 195 switch (which) { 196 case ITIMER_VIRTUAL: 197 case ITIMER_PROF: 198 aitv = ttolwp(curthread)->lwp_timer[which]; 199 break; 200 201 case ITIMER_REAL: 202 uniqtime(&now); 203 aitv = p->p_realitimer; 204 205 if (timerisset(&aitv.it_value)) { 206 /*CSTYLED*/ 207 if (timercmp(&aitv.it_value, &now, <)) { 208 timerclear(&aitv.it_value); 209 } else { 210 timevalsub(&aitv.it_value, &now); 211 } 212 } 213 break; 214 215 case ITIMER_REALPROF: 216 if (curproc->p_rprof_cyclic == CYCLIC_NONE) { 217 bzero(&aitv, sizeof (aitv)); 218 break; 219 } 220 221 aitv = curproc->p_rprof_timer; 222 223 first = tv2hrt(&aitv.it_value); 224 interval = tv2hrt(&aitv.it_interval); 225 226 if ((ts = gethrtime()) < first) { 227 /* 228 * We haven't gone off for the first time; the time 229 * remaining is simply the first time we will go 230 * off minus the current time. 231 */ 232 remain = first - ts; 233 } else { 234 if (interval == 0) { 235 /* 236 * This was set as a one-shot, and we've 237 * already gone off; there is no time 238 * remaining. 239 */ 240 remain = 0; 241 } else { 242 /* 243 * We have a non-zero interval; we need to 244 * determine how far we are into the current 245 * interval, and subtract that from the 246 * interval to determine the time remaining. 
247 */ 248 remain = interval - ((ts - first) % interval); 249 } 250 } 251 252 hrt2tv(remain, &aitv.it_value); 253 break; 254 255 default: 256 mutex_exit(&p->p_lock); 257 return (EINVAL); 258 } 259 260 mutex_exit(&p->p_lock); 261 262 if (iskaddr) { 263 bcopy(&aitv, itv, sizeof (*itv)); 264 } else { 265 ASSERT(get_udatamodel() == DATAMODEL_NATIVE); 266 if (copyout(&aitv, itv, sizeof (*itv))) 267 return (EFAULT); 268 } 269 270 return (0); 271 } 272 273 274 int 275 setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv) 276 { 277 int error; 278 279 if (oitv != NULL) 280 if ((error = getitimer(which, oitv)) != 0) 281 return (error); 282 283 if (itv == NULL) 284 return (0); 285 286 if (get_udatamodel() == DATAMODEL_NATIVE) 287 error = xsetitimer(which, itv, 0); 288 else { 289 struct itimerval32 itv32; 290 struct itimerval kitv; 291 292 if (copyin(itv, &itv32, sizeof (itv32))) 293 error = EFAULT; 294 ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32); 295 error = xsetitimer(which, &kitv, 1); 296 } 297 298 return (error ? 
(set_errno(error)) : 0); 299 } 300 301 int 302 xsetitimer(uint_t which, struct itimerval *itv, int iskaddr) 303 { 304 struct itimerval aitv; 305 struct timeval now; 306 struct proc *p = curproc; 307 kthread_t *t; 308 timeout_id_t tmp_id; 309 cyc_handler_t hdlr; 310 cyc_time_t when; 311 cyclic_id_t cyclic; 312 hrtime_t ts; 313 int min; 314 315 if (itv == NULL) 316 return (0); 317 318 if (iskaddr) { 319 bcopy(itv, &aitv, sizeof (aitv)); 320 } else { 321 ASSERT(get_udatamodel() == DATAMODEL_NATIVE); 322 if (copyin(itv, &aitv, sizeof (aitv))) 323 return (EFAULT); 324 } 325 326 if (which == ITIMER_REALPROF) { 327 min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)), 328 itimer_realprof_minimum); 329 } else { 330 min = usec_per_tick; 331 } 332 333 if (itimerfix(&aitv.it_value, min) || 334 (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value))) 335 return (EINVAL); 336 337 mutex_enter(&p->p_lock); 338 switch (which) { 339 case ITIMER_REAL: 340 /* 341 * The SITBUSY flag prevents conflicts with multiple 342 * threads attempting to perform setitimer(ITIMER_REAL) 343 * at the same time, even when we drop p->p_lock below. 344 * Any blocked thread returns successfully because the 345 * effect is the same as if it got here first, finished, 346 * and the other thread then came through and destroyed 347 * what it did. We are just protecting the system from 348 * malfunctioning due to the race condition. 349 */ 350 if (p->p_flag & SITBUSY) { 351 mutex_exit(&p->p_lock); 352 return (0); 353 } 354 p->p_flag |= SITBUSY; 355 while ((tmp_id = p->p_itimerid) != 0) { 356 /* 357 * Avoid deadlock in callout_delete (called from 358 * untimeout) which may go to sleep (while holding 359 * p_lock). Drop p_lock and re-acquire it after 360 * untimeout returns. Need to clear p_itimerid 361 * while holding p_lock. 
362 */ 363 p->p_itimerid = 0; 364 mutex_exit(&p->p_lock); 365 (void) untimeout(tmp_id); 366 mutex_enter(&p->p_lock); 367 } 368 if (timerisset(&aitv.it_value)) { 369 uniqtime(&now); 370 timevaladd(&aitv.it_value, &now); 371 p->p_itimerid = realtime_timeout(realitexpire, 372 p, hzto(&aitv.it_value)); 373 } 374 p->p_realitimer = aitv; 375 p->p_flag &= ~SITBUSY; 376 break; 377 378 case ITIMER_REALPROF: 379 cyclic = p->p_rprof_cyclic; 380 p->p_rprof_cyclic = CYCLIC_NONE; 381 382 mutex_exit(&p->p_lock); 383 384 /* 385 * We're now going to acquire cpu_lock, remove the old cyclic 386 * if necessary, and add our new cyclic. 387 */ 388 mutex_enter(&cpu_lock); 389 390 if (cyclic != CYCLIC_NONE) 391 cyclic_remove(cyclic); 392 393 if (!timerisset(&aitv.it_value)) { 394 /* 395 * If we were passed a value of 0, we're done. 396 */ 397 mutex_exit(&cpu_lock); 398 return (0); 399 } 400 401 hdlr.cyh_func = realprofexpire; 402 hdlr.cyh_arg = p; 403 hdlr.cyh_level = CY_LOW_LEVEL; 404 405 when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value)); 406 when.cyt_interval = tv2hrt(&aitv.it_interval); 407 408 if (when.cyt_interval == 0) { 409 /* 410 * Using the same logic as for CLOCK_HIGHRES timers, we 411 * set the interval to be INT64_MAX - when.cyt_when to 412 * effect a one-shot; see the comment in clock_highres.c 413 * for more details on why this works. 414 */ 415 when.cyt_interval = INT64_MAX - when.cyt_when; 416 } 417 418 cyclic = cyclic_add(&hdlr, &when); 419 420 mutex_exit(&cpu_lock); 421 422 /* 423 * We have now successfully added the cyclic. Reacquire 424 * p_lock, and see if anyone has snuck in. 425 */ 426 mutex_enter(&p->p_lock); 427 428 if (p->p_rprof_cyclic != CYCLIC_NONE) { 429 /* 430 * We're racing with another thread establishing an 431 * ITIMER_REALPROF interval timer. We'll let the other 432 * thread win (this is a race at the application level, 433 * so letting the other thread win is acceptable). 
434 */ 435 mutex_exit(&p->p_lock); 436 mutex_enter(&cpu_lock); 437 cyclic_remove(cyclic); 438 mutex_exit(&cpu_lock); 439 440 return (0); 441 } 442 443 /* 444 * Success. Set our tracking variables in the proc structure, 445 * cancel any outstanding ITIMER_PROF, and allocate the 446 * per-thread SIGPROF buffers, if possible. 447 */ 448 hrt2tv(ts, &aitv.it_value); 449 p->p_rprof_timer = aitv; 450 p->p_rprof_cyclic = cyclic; 451 452 t = p->p_tlist; 453 do { 454 struct itimerval *itvp; 455 456 itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF]; 457 timerclear(&itvp->it_interval); 458 timerclear(&itvp->it_value); 459 460 if (t->t_rprof != NULL) 461 continue; 462 463 t->t_rprof = 464 kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP); 465 aston(t); 466 } while ((t = t->t_forw) != p->p_tlist); 467 468 break; 469 470 case ITIMER_VIRTUAL: 471 ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv; 472 break; 473 474 case ITIMER_PROF: 475 if (p->p_rprof_cyclic != CYCLIC_NONE) { 476 /* 477 * Silently ignore ITIMER_PROF if ITIMER_REALPROF 478 * is in effect. 479 */ 480 break; 481 } 482 483 ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv; 484 break; 485 486 default: 487 mutex_exit(&p->p_lock); 488 return (EINVAL); 489 } 490 mutex_exit(&p->p_lock); 491 return (0); 492 } 493 494 /* 495 * Real interval timer expired: 496 * send process whose timer expired an alarm signal. 497 * If time is not set up to reload, then just return. 498 * Else compute next time timer should go off which is > current time. 499 * This is where delay in processing this timeout causes multiple 500 * SIGALRM calls to be compressed into one. 
501 */ 502 static void 503 realitexpire(void *arg) 504 { 505 struct proc *p = arg; 506 struct timeval *valp = &p->p_realitimer.it_value; 507 struct timeval *intervalp = &p->p_realitimer.it_interval; 508 #if !defined(_LP64) 509 clock_t ticks; 510 #endif 511 512 mutex_enter(&p->p_lock); 513 #if !defined(_LP64) 514 if ((ticks = hzto(valp)) > 1) { 515 /* 516 * If we are executing before we were meant to, it must be 517 * because of an overflow in a prior hzto() calculation. 518 * In this case, we want to go to sleep for the recalculated 519 * number of ticks. For the special meaning of the value "1" 520 * see comment in timespectohz(). 521 */ 522 p->p_itimerid = realtime_timeout(realitexpire, p, ticks); 523 mutex_exit(&p->p_lock); 524 return; 525 } 526 #endif 527 sigtoproc(p, NULL, SIGALRM); 528 if (!timerisset(intervalp)) { 529 timerclear(valp); 530 p->p_itimerid = 0; 531 } else { 532 /* advance timer value past current time */ 533 timeval_advance(valp, intervalp); 534 p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp)); 535 } 536 mutex_exit(&p->p_lock); 537 } 538 539 /* 540 * Real time profiling interval timer expired: 541 * Increment microstate counters for each lwp in the process 542 * and ensure that running lwps are kicked into the kernel. 543 * If time is not set up to reload, then just return. 544 * Else compute next time timer should go off which is > current time, 545 * as above. 546 */ 547 static void 548 realprofexpire(void *arg) 549 { 550 struct proc *p = arg; 551 kthread_t *t; 552 553 mutex_enter(&p->p_lock); 554 if ((t = p->p_tlist) == NULL) { 555 mutex_exit(&p->p_lock); 556 return; 557 } 558 do { 559 int mstate; 560 561 /* 562 * Attempt to allocate the SIGPROF buffer, but don't sleep. 
563 */ 564 if (t->t_rprof == NULL) 565 t->t_rprof = kmem_zalloc(sizeof (struct rprof), 566 KM_NOSLEEP); 567 if (t->t_rprof == NULL) 568 continue; 569 570 thread_lock(t); 571 switch (t->t_state) { 572 case TS_SLEEP: 573 /* 574 * Don't touch the lwp is it is swapped out. 575 */ 576 if (!(t->t_schedflag & TS_LOAD)) { 577 mstate = LMS_SLEEP; 578 break; 579 } 580 switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) { 581 case LMS_TFAULT: 582 case LMS_DFAULT: 583 case LMS_KFAULT: 584 case LMS_USER_LOCK: 585 break; 586 default: 587 mstate = LMS_SLEEP; 588 break; 589 } 590 break; 591 case TS_RUN: 592 case TS_WAIT: 593 mstate = LMS_WAIT_CPU; 594 break; 595 case TS_ONPROC: 596 switch (mstate = t->t_mstate) { 597 case LMS_USER: 598 case LMS_SYSTEM: 599 case LMS_TRAP: 600 break; 601 default: 602 mstate = LMS_SYSTEM; 603 break; 604 } 605 break; 606 default: 607 mstate = t->t_mstate; 608 break; 609 } 610 t->t_rprof->rp_anystate = 1; 611 t->t_rprof->rp_state[mstate]++; 612 aston(t); 613 /* 614 * force the thread into the kernel 615 * if it is not already there. 616 */ 617 if (t->t_state == TS_ONPROC && t->t_cpu != CPU) 618 poke_cpu(t->t_cpu->cpu_id); 619 thread_unlock(t); 620 } while ((t = t->t_forw) != p->p_tlist); 621 622 mutex_exit(&p->p_lock); 623 } 624 625 /* 626 * Advances timer value past the current time of day. See the detailed 627 * comment for this logic in realitsexpire(), above. 
628 */ 629 static void 630 timeval_advance(struct timeval *valp, struct timeval *intervalp) 631 { 632 int cnt2nth; 633 struct timeval interval2nth; 634 635 for (;;) { 636 interval2nth = *intervalp; 637 for (cnt2nth = 0; ; cnt2nth++) { 638 timevaladd(valp, &interval2nth); 639 /*CSTYLED*/ 640 if (TVTSCMP(valp, &hrestime, >)) 641 break; 642 timevaladd(&interval2nth, &interval2nth); 643 } 644 if (cnt2nth == 0) 645 break; 646 timevalsub(valp, &interval2nth); 647 } 648 } 649 650 /* 651 * Check that a proposed value to load into the .it_value or .it_interval 652 * part of an interval timer is acceptable, and set it to at least a 653 * specified minimal value. 654 */ 655 int 656 itimerfix(struct timeval *tv, int minimum) 657 { 658 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || 659 tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) 660 return (EINVAL); 661 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum) 662 tv->tv_usec = minimum; 663 return (0); 664 } 665 666 /* 667 * Same as itimerfix, except a) it takes a timespec instead of a timeval and 668 * b) it doesn't truncate based on timeout granularity; consumers of this 669 * interface (e.g. timer_settime()) depend on the passed timespec not being 670 * modified implicitly. 671 */ 672 int 673 itimerspecfix(timespec_t *tv) 674 { 675 if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC) 676 return (EINVAL); 677 return (0); 678 } 679 680 /* 681 * Decrement an interval timer by a specified number 682 * of microseconds, which must be less than a second, 683 * i.e. < 1000000. If the timer expires, then reload 684 * it. In this case, carry over (usec - old value) to 685 * reducint the value reloaded into the timer so that 686 * the timer does not drift. This routine assumes 687 * that it is called in a context where the timers 688 * on which it is operating cannot change in value. 
689 */ 690 int 691 itimerdecr(struct itimerval *itp, int usec) 692 { 693 if (itp->it_value.tv_usec < usec) { 694 if (itp->it_value.tv_sec == 0) { 695 /* expired, and already in next interval */ 696 usec -= itp->it_value.tv_usec; 697 goto expire; 698 } 699 itp->it_value.tv_usec += MICROSEC; 700 itp->it_value.tv_sec--; 701 } 702 itp->it_value.tv_usec -= usec; 703 usec = 0; 704 if (timerisset(&itp->it_value)) 705 return (1); 706 /* expired, exactly at end of interval */ 707 expire: 708 if (timerisset(&itp->it_interval)) { 709 itp->it_value = itp->it_interval; 710 itp->it_value.tv_usec -= usec; 711 if (itp->it_value.tv_usec < 0) { 712 itp->it_value.tv_usec += MICROSEC; 713 itp->it_value.tv_sec--; 714 } 715 } else 716 itp->it_value.tv_usec = 0; /* sec is already 0 */ 717 return (0); 718 } 719 720 /* 721 * Add and subtract routines for timevals. 722 * N.B.: subtract routine doesn't deal with 723 * results which are before the beginning, 724 * it just gets very confused in this case. 725 * Caveat emptor. 726 */ 727 void 728 timevaladd(struct timeval *t1, struct timeval *t2) 729 { 730 t1->tv_sec += t2->tv_sec; 731 t1->tv_usec += t2->tv_usec; 732 timevalfix(t1); 733 } 734 735 void 736 timevalsub(struct timeval *t1, struct timeval *t2) 737 { 738 t1->tv_sec -= t2->tv_sec; 739 t1->tv_usec -= t2->tv_usec; 740 timevalfix(t1); 741 } 742 743 void 744 timevalfix(struct timeval *t1) 745 { 746 if (t1->tv_usec < 0) { 747 t1->tv_sec--; 748 t1->tv_usec += MICROSEC; 749 } 750 if (t1->tv_usec >= MICROSEC) { 751 t1->tv_sec++; 752 t1->tv_usec -= MICROSEC; 753 } 754 } 755 756 /* 757 * Same as the routines above. These routines take a timespec instead 758 * of a timeval. 
759 */ 760 void 761 timespecadd(timespec_t *t1, timespec_t *t2) 762 { 763 t1->tv_sec += t2->tv_sec; 764 t1->tv_nsec += t2->tv_nsec; 765 timespecfix(t1); 766 } 767 768 void 769 timespecsub(timespec_t *t1, timespec_t *t2) 770 { 771 t1->tv_sec -= t2->tv_sec; 772 t1->tv_nsec -= t2->tv_nsec; 773 timespecfix(t1); 774 } 775 776 void 777 timespecfix(timespec_t *t1) 778 { 779 if (t1->tv_nsec < 0) { 780 t1->tv_sec--; 781 t1->tv_nsec += NANOSEC; 782 } else { 783 if (t1->tv_nsec >= NANOSEC) { 784 t1->tv_sec++; 785 t1->tv_nsec -= NANOSEC; 786 } 787 } 788 } 789 790 /* 791 * Compute number of hz until specified time. 792 * Used to compute third argument to timeout() from an absolute time. 793 */ 794 clock_t 795 hzto(struct timeval *tv) 796 { 797 timespec_t ts, now; 798 799 ts.tv_sec = tv->tv_sec; 800 ts.tv_nsec = tv->tv_usec * 1000; 801 gethrestime_lasttick(&now); 802 803 return (timespectohz(&ts, now)); 804 } 805 806 /* 807 * Compute number of hz until specified time for a given timespec value. 808 * Used to compute third argument to timeout() from an absolute time. 809 */ 810 clock_t 811 timespectohz(timespec_t *tv, timespec_t now) 812 { 813 clock_t ticks; 814 time_t sec; 815 int nsec; 816 817 /* 818 * Compute number of ticks we will see between now and 819 * the target time; returns "1" if the destination time 820 * is before the next tick, so we always get some delay, 821 * and returns LONG_MAX ticks if we would overflow. 822 */ 823 sec = tv->tv_sec - now.tv_sec; 824 nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1; 825 826 if (nsec < 0) { 827 sec--; 828 nsec += NANOSEC; 829 } else if (nsec >= NANOSEC) { 830 sec++; 831 nsec -= NANOSEC; 832 } 833 834 ticks = NSEC_TO_TICK(nsec); 835 836 /* 837 * Compute ticks, accounting for negative and overflow as above. 838 * Overflow protection kicks in at about 70 weeks for hz=50 839 * and at about 35 weeks for hz=100. 
(Rather longer for the 64-bit 840 * kernel :-) 841 */ 842 if (sec < 0 || (sec == 0 && ticks < 1)) 843 ticks = 1; /* protect vs nonpositive */ 844 else if (sec > (LONG_MAX - ticks) / hz) 845 ticks = LONG_MAX; /* protect vs overflow */ 846 else 847 ticks += sec * hz; /* common case */ 848 849 return (ticks); 850 } 851 852 /* 853 * hrt2ts(): convert from hrtime_t to timestruc_t. 854 * 855 * All this routine really does is: 856 * 857 * tsp->sec = hrt / NANOSEC; 858 * tsp->nsec = hrt % NANOSEC; 859 * 860 * The black magic below avoids doing a 64-bit by 32-bit integer divide, 861 * which is quite expensive. There's actually much more going on here than 862 * it might first appear -- don't try this at home. 863 * 864 * For the adventuresome, here's an explanation of how it works. 865 * 866 * Multiplication by a fixed constant is easy -- you just do the appropriate 867 * shifts and adds. For example, to multiply by 10, we observe that 868 * 869 * x * 10 = x * (8 + 2) 870 * = (x * 8) + (x * 2) 871 * = (x << 3) + (x << 1). 872 * 873 * In general, you can read the algorithm right off the bits: the number 10 874 * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3). 875 * 876 * Sometimes you can do better. For example, 15 is 1111 binary, so the normal 877 * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3). 878 * But, it's cheaper if you capitalize on the fact that you have a run of ones: 879 * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0). [You would never 880 * actually perform the operation << 0, since it's a no-op; I'm just writing 881 * it that way for clarity.] 882 * 883 * The other way you can win is if you get lucky with the prime factorization 884 * of your constant. The number 1,000,000,000, which we have to multiply 885 * by below, is a good example. One billion is 111011100110101100101000000000 886 * in binary. 
If you apply the bit-grouping trick, it doesn't buy you very 887 * much, because it's only a win for groups of three or more equal bits: 888 * 889 * 111011100110101100101000000000 = 1000000000000000000000000000000 890 * - 000100011001010011011000000000 891 * 892 * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS, 893 * we have reduced this to 10 shift/add pairs (20 operations) on the RHS. 894 * This is better, but not great. 895 * 896 * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125, 897 * and multiply by each factor. Multiplication by 125 is particularly easy, 898 * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four 899 * operations. So, to multiply by 1,000,000,000, we perform three multipli- 900 * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations. 901 * This is the algorithm we actually use in both hrt2ts() and ts2hrt(). 902 * 903 * Division is harder; there is no equivalent of the simple shift-add algorithm 904 * we used for multiplication. However, we can convert the division problem 905 * into a multiplication problem by pre-computing the binary representation 906 * of the reciprocal of the divisor. For the case of interest, we have 907 * 908 * 1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30, 909 * 910 * to 32 bits of precision. (The notation B-30 means "* 2^-30", just like 911 * E-18 means "* 10^-18".) 912 * 913 * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit 914 * integer 10001001011100000101111101000001, then normalize (shift) the 915 * result. 
 * This constant has several large bit runs, so the multiply
 * is relatively cheap:
 *
 *	10001001011100000101111101000001 = 10001001100000000110000001000001
 *					 - 00000000000100000000000100000000
 *
 * Again, you can just read the algorithm right off the bits:
 *
 *			sec = hrt;
 *			sec += (hrt << 6);
 *			sec -= (hrt << 8);
 *			sec += (hrt << 13);
 *			sec += (hrt << 14);
 *			sec -= (hrt << 20);
 *			sec += (hrt << 23);
 *			sec += (hrt << 24);
 *			sec += (hrt << 27);
 *			sec += (hrt << 31);
 *			sec >>= (32 + 30);
 *
 * Voila!  The only problem is, since hrt is 64 bits, we need to use 96-bit
 * arithmetic to perform this calculation.  That's a waste, because ultimately
 * we only need the highest 32 bits of the result.
 *
 * The first thing we do is to realize that we don't need to use all of hrt
 * in the calculation.  The lowest 30 bits can contribute at most 1 to the
 * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
 * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
 * Thus, the only bits of hrt that matter for division are bits 30..61.
 * These 32 bits are just the lower-order word of (hrt >> 30).  This brings
 * us down from 96-bit math to 64-bit math, and our algorithm becomes:
 *
 *			tmp = (uint32_t) (hrt >> 30);
 *			sec = tmp;
 *			sec += (tmp << 6);
 *			sec -= (tmp << 8);
 *			sec += (tmp << 13);
 *			sec += (tmp << 14);
 *			sec -= (tmp << 20);
 *			sec += (tmp << 23);
 *			sec += (tmp << 24);
 *			sec += (tmp << 27);
 *			sec += (tmp << 31);
 *			sec >>= 32;
 *
 * Next, we're going to reduce this 64-bit computation to a 32-bit
 * computation.  We begin by rewriting the above algorithm to use relative
 * shifts instead of absolute shifts.
That is, instead of computing 963 * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally: 964 * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc: 965 * 966 * tmp = (uint32_t) (hrt >> 30); 967 * sec = tmp; 968 * tmp <<= 6; sec += tmp; 969 * tmp <<= 2; sec -= tmp; 970 * tmp <<= 5; sec += tmp; 971 * tmp <<= 1; sec += tmp; 972 * tmp <<= 6; sec -= tmp; 973 * tmp <<= 3; sec += tmp; 974 * tmp <<= 1; sec += tmp; 975 * tmp <<= 3; sec += tmp; 976 * tmp <<= 4; sec += tmp; 977 * sec >>= 32; 978 * 979 * Now for the final step. Instead of throwing away the low 32 bits at 980 * the end, we can throw them away as we go, only keeping the high 32 bits 981 * of the product at each step. So, for example, where we now have 982 * 983 * tmp <<= 6; sec = sec + tmp; 984 * we will instead have 985 * tmp <<= 6; sec = (sec + tmp) >> 6; 986 * which is equivalent to 987 * sec = (sec >> 6) + tmp; 988 * 989 * The final shift ("sec >>= 32") goes away. 990 * 991 * All we're really doing here is long multiplication, just like we learned in 992 * grade school, except that at each step, we only look at the leftmost 32 993 * columns. The cumulative error is, at most, the sum of all the bits we 994 * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32. 995 * Thus, the final result ("sec") is correct to +/- 1. 996 * 997 * It turns out to be important to keep "sec" positive at each step, because 998 * we don't want to have to explicitly extend the sign bit. Therefore, 999 * starting with the last line of code above, each line that would have read 1000 * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and 1001 * the operators (+ or -) in all previous lines must be toggled accordingly. 
1002 * Thus, we end up with: 1003 * 1004 * tmp = (uint32_t) (hrt >> 30); 1005 * sec = tmp + (sec >> 6); 1006 * sec = tmp - (tmp >> 2); 1007 * sec = tmp - (sec >> 5); 1008 * sec = tmp + (sec >> 1); 1009 * sec = tmp - (sec >> 6); 1010 * sec = tmp - (sec >> 3); 1011 * sec = tmp + (sec >> 1); 1012 * sec = tmp + (sec >> 3); 1013 * sec = tmp + (sec >> 4); 1014 * 1015 * This yields a value for sec that is accurate to +1/-1, so we have two 1016 * cases to deal with. The mysterious-looking "+ 7" in the code below biases 1017 * the rounding toward zero, so that sec is always less than or equal to 1018 * the correct value. With this modified code, sec is accurate to +0/-2, with 1019 * the -2 case being very rare in practice. With this change, we only have to 1020 * deal with one case (sec too small) in the cleanup code. 1021 * 1022 * The other modification we make is to delete the second line above 1023 * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is 1024 * set, and the cleanup code can handle that rare case. This reduces the 1025 * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases. 1026 * 1027 * Finally, we compute nsec = hrt - (sec * 1,000,000,000). nsec will always 1028 * be positive (since sec is never too large), and will at most be equal to 1029 * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt. 1030 * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can 1031 * safely assume that nsec fits in 32 bits. Consequently, when we compute 1032 * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit 1033 * arithmetic and let the high-order bits fall off the end. 1034 * 1035 * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop: 1036 * 1037 * while (nsec >= NANOSEC) { 1038 * nsec -= NANOSEC; 1039 * sec++; 1040 * } 1041 * 1042 * is guaranteed to complete in at most 4 iterations. 
In practice, the loop 1043 * completes in 0 or 1 iteration over 95% of the time. 1044 * 1045 * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about 1046 * 35 usec for software division -- about 20 times faster. 1047 */ 1048 void 1049 hrt2ts(hrtime_t hrt, timestruc_t *tsp) 1050 { 1051 uint32_t sec, nsec, tmp; 1052 1053 tmp = (uint32_t)(hrt >> 30); 1054 sec = tmp - (tmp >> 2); 1055 sec = tmp - (sec >> 5); 1056 sec = tmp + (sec >> 1); 1057 sec = tmp - (sec >> 6) + 7; 1058 sec = tmp - (sec >> 3); 1059 sec = tmp + (sec >> 1); 1060 sec = tmp + (sec >> 3); 1061 sec = tmp + (sec >> 4); 1062 tmp = (sec << 7) - sec - sec - sec; 1063 tmp = (tmp << 7) - tmp - tmp - tmp; 1064 tmp = (tmp << 7) - tmp - tmp - tmp; 1065 nsec = (uint32_t)hrt - (tmp << 9); 1066 while (nsec >= NANOSEC) { 1067 nsec -= NANOSEC; 1068 sec++; 1069 } 1070 tsp->tv_sec = (time_t)sec; 1071 tsp->tv_nsec = nsec; 1072 } 1073 1074 /* 1075 * Convert from timestruc_t to hrtime_t. 1076 * 1077 * The code below is equivalent to: 1078 * 1079 * hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec; 1080 * 1081 * but requires no integer multiply. 1082 */ 1083 hrtime_t 1084 ts2hrt(const timestruc_t *tsp) 1085 { 1086 hrtime_t hrt; 1087 1088 hrt = tsp->tv_sec; 1089 hrt = (hrt << 7) - hrt - hrt - hrt; 1090 hrt = (hrt << 7) - hrt - hrt - hrt; 1091 hrt = (hrt << 7) - hrt - hrt - hrt; 1092 hrt = (hrt << 9) + tsp->tv_nsec; 1093 return (hrt); 1094 } 1095 1096 /* 1097 * For the various 32-bit "compatibility" paths in the system. 1098 */ 1099 void 1100 hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p) 1101 { 1102 timestruc_t ts; 1103 1104 hrt2ts(hrt, &ts); 1105 TIMESPEC_TO_TIMESPEC32(ts32p, &ts); 1106 } 1107 1108 /* 1109 * If this ever becomes performance critical (ha!), we can borrow the 1110 * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the 1111 * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by 1112 * 1,000. 
For now, we'll opt for readability (besides, the compiler does 1113 * a passable job of optimizing constant multiplication into shifts and adds). 1114 */ 1115 hrtime_t 1116 tv2hrt(struct timeval *tvp) 1117 { 1118 return ((hrtime_t)tvp->tv_sec * NANOSEC + 1119 (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC)); 1120 } 1121 1122 void 1123 hrt2tv(hrtime_t hrt, struct timeval *tvp) 1124 { 1125 uint32_t sec, nsec, tmp; 1126 uint32_t q, r, t; 1127 1128 tmp = (uint32_t)(hrt >> 30); 1129 sec = tmp - (tmp >> 2); 1130 sec = tmp - (sec >> 5); 1131 sec = tmp + (sec >> 1); 1132 sec = tmp - (sec >> 6) + 7; 1133 sec = tmp - (sec >> 3); 1134 sec = tmp + (sec >> 1); 1135 sec = tmp + (sec >> 3); 1136 sec = tmp + (sec >> 4); 1137 tmp = (sec << 7) - sec - sec - sec; 1138 tmp = (tmp << 7) - tmp - tmp - tmp; 1139 tmp = (tmp << 7) - tmp - tmp - tmp; 1140 nsec = (uint32_t)hrt - (tmp << 9); 1141 while (nsec >= NANOSEC) { 1142 nsec -= NANOSEC; 1143 sec++; 1144 } 1145 tvp->tv_sec = (time_t)sec; 1146 /* 1147 * this routine is very similar to hr2ts, but requires microseconds 1148 * instead of nanoseconds, so an interger divide by 1000 routine 1149 * completes the conversion 1150 */ 1151 t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12); 1152 q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14); 1153 q = q >> 9; 1154 r = nsec - q*1000; 1155 tvp->tv_usec = q + ((r + 24) >> 10); 1156 1157 } 1158 1159 int 1160 nanosleep(timespec_t *rqtp, timespec_t *rmtp) 1161 { 1162 timespec_t rqtime; 1163 timespec_t rmtime; 1164 timespec_t now; 1165 int ret = 1; 1166 model_t datamodel = get_udatamodel(); 1167 1168 if (datamodel == DATAMODEL_NATIVE) { 1169 if (copyin(rqtp, &rqtime, sizeof (rqtime))) 1170 return (set_errno(EFAULT)); 1171 } else { 1172 timespec32_t rqtime32; 1173 1174 if (copyin(rqtp, &rqtime32, sizeof (rqtime32))) 1175 return (set_errno(EFAULT)); 1176 TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32); 1177 } 1178 1179 if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 || 1180 rqtime.tv_nsec >= NANOSEC) 1181 return 
(set_errno(EINVAL)); 1182 1183 if (timerspecisset(&rqtime)) { 1184 gethrestime(&now); 1185 timespecadd(&rqtime, &now); 1186 mutex_enter(&curthread->t_delay_lock); 1187 while ((ret = cv_waituntil_sig(&curthread->t_delay_cv, 1188 &curthread->t_delay_lock, &rqtime)) > 0) 1189 continue; 1190 mutex_exit(&curthread->t_delay_lock); 1191 } 1192 1193 if (rmtp) { 1194 /* 1195 * If cv_waituntil_sig() returned due to a signal, and 1196 * there is time remaining, then set the time remaining. 1197 * Else set time remaining to zero 1198 */ 1199 rmtime.tv_sec = rmtime.tv_nsec = 0; 1200 if (ret == 0) { 1201 timespec_t delta = rqtime; 1202 1203 gethrestime(&now); 1204 timespecsub(&delta, &now); 1205 if (delta.tv_sec > 0 || (delta.tv_sec == 0 && 1206 delta.tv_nsec > 0)) 1207 rmtime = delta; 1208 } 1209 1210 if (datamodel == DATAMODEL_NATIVE) { 1211 if (copyout(&rmtime, rmtp, sizeof (rmtime))) 1212 return (set_errno(EFAULT)); 1213 } else { 1214 timespec32_t rmtime32; 1215 1216 TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime); 1217 if (copyout(&rmtime32, rmtp, sizeof (rmtime32))) 1218 return (set_errno(EFAULT)); 1219 } 1220 } 1221 1222 if (ret == 0) 1223 return (set_errno(EINTR)); 1224 return (0); 1225 } 1226 1227 /* 1228 * Routines to convert standard UNIX time (seconds since Jan 1, 1970) 1229 * into year/month/day/hour/minute/second format, and back again. 1230 * Note: these routines require tod_lock held to protect cached state. 
1231 */ 1232 static int days_thru_month[64] = { 1233 0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0, 1234 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0, 1235 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0, 1236 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0, 1237 }; 1238 1239 todinfo_t saved_tod; 1240 int saved_utc = -60; 1241 1242 todinfo_t 1243 utc_to_tod(time_t utc) 1244 { 1245 long dse, day, month, year; 1246 todinfo_t tod; 1247 1248 ASSERT(MUTEX_HELD(&tod_lock)); 1249 1250 if (utc < 0) /* should never happen */ 1251 utc = 0; 1252 1253 saved_tod.tod_sec += utc - saved_utc; 1254 saved_utc = utc; 1255 if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60) 1256 return (saved_tod); /* only the seconds changed */ 1257 1258 dse = utc / 86400; /* days since epoch */ 1259 1260 tod.tod_sec = utc % 60; 1261 tod.tod_min = (utc % 3600) / 60; 1262 tod.tod_hour = (utc % 86400) / 3600; 1263 tod.tod_dow = (dse + 4) % 7 + 1; /* epoch was a Thursday */ 1264 1265 year = dse / 365 + 72; /* first guess -- always a bit too large */ 1266 do { 1267 year--; 1268 day = dse - 365 * (year - 70) - ((year - 69) >> 2); 1269 } while (day < 0); 1270 1271 month = ((year & 3) << 4) + 1; 1272 while (day >= days_thru_month[month + 1]) 1273 month++; 1274 1275 tod.tod_day = day - days_thru_month[month] + 1; 1276 tod.tod_month = month & 15; 1277 tod.tod_year = year; 1278 1279 saved_tod = tod; 1280 return (tod); 1281 } 1282 1283 time_t 1284 tod_to_utc(todinfo_t tod) 1285 { 1286 time_t utc; 1287 int year = tod.tod_year; 1288 int month = tod.tod_month + ((year & 3) << 4); 1289 #ifdef DEBUG 1290 /* only warn once, not each time called */ 1291 static int year_warn = 1; 1292 static int month_warn = 1; 1293 static int day_warn = 1; 1294 static int hour_warn = 1; 1295 static int min_warn = 1; 1296 static int sec_warn = 1; 1297 int days_diff = days_thru_month[month + 1] - days_thru_month[month]; 1298 #endif 1299 1300 
ASSERT(MUTEX_HELD(&tod_lock)); 1301 1302 #ifdef DEBUG 1303 if (year_warn && (year < 70 || year > 8029)) { 1304 cmn_err(CE_WARN, 1305 "The hardware real-time clock appears to have the " 1306 "wrong years value %d -- time needs to be reset\n", 1307 year); 1308 year_warn = 0; 1309 } 1310 1311 if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) { 1312 cmn_err(CE_WARN, 1313 "The hardware real-time clock appears to have the " 1314 "wrong months value %d -- time needs to be reset\n", 1315 tod.tod_month); 1316 month_warn = 0; 1317 } 1318 1319 if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) { 1320 cmn_err(CE_WARN, 1321 "The hardware real-time clock appears to have the " 1322 "wrong days value %d -- time needs to be reset\n", 1323 tod.tod_day); 1324 day_warn = 0; 1325 } 1326 1327 if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) { 1328 cmn_err(CE_WARN, 1329 "The hardware real-time clock appears to have the " 1330 "wrong hours value %d -- time needs to be reset\n", 1331 tod.tod_hour); 1332 hour_warn = 0; 1333 } 1334 1335 if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) { 1336 cmn_err(CE_WARN, 1337 "The hardware real-time clock appears to have the " 1338 "wrong minutes value %d -- time needs to be reset\n", 1339 tod.tod_min); 1340 min_warn = 0; 1341 } 1342 1343 if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) { 1344 cmn_err(CE_WARN, 1345 "The hardware real-time clock appears to have the " 1346 "wrong seconds value %d -- time needs to be reset\n", 1347 tod.tod_sec); 1348 sec_warn = 0; 1349 } 1350 #endif 1351 1352 utc = (year - 70); /* next 3 lines: utc = 365y + y/4 */ 1353 utc += (utc << 3) + (utc << 6); 1354 utc += (utc << 2) + ((year - 69) >> 2); 1355 utc += days_thru_month[month] + tod.tod_day - 1; 1356 utc = (utc << 3) + (utc << 4) + tod.tod_hour; /* 24 * day + hour */ 1357 utc = (utc << 6) - (utc << 2) + tod.tod_min; /* 60 * hour + min */ 1358 utc = (utc << 6) - (utc << 2) + tod.tod_sec; /* 60 * min + sec */ 1359 1360 return 
(utc); 1361 } 1362