1 /* 2 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 #pragma ident "%Z%%M% %I% %E% SMI" 7 8 /* 9 * Copyright (c) 1982, 1986 Regents of the University of California. 10 * All rights reserved. The Berkeley software License Agreement 11 * specifies the terms and conditions for redistribution. 12 */ 13 14 #include <sys/param.h> 15 #include <sys/user.h> 16 #include <sys/vnode.h> 17 #include <sys/proc.h> 18 #include <sys/time.h> 19 #include <sys/systm.h> 20 #include <sys/kmem.h> 21 #include <sys/cmn_err.h> 22 #include <sys/cpuvar.h> 23 #include <sys/timer.h> 24 #include <sys/debug.h> 25 #include <sys/sysmacros.h> 26 #include <sys/cyclic.h> 27 28 static void realitexpire(void *); 29 static void realprofexpire(void *); 30 static void timeval_advance(struct timeval *, struct timeval *); 31 32 kmutex_t tod_lock; /* protects time-of-day stuff */ 33 34 /* 35 * Constant to define the minimum interval value of the ITIMER_REALPROF timer. 36 * Value is in microseconds; defaults to 500 usecs. Setting this value 37 * significantly lower may allow for denial-of-service attacks. 38 */ 39 int itimer_realprof_minimum = 500; 40 41 /* 42 * macro to compare a timeval to a timestruc 43 */ 44 45 #define TVTSCMP(tvp, tsp, cmp) \ 46 /* CSTYLED */ \ 47 ((tvp)->tv_sec cmp (tsp)->tv_sec || \ 48 ((tvp)->tv_sec == (tsp)->tv_sec && \ 49 /* CSTYLED */ \ 50 (tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec)) 51 52 /* 53 * Time of day and interval timer support. 54 * 55 * These routines provide the kernel entry points to get and set 56 * the time-of-day and per-process interval timers. Subroutines 57 * here provide support for adding and subtracting timeval structures 58 * and decrementing interval timers, optionally reloading the interval 59 * timers when they expire. 60 */ 61 62 /* 63 * SunOS function to generate monotonically increasing time values. 64 */ 65 void 66 uniqtime(struct timeval *tv) 67 { 68 static struct timeval last; 69 timestruc_t ts; 70 time_t sec; 71 int usec, nsec; 72 73 /* 74 * protect modification of last 75 */ 76 mutex_enter(&tod_lock); 77 gethrestime(&ts); 78 79 /* 80 * Fast algorithm to convert nsec to usec -- see hrt2ts() 81 * in common/os/timers.c for a full description. 82 */ 83 nsec = ts.tv_nsec; 84 usec = nsec + (nsec >> 2); 85 usec = nsec + (usec >> 1); 86 usec = nsec + (usec >> 2); 87 usec = nsec + (usec >> 4); 88 usec = nsec - (usec >> 3); 89 usec = nsec + (usec >> 2); 90 usec = nsec + (usec >> 3); 91 usec = nsec + (usec >> 4); 92 usec = nsec + (usec >> 1); 93 usec = nsec + (usec >> 6); 94 usec = usec >> 10; 95 sec = ts.tv_sec; 96 97 /* 98 * Try to keep timestamps unique, but don't be obsessive about 99 * it in the face of large differences. 100 */ 101 if ((sec <= last.tv_sec) && /* same or lower seconds, and */ 102 ((sec != last.tv_sec) || /* either different second or */ 103 (usec <= last.tv_usec)) && /* lower microsecond, and */ 104 ((last.tv_sec - sec) <= 5)) { /* not way back in time */ 105 sec = last.tv_sec; 106 usec = last.tv_usec + 1; 107 if (usec >= MICROSEC) { 108 usec -= MICROSEC; 109 sec++; 110 } 111 } 112 last.tv_sec = sec; 113 last.tv_usec = usec; 114 mutex_exit(&tod_lock); 115 116 tv->tv_sec = sec; 117 tv->tv_usec = usec; 118 } 119 120 /* 121 * Timestamps are exported from the kernel in several places. 122 * Such timestamps are commonly used for either uniqueness or for 123 * sequencing - truncation to 32-bits is fine for uniqueness, 124 * but sequencing is going to take more work as we get closer to 2038! 125 */ 126 void 127 uniqtime32(struct timeval32 *tv32p) 128 { 129 struct timeval tv; 130 131 uniqtime(&tv); 132 TIMEVAL_TO_TIMEVAL32(tv32p, &tv); 133 } 134 135 int 136 gettimeofday(struct timeval *tp) 137 { 138 struct timeval atv; 139 140 if (tp) { 141 uniqtime(&atv); 142 if (get_udatamodel() == DATAMODEL_NATIVE) { 143 if (copyout(&atv, tp, sizeof (atv))) 144 return (set_errno(EFAULT)); 145 } else { 146 struct timeval32 tv32; 147 148 if (TIMEVAL_OVERFLOW(&atv)) 149 return (set_errno(EOVERFLOW)); 150 TIMEVAL_TO_TIMEVAL32(&tv32, &atv); 151 152 if (copyout(&tv32, tp, sizeof (tv32))) 153 return (set_errno(EFAULT)); 154 } 155 } 156 return (0); 157 } 158 159 int 160 getitimer(uint_t which, struct itimerval *itv) 161 { 162 int error; 163 164 if (get_udatamodel() == DATAMODEL_NATIVE) 165 error = xgetitimer(which, itv, 0); 166 else { 167 struct itimerval kitv; 168 169 if ((error = xgetitimer(which, &kitv, 1)) == 0) { 170 if (ITIMERVAL_OVERFLOW(&kitv)) { 171 error = EOVERFLOW; 172 } else { 173 struct itimerval32 itv32; 174 175 ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv); 176 if (copyout(&itv32, itv, sizeof (itv32)) != 0) 177 error = EFAULT; 178 } 179 } 180 } 181 182 return (error ? (set_errno(error)) : 0); 183 } 184 185 int 186 xgetitimer(uint_t which, struct itimerval *itv, int iskaddr) 187 { 188 struct proc *p = curproc; 189 struct timeval now; 190 struct itimerval aitv; 191 hrtime_t ts, first, interval, remain; 192 193 mutex_enter(&p->p_lock); 194 195 switch (which) { 196 case ITIMER_VIRTUAL: 197 case ITIMER_PROF: 198 aitv = ttolwp(curthread)->lwp_timer[which]; 199 break; 200 201 case ITIMER_REAL: 202 uniqtime(&now); 203 aitv = p->p_realitimer; 204 205 if (timerisset(&aitv.it_value)) { 206 /*CSTYLED*/ 207 if (timercmp(&aitv.it_value, &now, <)) { 208 timerclear(&aitv.it_value); 209 } else { 210 timevalsub(&aitv.it_value, &now); 211 } 212 } 213 break; 214 215 case ITIMER_REALPROF: 216 if (curproc->p_rprof_cyclic == CYCLIC_NONE) { 217 bzero(&aitv, sizeof (aitv)); 218 break; 219 } 220 221 aitv = curproc->p_rprof_timer; 222 223 first = tv2hrt(&aitv.it_value); 224 interval = tv2hrt(&aitv.it_interval); 225 226 if ((ts = gethrtime()) < first) { 227 /* 228 * We haven't gone off for the first time; the time 229 * remaining is simply the first time we will go 230 * off minus the current time. 231 */ 232 remain = first - ts; 233 } else { 234 if (interval == 0) { 235 /* 236 * This was set as a one-shot, and we've 237 * already gone off; there is no time 238 * remaining. 239 */ 240 remain = 0; 241 } else { 242 /* 243 * We have a non-zero interval; we need to 244 * determine how far we are into the current 245 * interval, and subtract that from the 246 * interval to determine the time remaining. 247 */ 248 remain = interval - ((ts - first) % interval); 249 } 250 } 251 252 hrt2tv(remain, &aitv.it_value); 253 break; 254 255 default: 256 mutex_exit(&p->p_lock); 257 return (EINVAL); 258 } 259 260 mutex_exit(&p->p_lock); 261 262 if (iskaddr) { 263 bcopy(&aitv, itv, sizeof (*itv)); 264 } else { 265 ASSERT(get_udatamodel() == DATAMODEL_NATIVE); 266 if (copyout(&aitv, itv, sizeof (*itv))) 267 return (EFAULT); 268 } 269 270 return (0); 271 } 272 273 274 int 275 setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv) 276 { 277 int error; 278 279 if (oitv != NULL) 280 if ((error = getitimer(which, oitv)) != 0) 281 return (error); 282 283 if (itv == NULL) 284 return (0); 285 286 if (get_udatamodel() == DATAMODEL_NATIVE) 287 error = xsetitimer(which, itv, 0); 288 else { 289 struct itimerval32 itv32; 290 struct itimerval kitv; 291 292 if (copyin(itv, &itv32, sizeof (itv32))) 293 error = EFAULT; 294 ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32); 295 error = xsetitimer(which, &kitv, 1); 296 } 297 298 return (error ? (set_errno(error)) : 0); 299 } 300 301 int 302 xsetitimer(uint_t which, struct itimerval *itv, int iskaddr) 303 { 304 struct itimerval aitv; 305 struct timeval now; 306 struct proc *p = curproc; 307 kthread_t *t; 308 timeout_id_t tmp_id; 309 cyc_handler_t hdlr; 310 cyc_time_t when; 311 cyclic_id_t cyclic; 312 hrtime_t ts; 313 int min; 314 315 if (itv == NULL) 316 return (0); 317 318 if (iskaddr) { 319 bcopy(itv, &aitv, sizeof (aitv)); 320 } else { 321 ASSERT(get_udatamodel() == DATAMODEL_NATIVE); 322 if (copyin(itv, &aitv, sizeof (aitv))) 323 return (EFAULT); 324 } 325 326 if (which == ITIMER_REALPROF) { 327 min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)), 328 itimer_realprof_minimum); 329 } else { 330 min = usec_per_tick; 331 } 332 333 if (itimerfix(&aitv.it_value, min) || 334 (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value))) 335 return (EINVAL); 336 337 mutex_enter(&p->p_lock); 338 switch (which) { 339 case ITIMER_REAL: 340 /* 341 * The SITBUSY flag prevents conflicts with multiple 342 * threads attempting to perform setitimer(ITIMER_REAL) 343 * at the same time, even when we drop p->p_lock below. 344 * Any blocked thread returns successfully because the 345 * effect is the same as if it got here first, finished, 346 * and the other thread then came through and destroyed 347 * what it did. We are just protecting the system from 348 * malfunctioning due to the race condition. 349 */ 350 if (p->p_flag & SITBUSY) { 351 mutex_exit(&p->p_lock); 352 return (0); 353 } 354 p->p_flag |= SITBUSY; 355 while ((tmp_id = p->p_itimerid) != 0) { 356 /* 357 * Avoid deadlock in callout_delete (called from 358 * untimeout) which may go to sleep (while holding 359 * p_lock). Drop p_lock and re-acquire it after 360 * untimeout returns. Need to clear p_itimerid 361 * while holding p_lock. 362 */ 363 p->p_itimerid = 0; 364 mutex_exit(&p->p_lock); 365 (void) untimeout(tmp_id); 366 mutex_enter(&p->p_lock); 367 } 368 if (timerisset(&aitv.it_value)) { 369 uniqtime(&now); 370 timevaladd(&aitv.it_value, &now); 371 p->p_itimerid = realtime_timeout(realitexpire, 372 p, hzto(&aitv.it_value)); 373 } 374 p->p_realitimer = aitv; 375 p->p_flag &= ~SITBUSY; 376 break; 377 378 case ITIMER_REALPROF: 379 cyclic = p->p_rprof_cyclic; 380 p->p_rprof_cyclic = CYCLIC_NONE; 381 382 mutex_exit(&p->p_lock); 383 384 /* 385 * We're now going to acquire cpu_lock, remove the old cyclic 386 * if necessary, and add our new cyclic. 387 */ 388 mutex_enter(&cpu_lock); 389 390 if (cyclic != CYCLIC_NONE) 391 cyclic_remove(cyclic); 392 393 if (!timerisset(&aitv.it_value)) { 394 /* 395 * If we were passed a value of 0, we're done. 396 */ 397 mutex_exit(&cpu_lock); 398 return (0); 399 } 400 401 hdlr.cyh_func = realprofexpire; 402 hdlr.cyh_arg = p; 403 hdlr.cyh_level = CY_LOW_LEVEL; 404 405 when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value)); 406 when.cyt_interval = tv2hrt(&aitv.it_interval); 407 408 if (when.cyt_interval == 0) { 409 /* 410 * Using the same logic as for CLOCK_HIGHRES timers, we 411 * set the interval to be INT64_MAX - when.cyt_when to 412 * effect a one-shot; see the comment in clock_highres.c 413 * for more details on why this works. 414 */ 415 when.cyt_interval = INT64_MAX - when.cyt_when; 416 } 417 418 cyclic = cyclic_add(&hdlr, &when); 419 420 mutex_exit(&cpu_lock); 421 422 /* 423 * We have now successfully added the cyclic. Reacquire 424 * p_lock, and see if anyone has snuck in. 425 */ 426 mutex_enter(&p->p_lock); 427 428 if (p->p_rprof_cyclic != CYCLIC_NONE) { 429 /* 430 * We're racing with another thread establishing an 431 * ITIMER_REALPROF interval timer. We'll let the other 432 * thread win (this is a race at the application level, 433 * so letting the other thread win is acceptable). 434 */ 435 mutex_exit(&p->p_lock); 436 mutex_enter(&cpu_lock); 437 cyclic_remove(cyclic); 438 mutex_exit(&cpu_lock); 439 440 return (0); 441 } 442 443 /* 444 * Success. Set our tracking variables in the proc structure, 445 * cancel any outstanding ITIMER_PROF, and allocate the 446 * per-thread SIGPROF buffers, if possible. 447 */ 448 hrt2tv(ts, &aitv.it_value); 449 p->p_rprof_timer = aitv; 450 p->p_rprof_cyclic = cyclic; 451 452 t = p->p_tlist; 453 do { 454 struct itimerval *itvp; 455 456 itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF]; 457 timerclear(&itvp->it_interval); 458 timerclear(&itvp->it_value); 459 460 if (t->t_rprof != NULL) 461 continue; 462 463 t->t_rprof = 464 kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP); 465 aston(t); 466 } while ((t = t->t_forw) != p->p_tlist); 467 468 break; 469 470 case ITIMER_VIRTUAL: 471 ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv; 472 break; 473 474 case ITIMER_PROF: 475 if (p->p_rprof_cyclic != CYCLIC_NONE) { 476 /* 477 * Silently ignore ITIMER_PROF if ITIMER_REALPROF 478 * is in effect. 479 */ 480 break; 481 } 482 483 ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv; 484 break; 485 486 default: 487 mutex_exit(&p->p_lock); 488 return (EINVAL); 489 } 490 mutex_exit(&p->p_lock); 491 return (0); 492 } 493 494 /* 495 * Real interval timer expired: 496 * send process whose timer expired an alarm signal. 497 * If time is not set up to reload, then just return. 498 * Else compute next time timer should go off which is > current time. 499 * This is where delay in processing this timeout causes multiple 500 * SIGALRM calls to be compressed into one. 501 */ 502 static void 503 realitexpire(void *arg) 504 { 505 struct proc *p = arg; 506 struct timeval *valp = &p->p_realitimer.it_value; 507 struct timeval *intervalp = &p->p_realitimer.it_interval; 508 #if !defined(_LP64) 509 clock_t ticks; 510 #endif 511 512 mutex_enter(&p->p_lock); 513 #if !defined(_LP64) 514 if ((ticks = hzto(valp)) > 1) { 515 /* 516 * If we are executing before we were meant to, it must be 517 * because of an overflow in a prior hzto() calculation. 518 * In this case, we want to go to sleep for the recalculated 519 * number of ticks. For the special meaning of the value "1" 520 * see comment in timespectohz(). 521 */ 522 p->p_itimerid = realtime_timeout(realitexpire, p, ticks); 523 mutex_exit(&p->p_lock); 524 return; 525 } 526 #endif 527 sigtoproc(p, NULL, SIGALRM); 528 if (!timerisset(intervalp)) { 529 timerclear(valp); 530 p->p_itimerid = 0; 531 } else { 532 /* advance timer value past current time */ 533 timeval_advance(valp, intervalp); 534 p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp)); 535 } 536 mutex_exit(&p->p_lock); 537 } 538 539 /* 540 * Real time profiling interval timer expired: 541 * Increment microstate counters for each lwp in the process 542 * and ensure that running lwps are kicked into the kernel. 543 * If time is not set up to reload, then just return. 544 * Else compute next time timer should go off which is > current time, 545 * as above. 546 */ 547 static void 548 realprofexpire(void *arg) 549 { 550 struct proc *p = arg; 551 kthread_t *t; 552 553 mutex_enter(&p->p_lock); 554 if ((t = p->p_tlist) == NULL) { 555 mutex_exit(&p->p_lock); 556 return; 557 } 558 do { 559 int mstate; 560 561 /* 562 * Attempt to allocate the SIGPROF buffer, but don't sleep. 563 */ 564 if (t->t_rprof == NULL) 565 t->t_rprof = kmem_zalloc(sizeof (struct rprof), 566 KM_NOSLEEP); 567 if (t->t_rprof == NULL) 568 continue; 569 570 thread_lock(t); 571 switch (t->t_state) { 572 case TS_SLEEP: 573 /* 574 * Don't touch the lwp is it is swapped out. 575 */ 576 if (!(t->t_schedflag & TS_LOAD)) { 577 mstate = LMS_SLEEP; 578 break; 579 } 580 switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) { 581 case LMS_TFAULT: 582 case LMS_DFAULT: 583 case LMS_KFAULT: 584 case LMS_USER_LOCK: 585 break; 586 default: 587 mstate = LMS_SLEEP; 588 break; 589 } 590 break; 591 case TS_RUN: 592 mstate = LMS_WAIT_CPU; 593 break; 594 case TS_ONPROC: 595 switch (mstate = t->t_mstate) { 596 case LMS_USER: 597 case LMS_SYSTEM: 598 case LMS_TRAP: 599 break; 600 default: 601 mstate = LMS_SYSTEM; 602 break; 603 } 604 break; 605 default: 606 mstate = t->t_mstate; 607 break; 608 } 609 t->t_rprof->rp_anystate = 1; 610 t->t_rprof->rp_state[mstate]++; 611 aston(t); 612 /* 613 * force the thread into the kernel 614 * if it is not already there. 615 */ 616 if (t->t_state == TS_ONPROC && t->t_cpu != CPU) 617 poke_cpu(t->t_cpu->cpu_id); 618 thread_unlock(t); 619 } while ((t = t->t_forw) != p->p_tlist); 620 621 mutex_exit(&p->p_lock); 622 } 623 624 /* 625 * Advances timer value past the current time of day. See the detailed 626 * comment for this logic in realitsexpire(), above. 627 */ 628 static void 629 timeval_advance(struct timeval *valp, struct timeval *intervalp) 630 { 631 int cnt2nth; 632 struct timeval interval2nth; 633 634 for (;;) { 635 interval2nth = *intervalp; 636 for (cnt2nth = 0; ; cnt2nth++) { 637 timevaladd(valp, &interval2nth); 638 /*CSTYLED*/ 639 if (TVTSCMP(valp, &hrestime, >)) 640 break; 641 timevaladd(&interval2nth, &interval2nth); 642 } 643 if (cnt2nth == 0) 644 break; 645 timevalsub(valp, &interval2nth); 646 } 647 } 648 649 /* 650 * Check that a proposed value to load into the .it_value or .it_interval 651 * part of an interval timer is acceptable, and set it to at least a 652 * specified minimal value. 653 */ 654 int 655 itimerfix(struct timeval *tv, int minimum) 656 { 657 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || 658 tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) 659 return (EINVAL); 660 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum) 661 tv->tv_usec = minimum; 662 return (0); 663 } 664 665 /* 666 * Same as itimerfix, except a) it takes a timespec instead of a timeval and 667 * b) it doesn't truncate based on timeout granularity; consumers of this 668 * interface (e.g. timer_settime()) depend on the passed timespec not being 669 * modified implicitly. 670 */ 671 int 672 itimerspecfix(timespec_t *tv) 673 { 674 if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC) 675 return (EINVAL); 676 return (0); 677 } 678 679 /* 680 * Decrement an interval timer by a specified number 681 * of microseconds, which must be less than a second, 682 * i.e. < 1000000. If the timer expires, then reload 683 * it. In this case, carry over (usec - old value) to 684 * reducint the value reloaded into the timer so that 685 * the timer does not drift. This routine assumes 686 * that it is called in a context where the timers 687 * on which it is operating cannot change in value. 688 */ 689 int 690 itimerdecr(struct itimerval *itp, int usec) 691 { 692 if (itp->it_value.tv_usec < usec) { 693 if (itp->it_value.tv_sec == 0) { 694 /* expired, and already in next interval */ 695 usec -= itp->it_value.tv_usec; 696 goto expire; 697 } 698 itp->it_value.tv_usec += MICROSEC; 699 itp->it_value.tv_sec--; 700 } 701 itp->it_value.tv_usec -= usec; 702 usec = 0; 703 if (timerisset(&itp->it_value)) 704 return (1); 705 /* expired, exactly at end of interval */ 706 expire: 707 if (timerisset(&itp->it_interval)) { 708 itp->it_value = itp->it_interval; 709 itp->it_value.tv_usec -= usec; 710 if (itp->it_value.tv_usec < 0) { 711 itp->it_value.tv_usec += MICROSEC; 712 itp->it_value.tv_sec--; 713 } 714 } else 715 itp->it_value.tv_usec = 0; /* sec is already 0 */ 716 return (0); 717 } 718 719 /* 720 * Add and subtract routines for timevals. 721 * N.B.: subtract routine doesn't deal with 722 * results which are before the beginning, 723 * it just gets very confused in this case. 724 * Caveat emptor. 725 */ 726 void 727 timevaladd(struct timeval *t1, struct timeval *t2) 728 { 729 t1->tv_sec += t2->tv_sec; 730 t1->tv_usec += t2->tv_usec; 731 timevalfix(t1); 732 } 733 734 void 735 timevalsub(struct timeval *t1, struct timeval *t2) 736 { 737 t1->tv_sec -= t2->tv_sec; 738 t1->tv_usec -= t2->tv_usec; 739 timevalfix(t1); 740 } 741 742 void 743 timevalfix(struct timeval *t1) 744 { 745 if (t1->tv_usec < 0) { 746 t1->tv_sec--; 747 t1->tv_usec += MICROSEC; 748 } 749 if (t1->tv_usec >= MICROSEC) { 750 t1->tv_sec++; 751 t1->tv_usec -= MICROSEC; 752 } 753 } 754 755 /* 756 * Same as the routines above. These routines take a timespec instead 757 * of a timeval. 758 */ 759 void 760 timespecadd(timespec_t *t1, timespec_t *t2) 761 { 762 t1->tv_sec += t2->tv_sec; 763 t1->tv_nsec += t2->tv_nsec; 764 timespecfix(t1); 765 } 766 767 void 768 timespecsub(timespec_t *t1, timespec_t *t2) 769 { 770 t1->tv_sec -= t2->tv_sec; 771 t1->tv_nsec -= t2->tv_nsec; 772 timespecfix(t1); 773 } 774 775 void 776 timespecfix(timespec_t *t1) 777 { 778 if (t1->tv_nsec < 0) { 779 t1->tv_sec--; 780 t1->tv_nsec += NANOSEC; 781 } else { 782 if (t1->tv_nsec >= NANOSEC) { 783 t1->tv_sec++; 784 t1->tv_nsec -= NANOSEC; 785 } 786 } 787 } 788 789 /* 790 * Compute number of hz until specified time. 791 * Used to compute third argument to timeout() from an absolute time. 792 */ 793 clock_t 794 hzto(struct timeval *tv) 795 { 796 timespec_t ts, now; 797 798 ts.tv_sec = tv->tv_sec; 799 ts.tv_nsec = tv->tv_usec * 1000; 800 gethrestime_lasttick(&now); 801 802 return (timespectohz(&ts, now)); 803 } 804 805 /* 806 * Compute number of hz until specified time for a given timespec value. 807 * Used to compute third argument to timeout() from an absolute time. 808 */ 809 clock_t 810 timespectohz(timespec_t *tv, timespec_t now) 811 { 812 clock_t ticks; 813 time_t sec; 814 int nsec; 815 816 /* 817 * Compute number of ticks we will see between now and 818 * the target time; returns "1" if the destination time 819 * is before the next tick, so we always get some delay, 820 * and returns LONG_MAX ticks if we would overflow. 821 */ 822 sec = tv->tv_sec - now.tv_sec; 823 nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1; 824 825 if (nsec < 0) { 826 sec--; 827 nsec += NANOSEC; 828 } else if (nsec >= NANOSEC) { 829 sec++; 830 nsec -= NANOSEC; 831 } 832 833 ticks = NSEC_TO_TICK(nsec); 834 835 /* 836 * Compute ticks, accounting for negative and overflow as above. 837 * Overflow protection kicks in at about 70 weeks for hz=50 838 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit 839 * kernel :-) 840 */ 841 if (sec < 0 || (sec == 0 && ticks < 1)) 842 ticks = 1; /* protect vs nonpositive */ 843 else if (sec > (LONG_MAX - ticks) / hz) 844 ticks = LONG_MAX; /* protect vs overflow */ 845 else 846 ticks += sec * hz; /* common case */ 847 848 return (ticks); 849 } 850 851 /* 852 * hrt2ts(): convert from hrtime_t to timestruc_t. 853 * 854 * All this routine really does is: 855 * 856 * tsp->sec = hrt / NANOSEC; 857 * tsp->nsec = hrt % NANOSEC; 858 * 859 * The black magic below avoids doing a 64-bit by 32-bit integer divide, 860 * which is quite expensive. There's actually much more going on here than 861 * it might first appear -- don't try this at home. 862 * 863 * For the adventuresome, here's an explanation of how it works. 864 * 865 * Multiplication by a fixed constant is easy -- you just do the appropriate 866 * shifts and adds. For example, to multiply by 10, we observe that 867 * 868 * x * 10 = x * (8 + 2) 869 * = (x * 8) + (x * 2) 870 * = (x << 3) + (x << 1). 871 * 872 * In general, you can read the algorithm right off the bits: the number 10 873 * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3). 874 * 875 * Sometimes you can do better. For example, 15 is 1111 binary, so the normal 876 * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3). 877 * But, it's cheaper if you capitalize on the fact that you have a run of ones: 878 * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0). [You would never 879 * actually perform the operation << 0, since it's a no-op; I'm just writing 880 * it that way for clarity.] 881 * 882 * The other way you can win is if you get lucky with the prime factorization 883 * of your constant. The number 1,000,000,000, which we have to multiply 884 * by below, is a good example. One billion is 111011100110101100101000000000 885 * in binary. If you apply the bit-grouping trick, it doesn't buy you very 886 * much, because it's only a win for groups of three or more equal bits: 887 * 888 * 111011100110101100101000000000 = 1000000000000000000000000000000 889 * - 000100011001010011011000000000 890 * 891 * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS, 892 * we have reduced this to 10 shift/add pairs (20 operations) on the RHS. 893 * This is better, but not great. 894 * 895 * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125, 896 * and multiply by each factor. Multiplication by 125 is particularly easy, 897 * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four 898 * operations. So, to multiply by 1,000,000,000, we perform three multipli- 899 * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations. 900 * This is the algorithm we actually use in both hrt2ts() and ts2hrt(). 901 * 902 * Division is harder; there is no equivalent of the simple shift-add algorithm 903 * we used for multiplication. However, we can convert the division problem 904 * into a multiplication problem by pre-computing the binary representation 905 * of the reciprocal of the divisor. For the case of interest, we have 906 * 907 * 1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30, 908 * 909 * to 32 bits of precision. (The notation B-30 means "* 2^-30", just like 910 * E-18 means "* 10^-18".) 911 * 912 * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit 913 * integer 10001001011100000101111101000001, then normalize (shift) the 914 * result. This constant has several large bits runs, so the multiply 915 * is relatively cheap: 916 * 917 * 10001001011100000101111101000001 = 10001001100000000110000001000001 918 * - 00000000000100000000000100000000 919 * 920 * Again, you can just read the algorithm right off the bits: 921 * 922 * sec = hrt; 923 * sec += (hrt << 6); 924 * sec -= (hrt << 8); 925 * sec += (hrt << 13); 926 * sec += (hrt << 14); 927 * sec -= (hrt << 20); 928 * sec += (hrt << 23); 929 * sec += (hrt << 24); 930 * sec += (hrt << 27); 931 * sec += (hrt << 31); 932 * sec >>= (32 + 30); 933 * 934 * Voila! The only problem is, since hrt is 64 bits, we need to use 96-bit 935 * arithmetic to perform this calculation. That's a waste, because ultimately 936 * we only need the highest 32 bits of the result. 937 * 938 * The first thing we do is to realize that we don't need to use all of hrt 939 * in the calculation. The lowest 30 bits can contribute at most 1 to the 940 * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later. 941 * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t. 942 * Thus, the only bits of hrt that matter for division are bits 30..61. 943 * These 32 bits are just the lower-order word of (hrt >> 30). This brings 944 * us down from 96-bit math to 64-bit math, and our algorithm becomes: 945 * 946 * tmp = (uint32_t) (hrt >> 30); 947 * sec = tmp; 948 * sec += (tmp << 6); 949 * sec -= (tmp << 8); 950 * sec += (tmp << 13); 951 * sec += (tmp << 14); 952 * sec -= (tmp << 20); 953 * sec += (tmp << 23); 954 * sec += (tmp << 24); 955 * sec += (tmp << 27); 956 * sec += (tmp << 31); 957 * sec >>= 32; 958 * 959 * Next, we're going to reduce this 64-bit computation to a 32-bit 960 * computation. We begin by rewriting the above algorithm to use relative 961 * shifts instead of absolute shifts. That is, instead of computing 962 * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally: 963 * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc: 964 * 965 * tmp = (uint32_t) (hrt >> 30); 966 * sec = tmp; 967 * tmp <<= 6; sec += tmp; 968 * tmp <<= 2; sec -= tmp; 969 * tmp <<= 5; sec += tmp; 970 * tmp <<= 1; sec += tmp; 971 * tmp <<= 6; sec -= tmp; 972 * tmp <<= 3; sec += tmp; 973 * tmp <<= 1; sec += tmp; 974 * tmp <<= 3; sec += tmp; 975 * tmp <<= 4; sec += tmp; 976 * sec >>= 32; 977 * 978 * Now for the final step. Instead of throwing away the low 32 bits at 979 * the end, we can throw them away as we go, only keeping the high 32 bits 980 * of the product at each step. So, for example, where we now have 981 * 982 * tmp <<= 6; sec = sec + tmp; 983 * we will instead have 984 * tmp <<= 6; sec = (sec + tmp) >> 6; 985 * which is equivalent to 986 * sec = (sec >> 6) + tmp; 987 * 988 * The final shift ("sec >>= 32") goes away. 989 * 990 * All we're really doing here is long multiplication, just like we learned in 991 * grade school, except that at each step, we only look at the leftmost 32 992 * columns. The cumulative error is, at most, the sum of all the bits we 993 * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32. 994 * Thus, the final result ("sec") is correct to +/- 1. 995 * 996 * It turns out to be important to keep "sec" positive at each step, because 997 * we don't want to have to explicitly extend the sign bit. Therefore, 998 * starting with the last line of code above, each line that would have read 999 * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and 1000 * the operators (+ or -) in all previous lines must be toggled accordingly. 1001 * Thus, we end up with: 1002 * 1003 * tmp = (uint32_t) (hrt >> 30); 1004 * sec = tmp + (sec >> 6); 1005 * sec = tmp - (tmp >> 2); 1006 * sec = tmp - (sec >> 5); 1007 * sec = tmp + (sec >> 1); 1008 * sec = tmp - (sec >> 6); 1009 * sec = tmp - (sec >> 3); 1010 * sec = tmp + (sec >> 1); 1011 * sec = tmp + (sec >> 3); 1012 * sec = tmp + (sec >> 4); 1013 * 1014 * This yields a value for sec that is accurate to +1/-1, so we have two 1015 * cases to deal with. The mysterious-looking "+ 7" in the code below biases 1016 * the rounding toward zero, so that sec is always less than or equal to 1017 * the correct value. With this modified code, sec is accurate to +0/-2, with 1018 * the -2 case being very rare in practice. With this change, we only have to 1019 * deal with one case (sec too small) in the cleanup code. 1020 * 1021 * The other modification we make is to delete the second line above 1022 * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is 1023 * set, and the cleanup code can handle that rare case. This reduces the 1024 * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases. 1025 * 1026 * Finally, we compute nsec = hrt - (sec * 1,000,000,000). nsec will always 1027 * be positive (since sec is never too large), and will at most be equal to 1028 * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt. 1029 * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can 1030 * safely assume that nsec fits in 32 bits. Consequently, when we compute 1031 * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit 1032 * arithmetic and let the high-order bits fall off the end. 1033 * 1034 * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop: 1035 * 1036 * while (nsec >= NANOSEC) { 1037 * nsec -= NANOSEC; 1038 * sec++; 1039 * } 1040 * 1041 * is guaranteed to complete in at most 4 iterations. In practice, the loop 1042 * completes in 0 or 1 iteration over 95% of the time. 1043 * 1044 * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about 1045 * 35 usec for software division -- about 20 times faster. 1046 */ 1047 void 1048 hrt2ts(hrtime_t hrt, timestruc_t *tsp) 1049 { 1050 uint32_t sec, nsec, tmp; 1051 1052 tmp = (uint32_t)(hrt >> 30); 1053 sec = tmp - (tmp >> 2); 1054 sec = tmp - (sec >> 5); 1055 sec = tmp + (sec >> 1); 1056 sec = tmp - (sec >> 6) + 7; 1057 sec = tmp - (sec >> 3); 1058 sec = tmp + (sec >> 1); 1059 sec = tmp + (sec >> 3); 1060 sec = tmp + (sec >> 4); 1061 tmp = (sec << 7) - sec - sec - sec; 1062 tmp = (tmp << 7) - tmp - tmp - tmp; 1063 tmp = (tmp << 7) - tmp - tmp - tmp; 1064 nsec = (uint32_t)hrt - (tmp << 9); 1065 while (nsec >= NANOSEC) { 1066 nsec -= NANOSEC; 1067 sec++; 1068 } 1069 tsp->tv_sec = (time_t)sec; 1070 tsp->tv_nsec = nsec; 1071 } 1072 1073 /* 1074 * Convert from timestruc_t to hrtime_t. 1075 * 1076 * The code below is equivalent to: 1077 * 1078 * hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec; 1079 * 1080 * but requires no integer multiply. 1081 */ 1082 hrtime_t 1083 ts2hrt(const timestruc_t *tsp) 1084 { 1085 hrtime_t hrt; 1086 1087 hrt = tsp->tv_sec; 1088 hrt = (hrt << 7) - hrt - hrt - hrt; 1089 hrt = (hrt << 7) - hrt - hrt - hrt; 1090 hrt = (hrt << 7) - hrt - hrt - hrt; 1091 hrt = (hrt << 9) + tsp->tv_nsec; 1092 return (hrt); 1093 } 1094 1095 /* 1096 * For the various 32-bit "compatibility" paths in the system. 1097 */ 1098 void 1099 hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p) 1100 { 1101 timestruc_t ts; 1102 1103 hrt2ts(hrt, &ts); 1104 TIMESPEC_TO_TIMESPEC32(ts32p, &ts); 1105 } 1106 1107 /* 1108 * If this ever becomes performance critical (ha!), we can borrow the 1109 * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the 1110 * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by 1111 * 1,000. For now, we'll opt for readability (besides, the compiler does 1112 * a passable job of optimizing constant multiplication into shifts and adds). 1113 */ 1114 hrtime_t 1115 tv2hrt(struct timeval *tvp) 1116 { 1117 return ((hrtime_t)tvp->tv_sec * NANOSEC + 1118 (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC)); 1119 } 1120 1121 void 1122 hrt2tv(hrtime_t hrt, struct timeval *tvp) 1123 { 1124 uint32_t sec, nsec, tmp; 1125 uint32_t q, r, t; 1126 1127 tmp = (uint32_t)(hrt >> 30); 1128 sec = tmp - (tmp >> 2); 1129 sec = tmp - (sec >> 5); 1130 sec = tmp + (sec >> 1); 1131 sec = tmp - (sec >> 6) + 7; 1132 sec = tmp - (sec >> 3); 1133 sec = tmp + (sec >> 1); 1134 sec = tmp + (sec >> 3); 1135 sec = tmp + (sec >> 4); 1136 tmp = (sec << 7) - sec - sec - sec; 1137 tmp = (tmp << 7) - tmp - tmp - tmp; 1138 tmp = (tmp << 7) - tmp - tmp - tmp; 1139 nsec = (uint32_t)hrt - (tmp << 9); 1140 while (nsec >= NANOSEC) { 1141 nsec -= NANOSEC; 1142 sec++; 1143 } 1144 tvp->tv_sec = (time_t)sec; 1145 /* 1146 * this routine is very similar to hr2ts, but requires microseconds 1147 * instead of nanoseconds, so an interger divide by 1000 routine 1148 * completes the conversion 1149 */ 1150 t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12); 1151 q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14); 1152 q = q >> 9; 1153 r = nsec - q*1000; 1154 tvp->tv_usec = q + ((r + 24) >> 10); 1155 1156 } 1157 1158 int 1159 nanosleep(timespec_t *rqtp, timespec_t *rmtp) 1160 { 1161 timespec_t rqtime; 1162 timespec_t rmtime; 1163 timespec_t now; 1164 int ret = 1; 1165 model_t datamodel = get_udatamodel(); 1166 1167 if (datamodel == DATAMODEL_NATIVE) { 1168 if (copyin(rqtp, &rqtime, sizeof (rqtime))) 1169 return (set_errno(EFAULT)); 1170 } else { 1171 timespec32_t rqtime32; 1172 1173 if (copyin(rqtp, &rqtime32, sizeof (rqtime32))) 1174 return (set_errno(EFAULT)); 1175 TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32); 1176 } 1177 1178 if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 || 1179 rqtime.tv_nsec >= NANOSEC) 1180 return (set_errno(EINVAL)); 1181 1182 if (timerspecisset(&rqtime)) { 1183 gethrestime(&now); 1184 timespecadd(&rqtime, &now); 1185 mutex_enter(&curthread->t_delay_lock); 1186 while ((ret = cv_waituntil_sig(&curthread->t_delay_cv, 1187 &curthread->t_delay_lock, &rqtime)) > 0) 1188 continue; 1189 mutex_exit(&curthread->t_delay_lock); 1190 } 1191 1192 if (rmtp) { 1193 /* 1194 * If cv_waituntil_sig() returned due to a signal, and 1195 * there is time remaining, then set the time remaining. 1196 * Else set time remaining to zero 1197 */ 1198 rmtime.tv_sec = rmtime.tv_nsec = 0; 1199 if (ret == 0) { 1200 timespec_t delta = rqtime; 1201 1202 gethrestime(&now); 1203 timespecsub(&delta, &now); 1204 if (delta.tv_sec > 0 || (delta.tv_sec == 0 && 1205 delta.tv_nsec > 0)) 1206 rmtime = delta; 1207 } 1208 1209 if (datamodel == DATAMODEL_NATIVE) { 1210 if (copyout(&rmtime, rmtp, sizeof (rmtime))) 1211 return (set_errno(EFAULT)); 1212 } else { 1213 timespec32_t rmtime32; 1214 1215 TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime); 1216 if (copyout(&rmtime32, rmtp, sizeof (rmtime32))) 1217 return (set_errno(EFAULT)); 1218 } 1219 } 1220 1221 if (ret == 0) 1222 return (set_errno(EINTR)); 1223 return (0); 1224 } 1225 1226 /* 1227 * Routines to convert standard UNIX time (seconds since Jan 1, 1970) 1228 * into year/month/day/hour/minute/second format, and back again. 1229 * Note: these routines require tod_lock held to protect cached state. 1230 */ 1231 static int days_thru_month[64] = { 1232 0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0, 1233 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0, 1234 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0, 1235 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0, 1236 }; 1237 1238 todinfo_t saved_tod; 1239 int saved_utc = -60; 1240 1241 todinfo_t 1242 utc_to_tod(time_t utc) 1243 { 1244 long dse, day, month, year; 1245 todinfo_t tod; 1246 1247 ASSERT(MUTEX_HELD(&tod_lock)); 1248 1249 if (utc < 0) /* should never happen */ 1250 utc = 0; 1251 1252 saved_tod.tod_sec += utc - saved_utc; 1253 saved_utc = utc; 1254 if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60) 1255 return (saved_tod); /* only the seconds changed */ 1256 1257 dse = utc / 86400; /* days since epoch */ 1258 1259 tod.tod_sec = utc % 60; 1260 tod.tod_min = (utc % 3600) / 60; 1261 tod.tod_hour = (utc % 86400) / 3600; 1262 tod.tod_dow = (dse + 4) % 7 + 1; /* epoch was a Thursday */ 1263 1264 year = dse / 365 + 72; /* first guess -- always a bit too large */ 1265 do { 1266 year--; 1267 day = dse - 365 * (year - 70) - ((year - 69) >> 2); 1268 } while (day < 0); 1269 1270 month = ((year & 3) << 4) + 1; 1271 while (day >= days_thru_month[month + 1]) 1272 month++; 1273 1274 tod.tod_day = day - days_thru_month[month] + 1; 1275 tod.tod_month = month & 15; 1276 tod.tod_year = year; 1277 1278 saved_tod = tod; 1279 return (tod); 1280 } 1281 1282 time_t 1283 tod_to_utc(todinfo_t tod) 1284 { 1285 time_t utc; 1286 int year = tod.tod_year; 1287 int month = tod.tod_month + ((year & 3) << 4); 1288 #ifdef DEBUG 1289 /* only warn once, not each time called */ 1290 static int year_warn = 1; 1291 static int month_warn = 1; 1292 static int day_warn = 1; 1293 static int hour_warn = 1; 1294 static int min_warn = 1; 1295 static int sec_warn = 1; 1296 int days_diff = days_thru_month[month + 1] - days_thru_month[month]; 1297 #endif 1298 1299 ASSERT(MUTEX_HELD(&tod_lock)); 1300 1301 #ifdef DEBUG 1302 if (year_warn && (year < 70 || year > 8029)) { 1303 cmn_err(CE_WARN, 1304 "The hardware real-time clock appears to have the " 1305 "wrong years value %d -- time needs to be reset\n", 1306 year); 1307 year_warn = 0; 1308 } 1309 1310 if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) { 1311 cmn_err(CE_WARN, 1312 "The hardware real-time clock appears to have the " 1313 "wrong months value %d -- time needs to be reset\n", 1314 tod.tod_month); 1315 month_warn = 0; 1316 } 1317 1318 if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) { 1319 cmn_err(CE_WARN, 1320 "The hardware real-time clock appears to have the " 1321 "wrong days value %d -- time needs to be reset\n", 1322 tod.tod_day); 1323 day_warn = 0; 1324 } 1325 1326 if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) { 1327 cmn_err(CE_WARN, 1328 "The hardware real-time clock appears to have the " 1329 "wrong hours value %d -- time needs to be reset\n", 1330 tod.tod_hour); 1331 hour_warn = 0; 1332 } 1333 1334 if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) { 1335 cmn_err(CE_WARN, 1336 "The hardware real-time clock appears to have the " 1337 "wrong minutes value %d -- time needs to be reset\n", 1338 tod.tod_min); 1339 min_warn = 0; 1340 } 1341 1342 if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) { 1343 cmn_err(CE_WARN, 1344 "The hardware real-time clock appears to have the " 1345 "wrong seconds value %d -- time needs to be reset\n", 1346 tod.tod_sec); 1347 sec_warn = 0; 1348 } 1349 #endif 1350 1351 utc = (year - 70); /* next 3 lines: utc = 365y + y/4 */ 1352 utc += (utc << 3) + (utc << 6); 1353 utc += (utc << 2) + ((year - 69) >> 2); 1354 utc += days_thru_month[month] + tod.tod_day - 1; 1355 utc = (utc << 3) + (utc << 4) + tod.tod_hour; /* 24 * day + hour */ 1356 utc = (utc << 6) - (utc << 2) + tod.tod_min; /* 60 * hour + min */ 1357 utc = (utc << 6) - (utc << 2) + tod.tod_sec; /* 60 * min + sec */ 1358 1359 return (utc); 1360 } 1361