/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_clock.c     8.5 (Berkeley) 1/21/94
 * $FreeBSD$
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <sys/timepps.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

/*
 * Number of timecounters used to implement stable storage
 */
#ifndef NTIMECOUNTER
#define NTIMECOUNTER    5
#endif

static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
    "Timecounter stable storage");

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward __P((int force));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

time_t time_second;

struct timeval boottime;
SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");

/*
 * Which update policy to use.
 *    0 - every tick, bad hardware may fail with "calcru negative..."
 *    1 - more resistant to the above hardware, but less efficient.
 */
static int tco_method;

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static unsigned
dummy_get_timecount(struct timecounter *tc)
{
        static unsigned now;
        return (++now);
}

static struct timecounter dummy_timecounter = {
        dummy_get_timecount,
        0,
        ~0u,
        1000000,
        "dummy"
};

struct timecounter *timecounter = &dummy_timecounter;
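
/*
 * Illustrative sketch only (kept under #if 0, never compiled): roughly how a
 * hardware driver could hand its counter to init_timecounter().  The names
 * "example_get_timecount", "example_read_hardware_register" and the 2 MHz
 * frequency are made up for illustration; the initializer order simply
 * mirrors dummy_timecounter above.
 */
#if 0
static unsigned
example_get_timecount(struct timecounter *tc)
{
        /* hypothetical hardware register read */
        return (example_read_hardware_register());
}

static struct timecounter example_timecounter = {
        example_get_timecount,          /* tc_get_timecount */
        0,                              /* tc_poll_pps (none) */
        ~0u,                            /* tc_counter_mask */
        2000000,                        /* tc_frequency, counts per second */
        "example"
};

static void
example_attach(void)
{
        init_timecounter(&example_timecounter);
}
#endif
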
/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int stathz;
int profhz;
static int profprocs;
int ticks;
static int psdiv, pscnt;                /* prof => stat divider */
int psratio;                            /* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
        void *dummy;
{
        register int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        psdiv = pscnt = 1;
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
        register struct clockframe *frame;
{
        register struct proc *p;

        p = curproc;
        if (p) {
                register struct pstats *pstats;

                /*
                 * Run current process's virtual and profile time, as needed.
                 */
                pstats = p->p_stats;
                if (CLKF_USERMODE(frame) &&
                    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        psignal(p, SIGVTALRM);
                if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        psignal(p, SIGPROF);
        }

#if defined(SMP) && defined(BETTER_CLOCK)
        forward_hardclock(pscnt);
#endif

        /*
         * If no separate statistics clock is available, run it from here.
         */
        if (stathz == 0)
                statclock(frame);

        tco_forward(0);
        ticks++;

        /*
         * Process callouts at a very low cpu priority, so we don't keep the
         * relatively high clock interrupt priority any longer than necessary.
         */
        if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
                if (CLKF_BASEPRI(frame)) {
                        /*
                         * Save the overhead of a software interrupt;
                         * it will happen as soon as we return, so do it now.
                         */
                        (void)splsoftclock();
                        softclock();
                } else
                        setsoftclock();
        } else if (softticks + 1 == ticks)
                ++softticks;
}
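
/*
 * Worked example of the conversion done by tvtohz() below (illustration
 * only; it assumes hz = 100, hence tick = 1000000 / hz = 10000 microseconds):
 *
 *      tv    = { .tv_sec = 1, .tv_usec = 5000 }
 *      ticks = (1 * 1000000 + 5000 + (10000 - 1)) / 10000 + 1
 *            = 1014999 / 10000 + 1 = 101 + 1 = 102
 *
 * The "+ 1" allows for the current, partially expired tick.
 */
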
/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
        struct timeval *tv;
{
        register unsigned long ticks;
        register long sec, usec;

        /*
         * If the number of usecs in the whole seconds part of the time
         * difference fits in a long, then the total number of usecs will
         * fit in an unsigned long.  Compute the total and convert it to
         * ticks, rounding up and adding 1 to allow for the current tick
         * to expire.  Rounding also depends on unsigned long arithmetic
         * to avoid overflow.
         *
         * Otherwise, if the number of ticks in the whole seconds part of
         * the time difference fits in a long, then convert the parts to
         * ticks separately and add, using similar rounding methods and
         * overflow avoidance.  This method would work in the previous
         * case but it is slightly slower and assumes that hz is integral.
         *
         * Otherwise, round the time difference down to the maximum
         * representable value.
         *
         * If ints have 32 bits, then the maximum value for any timeout in
         * 10ms ticks is 248 days.
         */
        sec = tv->tv_sec;
        usec = tv->tv_usec;
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0) {
#ifdef DIAGNOSTIC
                if (usec > 0) {
                        sec++;
                        usec -= 1000000;
                }
                printf("tvtohz: negative time difference %ld sec %ld usec\n",
                    sec, usec);
#endif
                ticks = 1;
        } else if (sec <= LONG_MAX / 1000000)
                ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
                    / tick + 1;
        else if (sec <= LONG_MAX / hz)
                ticks = sec * hz
                    + ((unsigned long)usec + (tick - 1)) / tick + 1;
        else
                ticks = LONG_MAX;
        if (ticks > INT_MAX)
                ticks = INT_MAX;
        return ((int)ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
        register struct proc *p;
{
        int s;

        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                if (++profprocs == 1 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = psratio;
                        setstatclockrate(profhz);
                        splx(s);
                }
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
        register struct proc *p;
{
        int s;

        if (p->p_flag & P_PROFIL) {
                p->p_flag &= ~P_PROFIL;
                if (--profprocs == 0 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = 1;
                        setstatclockrate(stathz);
                        splx(s);
                }
        }
}
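
/*
 * Illustration of the prof/stat divider managed by startprofclock() and
 * stopprofclock() above and consumed by statclock() below.  The numbers are
 * an assumption for the example, e.g. a machine with stathz = 128 and
 * profhz = 1024, giving psratio = 8: while at least one process has
 * P_PROFIL set, the statistics clock is reprogrammed to profhz and
 * psdiv/pscnt are set to 8, so statclock() takes a profiling sample on
 * every tick but only charges statistics (cp_time[], p_*ticks, priorities)
 * on every 8th tick.  When the last profiled process goes away, the rate
 * drops back to stathz and the divider returns to 1.
 */
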
/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
        register struct clockframe *frame;
{
#ifdef GPROF
        register struct gmonparam *g;
        int i;
#endif
        register struct proc *p;
        struct pstats *pstats;
        long rss;
        struct rusage *ru;
        struct vmspace *vm;

        if (curproc != NULL && CLKF_USERMODE(frame)) {
                p = curproc;
                if (p->p_flag & P_PROFIL)
                        addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
                if (stathz != 0)
                        forward_statclock(pscnt);
#endif
                if (--pscnt > 0)
                        return;
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled record the tick.
                 */
                p->p_uticks++;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE]++;
                else
                        cp_time[CP_USER]++;
        } else {
#ifdef GPROF
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = &_gmonparam;
                if (g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
                                g->kcount[i]++;
                        }
                }
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
                if (stathz != 0)
                        forward_statclock(pscnt);
#endif
                if (--pscnt > 0)
                        return;
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                p = curproc;
                if (CLKF_INTR(frame)) {
                        if (p != NULL)
                                p->p_iticks++;
                        cp_time[CP_INTR]++;
                } else if (p != NULL) {
                        p->p_sticks++;
                        cp_time[CP_SYS]++;
                } else
                        cp_time[CP_IDLE]++;
        }
        pscnt = psdiv;

        /*
         * We maintain statistics shown by user-level statistics
         * programs:  the amount of time in each cpu state.
         */

        /*
         * We adjust the priority of the current process.  The priority of
         * a process gets worse as it accumulates CPU time.  The cpu usage
         * estimator (p_estcpu) is increased here.  The formula for computing
         * priorities (in kern_synch.c) will compute a different value each
         * time p_estcpu increases by 4.  The cpu usage estimator ramps up
         * quite quickly when the process is running (linearly), and decays
         * away exponentially, at a rate which is proportionally slower when
         * the system is busy.  The basic principle is that the system will
         * 90% forget that the process used a lot of CPU time in 5 * loadav
         * seconds.  This causes the system to favor processes which haven't
         * run much recently, and to round-robin among other processes.
         */
        if (p != NULL) {
                p->p_cpticks++;
                if (++p->p_estcpu == 0)
                        p->p_estcpu--;
                if ((p->p_estcpu & 3) == 0) {
                        resetpriority(p);
                        if (p->p_priority >= PUSER)
                                p->p_priority = p->p_usrpri;
                }

                /* Update resource usage integrals and maximums. */
                if ((pstats = p->p_stats) != NULL &&
                    (ru = &pstats->p_ru) != NULL &&
                    (vm = p->p_vmspace) != NULL) {
                        ru->ru_ixrss += pgtok(vm->vm_tsize);
                        ru->ru_idrss += pgtok(vm->vm_dsize);
                        ru->ru_isrss += pgtok(vm->vm_ssize);
                        rss = pgtok(vmspace_resident_count(vm));
                        if (ru->ru_maxrss < rss)
                                ru->ru_maxrss = rss;
                }
        }
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
        struct clockinfo clkinfo;

        /*
         * Construct clockinfo structure.
         */
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.tickadj = tickadj;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo", "");

static __inline unsigned
tco_delta(struct timecounter *tc)
{

        return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
            tc->tc_counter_mask);
}

/*
 * We have eight functions for looking at the clock, four for
 * microseconds and four for nanoseconds.  For each there is a fast
 * but less precise version "get{nano|micro}[up]time" which will
 * return a time which is up to 1/HZ previous to the call, whereas
 * the raw version "{nano|micro}[up]time" will return a timestamp
 * which is as precise as possible.  The "up" variants return the
 * time relative to system boot; these are well suited for time
 * interval measurements.
 */
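
/*
 * Usage sketch (illustrative only, kept under #if 0): measuring how long an
 * operation takes with the boot-relative "up" variants, and stamping a
 * record with the cheaper get*() variant.  "example_do_work" is
 * hypothetical, and the 2-argument timespecsub() macro is assumed to be
 * available from <sys/time.h> alongside the timespecadd() used below.
 */
#if 0
static void
example_measure(void)
{
        struct timespec t0, t1;
        struct timeval stamp;

        nanouptime(&t0);                /* precise, boot-relative */
        example_do_work();              /* hypothetical workload */
        nanouptime(&t1);
        timespecsub(&t1, &t0);          /* t1 now holds the elapsed time */

        getmicrotime(&stamp);           /* cheap wall clock, up to 1/hz stale */
        printf("work took %ld.%09lds at %ld\n",
            (long)t1.tv_sec, t1.tv_nsec, (long)stamp.tv_sec);
}
#endif
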
void
getmicrotime(struct timeval *tvp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                *tvp = tc->tc_microtime;
        } else {
                microtime(tvp);
        }
}

void
getnanotime(struct timespec *tsp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                *tsp = tc->tc_nanotime;
        } else {
                nanotime(tsp);
        }
}

void
microtime(struct timeval *tv)
{
        struct timecounter *tc;

        tc = timecounter;
        tv->tv_sec = tc->tc_offset_sec;
        tv->tv_usec = tc->tc_offset_micro;
        tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
        tv->tv_usec += boottime.tv_usec;
        tv->tv_sec += boottime.tv_sec;
        while (tv->tv_usec >= 1000000) {
                tv->tv_usec -= 1000000;
                tv->tv_sec++;
        }
}

void
nanotime(struct timespec *ts)
{
        unsigned count;
        u_int64_t delta;
        struct timecounter *tc;

        tc = timecounter;
        ts->tv_sec = tc->tc_offset_sec;
        count = tco_delta(tc);
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        delta += boottime.tv_usec * 1000;
        ts->tv_sec += boottime.tv_sec;
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts->tv_sec++;
        }
        ts->tv_nsec = delta;
}

void
getmicrouptime(struct timeval *tvp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                tvp->tv_sec = tc->tc_offset_sec;
                tvp->tv_usec = tc->tc_offset_micro;
        } else {
                microuptime(tvp);
        }
}

void
getnanouptime(struct timespec *tsp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                tsp->tv_sec = tc->tc_offset_sec;
                tsp->tv_nsec = tc->tc_offset_nano >> 32;
        } else {
                nanouptime(tsp);
        }
}

void
microuptime(struct timeval *tv)
{
        struct timecounter *tc;

        tc = timecounter;
        tv->tv_sec = tc->tc_offset_sec;
        tv->tv_usec = tc->tc_offset_micro;
        tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
        if (tv->tv_usec >= 1000000) {
                tv->tv_usec -= 1000000;
                tv->tv_sec++;
        }
}

void
nanouptime(struct timespec *ts)
{
        unsigned count;
        u_int64_t delta;
        struct timecounter *tc;

        tc = timecounter;
        ts->tv_sec = tc->tc_offset_sec;
        count = tco_delta(tc);
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        if (delta >= 1000000000) {
                delta -= 1000000000;
                ts->tv_sec++;
        }
        ts->tv_nsec = delta;
}

static void
tco_setscales(struct timecounter *tc)
{
        u_int64_t scale;

        scale = 1000000000LL << 32;
        scale += tc->tc_adjustment;
        scale /= tc->tc_tweak->tc_frequency;
        tc->tc_scale_micro = scale / 1000;
        tc->tc_scale_nano_f = scale & 0xffffffff;
        tc->tc_scale_nano_i = scale >> 32;
}

void
update_timecounter(struct timecounter *tc)
{
        tco_setscales(tc);
}
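
/*
 * Worked example of the fixed-point scales computed by tco_setscales()
 * above (illustrative assumption: a 2 MHz counter, tc_frequency = 2000000,
 * with tc_adjustment = 0):
 *
 *      scale           = (1000000000 << 32) / 2000000 = 500 << 32
 *      tc_scale_micro  = scale / 1000 = 1 << 31
 *      tc_scale_nano_i = 500, tc_scale_nano_f = 0
 *
 * so in microtime() a delta of N counts contributes
 * (N * (1 << 31)) >> 32 = N / 2 microseconds, and in nanotime() it
 * contributes N * 500 nanoseconds, which is exactly right for a counter
 * that ticks twice per microsecond.
 */
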
void
init_timecounter(struct timecounter *tc)
{
        struct timespec ts1;
        struct timecounter *t1, *t2, *t3;
        int i;

        tc->tc_adjustment = 0;
        tc->tc_tweak = tc;
        tco_setscales(tc);
        tc->tc_offset_count = tc->tc_get_timecount(tc);
        if (timecounter == &dummy_timecounter)
                tc->tc_avail = tc;
        else {
                tc->tc_avail = timecounter->tc_tweak->tc_avail;
                timecounter->tc_tweak->tc_avail = tc;
        }
        MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
        tc->tc_other = t1;
        *t1 = *tc;
        t2 = t1;
        for (i = 1; i < NTIMECOUNTER; i++) {
                MALLOC(t3, struct timecounter *, sizeof *t3,
                    M_TIMECOUNTER, M_WAITOK);
                *t3 = *tc;
                t3->tc_other = t2;
                t2 = t3;
        }
        t1->tc_other = t3;
        tc = t1;

        printf("Timecounter \"%s\" frequency %lu Hz\n",
            tc->tc_name, (u_long)tc->tc_frequency);

        /* XXX: For now always start using the counter. */
        tc->tc_offset_count = tc->tc_get_timecount(tc);
        nanouptime(&ts1);
        tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
        tc->tc_offset_micro = ts1.tv_nsec / 1000;
        tc->tc_offset_sec = ts1.tv_sec;
        timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
        struct timespec ts2;

        nanouptime(&ts2);
        boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
        boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
        if (boottime.tv_usec < 0) {
                boottime.tv_usec += 1000000;
                boottime.tv_sec--;
        }
        /* fiddle all the little crinkly bits around the fiords... */
        tco_forward(1);
}

static void
switch_timecounter(struct timecounter *newtc)
{
        int s;
        struct timecounter *tc;
        struct timespec ts;

        s = splclock();
        tc = timecounter;
        if (newtc->tc_tweak == tc->tc_tweak) {
                splx(s);
                return;
        }
        newtc = newtc->tc_tweak->tc_other;
        nanouptime(&ts);
        newtc->tc_offset_sec = ts.tv_sec;
        newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
        newtc->tc_offset_micro = ts.tv_nsec / 1000;
        newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
        tco_setscales(newtc);
        timecounter = newtc;
        splx(s);
}

static struct timecounter *
sync_other_counter(void)
{
        struct timecounter *tc, *tcn, *tco;
        unsigned delta;

        tco = timecounter;
        tc = tco->tc_other;
        tcn = tc->tc_other;
        *tc = *tco;
        tc->tc_other = tcn;
        delta = tco_delta(tc);
        tc->tc_offset_count += delta;
        tc->tc_offset_count &= tc->tc_counter_mask;
        tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
        tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
        return (tc);
}
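
/*
 * A note on the "stable storage" ring (descriptive only, no new mechanism):
 * init_timecounter() allocates NTIMECOUNTER copies of each hardware
 * timecounter and links them through tc_other.  sync_other_counter() never
 * touches the structure readers may currently be using; it copies the live
 * state into the next element of the ring, rolls the offsets forward by
 * tco_delta(), and returns that copy.  tco_forward() then publishes it with
 * a single assignment to the global "timecounter" pointer, so lock-free
 * readers such as microtime() always see either the old or the new
 * generation, never a half-updated one.
 */
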
static void
tco_forward(int force)
{
        struct timecounter *tc, *tco;

        tco = timecounter;
        tc = sync_other_counter();
        /*
         * We may be inducing a tiny error here: the tc_poll_pps() may
         * process a latched count which happens after the tco_delta()
         * in sync_other_counter(), which would extend the previous
         * counter's parameters into the domain of this new one.
         * Since the timewindow is very small for this, the error is
         * going to be only a few weenieseconds (as Dave Mills would
         * say), so let's just not talk more about it, OK?
         */
        if (tco->tc_poll_pps)
                tco->tc_poll_pps(tco);
        if (timedelta != 0) {
                tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
                timedelta -= tickdelta;
                force++;
        }

        while (tc->tc_offset_nano >= 1000000000ULL << 32) {
                tc->tc_offset_nano -= 1000000000ULL << 32;
                tc->tc_offset_sec++;
                ntp_update_second(tc);  /* XXX only needed if xntpd runs */
                tco_setscales(tc);
                force++;
        }

        if (tco_method && !force)
                return;

        tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

        /* Figure out the wall-clock time */
        tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
        tc->tc_nanotime.tv_nsec =
            (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
        tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
        if (tc->tc_nanotime.tv_nsec >= 1000000000) {
                tc->tc_nanotime.tv_nsec -= 1000000000;
                tc->tc_microtime.tv_usec -= 1000000;
                tc->tc_nanotime.tv_sec++;
        }
        time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

        timecounter = tc;
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0,
    "This variable determines the method used for updating timecounters.  "
    "If the default algorithm (0) fails with \"calcru negative...\" messages "
    "try the alternate algorithm (1) which handles bad hardware better.");
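
/*
 * The handler below lets userland pick the active hardware timecounter by
 * name through the kern.timecounter.hardware string sysctl, walking the
 * tc_avail ring built by init_timecounter().  For example (illustrative;
 * the available counter names depend on the machine):
 *
 *      sysctl -w kern.timecounter.hardware=TSC
 */
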
static int
sysctl_kern_timecounter_hardware SYSCTL_HANDLER_ARGS
{
        char newname[32];
        struct timecounter *newtc, *tc;
        int error;

        tc = timecounter->tc_tweak;
        strncpy(newname, tc->tc_name, sizeof(newname));
        error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
        if (error == 0 && req->newptr != NULL &&
            strcmp(newname, tc->tc_name) != 0) {
                for (newtc = tc->tc_avail; newtc != tc;
                    newtc = newtc->tc_avail) {
                        if (strcmp(newname, newtc->tc_name) == 0) {
                                /* Warm up new timecounter. */
                                (void)newtc->tc_get_timecount(newtc);

                                switch_timecounter(newtc);
                                return (0);
                        }
                }
                return (EINVAL);
        }
        return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_kern_timecounter_hardware, "A", "");

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
        pps_params_t *app;
        struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
        struct pps_kcbind_args *kapi;
#endif

        switch (cmd) {
        case PPS_IOC_CREATE:
                return (0);
        case PPS_IOC_DESTROY:
                return (0);
        case PPS_IOC_SETPARAMS:
                app = (pps_params_t *)data;
                if (app->mode & ~pps->ppscap)
                        return (EINVAL);
                pps->ppsparam = *app;
                return (0);
        case PPS_IOC_GETPARAMS:
                app = (pps_params_t *)data;
                *app = pps->ppsparam;
                app->api_version = PPS_API_VERS_1;
                return (0);
        case PPS_IOC_GETCAP:
                *(int*)data = pps->ppscap;
                return (0);
        case PPS_IOC_FETCH:
                fapi = (struct pps_fetch_args *)data;
                if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
                        return (EINVAL);
                if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
                        return (EOPNOTSUPP);
                pps->ppsinfo.current_mode = pps->ppsparam.mode;
                fapi->pps_info_buf = pps->ppsinfo;
                return (0);
        case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
                kapi = (struct pps_kcbind_args *)data;
                /* XXX Only root should be able to do this */
                if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
                        return (EINVAL);
                if (kapi->kernel_consumer != PPS_KC_HARDPPS)
                        return (EINVAL);
                if (kapi->edge & ~pps->ppscap)
                        return (EINVAL);
                pps->kcmode = kapi->edge;
                return (0);
#else
                return (EOPNOTSUPP);
#endif
        default:
                return (ENOTTY);
        }
}

void
pps_init(struct pps_state *pps)
{
        pps->ppscap |= PPS_TSFMT_TSPEC;
        if (pps->ppscap & PPS_CAPTUREASSERT)
                pps->ppscap |= PPS_OFFSETASSERT;
        if (pps->ppscap & PPS_CAPTURECLEAR)
                pps->ppscap |= PPS_OFFSETCLEAR;
}
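
/*
 * Sketch of how a driver is expected to use these helpers (descriptive
 * only; "sc" and the interrupt routine are hypothetical):
 *
 *      - at attach time, set sc->sc_pps.ppscap to the PPS_CAPTURE* edges the
 *        hardware can latch, then call pps_init(&sc->sc_pps);
 *      - from its ioctl routine, forward PPS_IOC_* requests to
 *        pps_ioctl(cmd, data, &sc->sc_pps);
 *      - from its interrupt handler, read the timecounter count latched for
 *        the captured edge and call
 *        pps_event(&sc->sc_pps, timecounter, count, PPS_CAPTUREASSERT).
 */
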
void
pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event)
{
        struct timespec ts, *tsp, *osp;
        u_int64_t delta;
        unsigned tcount, *pcount;
        int foff, fhard;
        pps_seq_t *pseq;

        /* Things would be easier with arrays... */
        if (event == PPS_CAPTUREASSERT) {
                tsp = &pps->ppsinfo.assert_timestamp;
                osp = &pps->ppsparam.assert_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
                fhard = pps->kcmode & PPS_CAPTUREASSERT;
                pcount = &pps->ppscount[0];
                pseq = &pps->ppsinfo.assert_sequence;
        } else {
                tsp = &pps->ppsinfo.clear_timestamp;
                osp = &pps->ppsparam.clear_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
                fhard = pps->kcmode & PPS_CAPTURECLEAR;
                pcount = &pps->ppscount[1];
                pseq = &pps->ppsinfo.clear_sequence;
        }

        /* The timecounter changed: bail */
        if (!pps->ppstc ||
            pps->ppstc->tc_name != tc->tc_name ||
            tc->tc_name != timecounter->tc_name) {
                pps->ppstc = tc;
                *pcount = count;
                return;
        }

        /* Nothing really happened */
        if (*pcount == count)
                return;

        *pcount = count;

        /* Convert the count to timespec */
        ts.tv_sec = tc->tc_offset_sec;
        tcount = count - tc->tc_offset_count;
        tcount &= tc->tc_counter_mask;
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)tcount * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)tcount * tc->tc_scale_nano_i);
        delta += boottime.tv_usec * 1000;
        ts.tv_sec += boottime.tv_sec;
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts.tv_sec++;
        }
        ts.tv_nsec = delta;

        (*pseq)++;
        *tsp = ts;

        if (foff) {
                timespecadd(tsp, osp);
                if (tsp->tv_nsec < 0) {
                        tsp->tv_nsec += 1000000000;
                        tsp->tv_sec -= 1;
                }
        }
#ifdef PPS_SYNC
        if (fhard) {
                /* magic, at its best... */
                tcount = count - pps->ppscount[2];
                pps->ppscount[2] = count;
                tcount &= tc->tc_counter_mask;
                delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f);
                delta >>= 32;
                delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i);
                hardpps(tsp, delta);
        }
#endif
}