/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.23 1995/12/07 12:46:37 davidg Exp $
 */

/* Portions of this software are covered by the following: */
/******************************************************************************
 *
 * Copyright (c) David L. Mills 1993, 1994
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose and without fee is hereby granted, provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name University of Delaware not be used in
 * advertising or publicity pertaining to distribution of the software
 * without specific, written prior permission.  The University of Delaware
 * makes no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 *
 *****************************************************************************/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/clock.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/* Does anybody else really care about these? (yes, machdep.c) */
static struct callout calltodo;
struct callout *callfree, *callout;

/* Some of these don't belong here, but it's easiest to concentrate them. */
static long cp_time[CPUSTATES];
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE];
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *      allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
    register volatile struct timeval *tp = (t); \
    register long us; \
 \
    tp->tv_usec = us = tp->tv_usec + (usec); \
    if (us >= 1000000) { \
        tp->tv_usec = us - 1000000; \
        tp->tv_sec++; \
    } \
}
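
/*
 * For example, bumping a timeval that is just short of a second boundary
 * carries the overflow into tv_sec (a sketch of the macro's effect, not
 * additional kernel code):
 *
 *      struct timeval tv = { 99, 999900 };     -- 99 s + 999900 us
 *      BUMPTIME(&tv, 200);                     -- add 200 us
 *      -- tv is now { 100, 100 }: tv_usec wrapped past 1000000 exactly once
 *
 * The macro only handles a single wrap, so the usec argument is assumed to
 * be well under one second (hence "a small number of usec's").
 */
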
int stathz;
int profhz;
int profprocs;
int ticks;
static int psdiv, pscnt;        /* prof => stat divider */
static int psratio;             /* ratio: prof / stat */

volatile struct timeval time;
volatile struct timeval mono_time;

/*
 * Phase-lock loop (PLL) definitions
 *
 * The following variables are read and set by the ntp_adjtime() system
 * call.
 *
 * time_state shows the state of the system clock, with values defined
 * in the timex.h header file.
 *
 * time_status shows the status of the system clock, with bits defined
 * in the timex.h header file.
 *
 * time_offset is used by the PLL to adjust the system time in small
 * increments.
 *
 * time_constant determines the bandwidth or "stiffness" of the PLL.
 *
 * time_tolerance determines maximum frequency error or tolerance of the
 * CPU clock oscillator and is a property of the architecture; however,
 * in principle it could change as a result of the presence of external
 * discipline signals, for instance.
 *
 * time_precision is usually equal to the kernel tick variable; however,
 * in cases where a precision clock counter or external clock is
 * available, the resolution can be much less than this and depend on
 * whether the external clock is working or not.
 *
 * time_maxerror is initialized by an ntp_adjtime() call and increased by
 * the kernel once each second to reflect the maximum error
 * bound growth.
 *
 * time_esterror is set and read by the ntp_adjtime() call, but
 * otherwise not used by the kernel.
 */
int time_status = STA_UNSYNC;   /* clock status bits */
int time_state = TIME_OK;       /* clock state */
long time_offset = 0;           /* time offset (us) */
long time_constant = 0;         /* pll time constant */
long time_tolerance = MAXFREQ;  /* frequency tolerance (scaled ppm) */
long time_precision = 1;        /* clock precision (us) */
long time_maxerror = MAXPHASE;  /* maximum error (us) */
long time_esterror = MAXPHASE;  /* estimated error (us) */

/*
 * The following variables establish the state of the PLL and the
 * residual time and frequency offset of the local clock.  The scale
 * factors are defined in the timex.h header file.
 *
 * time_phase and time_freq are the phase increment and the frequency
 * increment, respectively, of the kernel time variable at each tick of
 * the clock.
 *
 * time_freq is set via ntp_adjtime() from a value stored in a file when
 * the synchronization daemon is first started.  Its value is retrieved
 * via ntp_adjtime() and written to the file about once per hour by the
 * daemon.
 *
 * time_adj is the adjustment added to the value of tick at each timer
 * interrupt and is recomputed at each timer interrupt.
 *
 * time_reftime is the second's portion of the system time on the last
 * call to ntp_adjtime().  It is used to adjust the time_freq variable
 * and to increase the time_maxerror as the time since last update
 * increases.
 */
static long time_phase = 0;     /* phase offset (scaled us) */
long time_freq = 0;             /* frequency offset (scaled ppm) */
static long time_adj = 0;       /* tick adjust (scaled 1 / hz) */
static long time_reftime = 0;   /* time at last adjustment (s) */
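
/*
 * The "scaled" units above are fixed-point values: a quantity in ppm or
 * microseconds is stored shifted left by a constant from <sys/timex.h> so
 * that fractional corrections survive integer arithmetic.  As a rough
 * illustration (assuming SHIFT_USEC = 16, as in the stock timex.h), a
 * steady 10 ppm oscillator error would be represented as
 *
 *      time_freq = 10L << SHIFT_USEC;          -- 10 ppm, scaled
 *
 * and hardclock() below converts that back into a per-tick phase increment
 * by shifting right again, so no floating point is ever needed at
 * interrupt level.
 */
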
#ifdef PPS_SYNC
/*
 * The following variables are used only if the kernel PPS discipline
 * code is configured (PPS_SYNC).  The scale factors are defined in the
 * timex.h header file.
 *
 * pps_time contains the time at each calibration interval, as read by
 * microtime().
 *
 * pps_offset is the time offset produced by the time median filter
 * pps_tf[], while pps_jitter is the dispersion measured by this
 * filter.
 *
 * pps_freq is the frequency offset produced by the frequency median
 * filter pps_ff[], while pps_stabil is the dispersion measured by
 * this filter.
 *
 * pps_usec is latched from a high resolution counter or external clock
 * at pps_time.  Here we want the hardware counter contents only, not the
 * contents plus the time_tv.usec as usual.
 *
 * pps_valid counts the number of seconds since the last PPS update.  It
 * is used as a watchdog timer to disable the PPS discipline should the
 * PPS signal be lost.
 *
 * pps_glitch counts the number of seconds since the beginning of an
 * offset burst more than tick/2 from current nominal offset.  It is used
 * mainly to suppress error bursts due to priority conflicts between the
 * PPS interrupt and timer interrupt.
 *
 * pps_count counts the seconds of the calibration interval, the
 * duration of which is pps_shift in powers of two.
 *
 * pps_intcnt counts the calibration intervals for use in the interval-
 * adaptation algorithm.  It's just too complicated for words.
 */
struct timeval pps_time;        /* kernel time at last interval */
long pps_offset = 0;            /* pps time offset (us) */
long pps_jitter = MAXTIME;      /* pps time dispersion (jitter) (us) */
long pps_tf[] = {0, 0, 0};      /* pps time offset median filter (us) */
long pps_freq = 0;              /* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;      /* frequency dispersion (scaled ppm) */
long pps_ff[] = {0, 0, 0};      /* frequency offset median filter */
long pps_usec = 0;              /* microsec counter at last interval */
long pps_valid = PPS_VALID;     /* pps signal watchdog counter */
int pps_glitch = 0;             /* pps signal glitch counter */
int pps_count = 0;              /* calibration interval counter (s) */
int pps_shift = PPS_SHIFT;      /* interval duration (s) (shift) */
int pps_intcnt = 0;             /* intervals at current duration */

/*
 * PPS signal quality monitors
 *
 * pps_jitcnt counts the seconds that have been discarded because the
 * jitter measured by the time median filter exceeds the limit MAXTIME
 * (100 us).
 *
 * pps_calcnt counts the frequency calibration intervals, which are
 * variable from 4 s to 256 s.
 *
 * pps_errcnt counts the calibration intervals which have been discarded
 * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
 * calibration interval jitter exceeds two ticks.
 *
 * pps_stbcnt counts the calibration intervals that have been discarded
 * because the frequency wander exceeds the limit MAXFREQ / 4 (25 ppm).
 */
long pps_jitcnt = 0;            /* jitter limit exceeded */
long pps_calcnt = 0;            /* calibration intervals */
long pps_errcnt = 0;            /* calibration errors */
long pps_stbcnt = 0;            /* stability limit exceeded */
#endif /* PPS_SYNC */

/* XXX none of this stuff works under FreeBSD */
#ifdef EXT_CLOCK
/*
 * External clock definitions
 *
 * The following definitions and declarations are used only if an
 * external clock (HIGHBALL or TPRO) is configured on the system.
 */
#define CLOCK_INTERVAL 30       /* CPU clock update interval (s) */

/*
 * The clock_count variable is set to CLOCK_INTERVAL at each PPS
 * interrupt and decremented once each second.
 */
int clock_count = 0;            /* CPU clock counter */

#ifdef HIGHBALL
/*
 * The clock_offset and clock_cpu variables are used by the HIGHBALL
 * interface.  The clock_offset variable defines the offset between
 * system time and the HIGHBALL counters.  The clock_cpu variable contains
 * the offset between the system clock and the HIGHBALL clock for use in
 * disciplining the kernel time variable.
 */
extern struct timeval clock_offset;     /* Highball clock offset */
long clock_cpu = 0;                     /* CPU clock adjust */
#endif /* HIGHBALL */
#endif /* EXT_CLOCK */

/*
 * hardupdate() - local clock update
 *
 * This routine is called by ntp_adjtime() to update the local clock
 * phase and frequency.  This is used to implement an adaptive-parameter,
 * first-order, type-II phase-lock loop.  The code computes new time and
 * frequency offsets each time it is called.  The hardclock() routine
 * amortizes these offsets at each tick interrupt.  If the kernel PPS
 * discipline code is configured (PPS_SYNC), the PPS signal itself
 * determines the new time offset, instead of the calling argument.
 * Presumably, calls to ntp_adjtime() occur only when the caller
 * believes the local clock is valid within some bound (+-128 ms with
 * NTP).  If the caller's time is far different from the PPS time, an
 * argument will ensue, and it's not clear who will lose.
 *
 * For default SHIFT_UPDATE = 12, the offset is limited to +-512 ms, the
 * maximum interval between updates is 4096 s and the maximum frequency
 * offset is +-31.25 ms/s.
 *
 * Note: splclock() is in effect.
 */
void
hardupdate(offset)
    long offset;
{
    long ltemp, mtemp;

    if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME))
        return;
    ltemp = offset;
#ifdef PPS_SYNC
    if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
        ltemp = pps_offset;
#endif /* PPS_SYNC */
    if (ltemp > MAXPHASE)
        time_offset = MAXPHASE << SHIFT_UPDATE;
    else if (ltemp < -MAXPHASE)
        time_offset = -(MAXPHASE << SHIFT_UPDATE);
    else
        time_offset = ltemp << SHIFT_UPDATE;
    mtemp = time.tv_sec - time_reftime;
    time_reftime = time.tv_sec;
    if (mtemp > MAXSEC)
        mtemp = 0;

    /* ugly multiply should be replaced */
    if (ltemp < 0)
        time_freq -= (-ltemp * mtemp) >> (time_constant +
            time_constant + SHIFT_KF - SHIFT_USEC);
    else
        time_freq += (ltemp * mtemp) >> (time_constant +
            time_constant + SHIFT_KF - SHIFT_USEC);
    if (time_freq > time_tolerance)
        time_freq = time_tolerance;
    else if (time_freq < -time_tolerance)
        time_freq = -time_tolerance;
}
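
/*
 * A rough worked example of the update above (values hedged on the stock
 * <sys/timex.h> constants, MAXPHASE = 512000 us and SHIFT_UPDATE = 12):
 * an ntp_adjtime() call reporting the clock 100000 us (100 ms) slow gives
 *
 *      offset      = -100000;
 *      time_offset = -100000 << SHIFT_UPDATE;  -- within +-MAXPHASE, no clamp
 *
 * and hardclock() then works this scaled offset off a little at a time,
 * SHIFT_KG + time_constant bits' worth at each second rollover.  An offset
 * beyond +-512000 us would have been clamped first, which is where the
 * +-512 ms limit quoted in the comment comes from.
 */
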
/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
    void *dummy;
{
    register int i;

    /*
     * Set divisors to 1 (normal case) and let the machine-specific
     * code do its bit.
     */
    psdiv = pscnt = 1;
    cpu_initclocks();

    /*
     * Compute profhz/stathz, and fix profhz if needed.
     */
    i = stathz ? stathz : hz;
    if (profhz == 0)
        profhz = i;
    psratio = profhz / i;
}
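
/*
 * For instance, on hardware where the statistics clock runs at 128 Hz and
 * the profiling clock at 1024 Hz (typical i386 RTC values, used here only
 * as an illustration), psratio works out to 8: while profiling is active
 * the statistics clock is sped up to profhz, and the pscnt divider in
 * statclock() below makes sure only every eighth tick is charged to the
 * statistics counters.
 */
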
/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
    register struct clockframe *frame;
{
    register struct callout *p1;
    register struct proc *p;
    register int needsoft;

    /*
     * Update real-time timeout queue.
     * At front of queue are some number of events which are ``due''.
     * The time to these is <= 0 and if negative represents the
     * number of ticks which have passed since it was supposed to happen.
     * The rest of the q elements (times > 0) are events yet to happen,
     * where the time for each is given as a delta from the previous.
     * Decrementing just the first of these serves to decrement the time
     * to all events.
     */
    needsoft = 0;
    for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
        if (--p1->c_time > 0)
            break;
        needsoft = 1;
        if (p1->c_time == 0)
            break;
    }

    p = curproc;
    if (p) {
        register struct pstats *pstats;

        /*
         * Run current process's virtual and profile time, as needed.
         */
        pstats = p->p_stats;
        if (CLKF_USERMODE(frame) &&
            timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
            itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
            psignal(p, SIGVTALRM);
        if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
            itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
            psignal(p, SIGPROF);
    }

    /*
     * If no separate statistics clock is available, run it from here.
     */
    if (stathz == 0)
        statclock(frame);

    /*
     * Increment the time-of-day.
     */
    ticks++;
    {
        int time_update;
        struct timeval newtime = time;
        long ltemp;

        if (timedelta == 0) {
            time_update = CPU_THISTICKLEN(tick);
        } else {
            time_update = CPU_THISTICKLEN(tick) + tickdelta;
            timedelta -= tickdelta;
        }
        BUMPTIME(&mono_time, time_update);

        /*
         * Compute the phase adjustment.  If the low-order bits
         * (time_phase) of the update overflow, bump the high-order bits
         * (time_update).
         */
        time_phase += time_adj;
        if (time_phase <= -FINEUSEC) {
            ltemp = -time_phase >> SHIFT_SCALE;
            time_phase += ltemp << SHIFT_SCALE;
            time_update -= ltemp;
        } else if (time_phase >= FINEUSEC) {
            ltemp = time_phase >> SHIFT_SCALE;
            time_phase -= ltemp << SHIFT_SCALE;
            time_update += ltemp;
        }

        newtime.tv_usec += time_update;
        /*
         * On rollover of the second the phase adjustment to be used for
         * the next second is calculated.  Also, the maximum error is
         * increased by the tolerance.  If the PPS frequency discipline
         * code is present, the phase is increased to compensate for the
         * CPU clock oscillator frequency error.
         *
         * With SHIFT_SCALE = 23, the maximum frequency adjustment is
         * +-256 us per tick, or 25.6 ms/s at a clock frequency of 100
         * Hz.  The time contribution is shifted right a minimum of two
         * bits, while the frequency contribution is a right shift.
         * Thus, overflow is prevented if the frequency contribution is
         * limited to half the maximum or 15.625 ms/s.
         */
        if (newtime.tv_usec >= 1000000) {
            newtime.tv_usec -= 1000000;
            newtime.tv_sec++;
            time_maxerror += time_tolerance >> SHIFT_USEC;
            if (time_offset < 0) {
                ltemp = -time_offset >>
                    (SHIFT_KG + time_constant);
                time_offset += ltemp;
                time_adj = -ltemp <<
                    (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
            } else {
                ltemp = time_offset >>
                    (SHIFT_KG + time_constant);
                time_offset -= ltemp;
                time_adj = ltemp <<
                    (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
            }
#ifdef PPS_SYNC
            /*
             * Gnaw on the watchdog counter and update the frequency
             * computed by the pll and the PPS signal.
             */
            pps_valid++;
            if (pps_valid == PPS_VALID) {
                pps_jitter = MAXTIME;
                pps_stabil = MAXFREQ;
                time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
                    STA_PPSWANDER | STA_PPSERROR);
            }
            ltemp = time_freq + pps_freq;
#else
            ltemp = time_freq;
#endif /* PPS_SYNC */
            if (ltemp < 0)
                time_adj -= -ltemp >>
                    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
            else
                time_adj += ltemp >>
                    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

            /*
             * When the CPU clock oscillator frequency is not a
             * power of two in Hz, the SHIFT_HZ is only an
             * approximate scale factor.  In the SunOS kernel, this
             * results in a PLL gain factor of 1/1.28 = 0.78 of what
             * it should be.  In the following code the overall gain
             * is increased by a factor of 1.25, which results in a
             * residual error less than 3 percent.
             */
            /* Same thing applies for FreeBSD --GAW */
            if (hz == 100) {
                if (time_adj < 0)
                    time_adj -= -time_adj >> 2;
                else
                    time_adj += time_adj >> 2;
            }

            /*
             * XXX - this is really bogus, but can't be fixed until
             * xntpd's idea of the system clock is fixed to know how
             * the user wants leap seconds handled; in the meantime,
             * we assume that users of NTP are running without proper
             * leap second support (this is now the default anyway).
             */
            /*
             * Leap second processing.  If in leap-insert state at
             * the end of the day, the system clock is set back one
             * second; if in leap-delete state, the system clock is
             * set ahead one second.  The microtime() routine or
             * external clock driver will ensure that reported time
             * is always monotonic.  The ugly divides should be
             * replaced.
             */
            switch (time_state) {

            case TIME_OK:
                if (time_status & STA_INS)
                    time_state = TIME_INS;
                else if (time_status & STA_DEL)
                    time_state = TIME_DEL;
                break;

            case TIME_INS:
                if (newtime.tv_sec % 86400 == 0) {
                    newtime.tv_sec--;
                    time_state = TIME_OOP;
                }
                break;

            case TIME_DEL:
                if ((newtime.tv_sec + 1) % 86400 == 0) {
                    newtime.tv_sec++;
                    time_state = TIME_WAIT;
                }
                break;

            case TIME_OOP:
                time_state = TIME_WAIT;
                break;

            case TIME_WAIT:
                if (!(time_status & (STA_INS | STA_DEL)))
                    time_state = TIME_OK;
            }
        }
        CPU_CLOCKUPDATE(&time, &newtime);
    }

    /*
     * Process callouts at a very low cpu priority, so we don't keep the
     * relatively high clock interrupt priority any longer than necessary.
     */
    if (needsoft) {
        if (CLKF_BASEPRI(frame)) {
            /*
             * Save the overhead of a software interrupt;
             * it will happen as soon as we return, so do it now.
             */
            (void)splsoftclock();
            softclock();
        } else
            setsoftclock();
    }
}
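
/*
 * To make the leap-second state machine above concrete: on a day that ends
 * with an inserted leap second (STA_INS set by the daemon), the states seen
 * at successive second rollovers are roughly
 *
 *      TIME_OK   -> TIME_INS   (STA_INS noticed during the day)
 *      TIME_INS  -> TIME_OOP   (at midnight, tv_sec stepped back one second)
 *      TIME_OOP  -> TIME_WAIT  (the repeated second has passed)
 *      TIME_WAIT -> TIME_OK    (once the daemon clears STA_INS)
 *
 * A deleted leap second (STA_DEL) instead steps tv_sec forward by one at
 * 23:59:59 and goes straight to TIME_WAIT.  This is only a reading of the
 * switch statement above, not additional behaviour.
 */
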
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
    register struct callout *c;
    register void *arg;
    register void (*func) __P((void *));
    register int s;

    s = splhigh();
    while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
        func = c->c_func;
        arg = c->c_arg;
        calltodo.c_next = c->c_next;
        c->c_next = callfree;
        callfree = c;
        splx(s);
        (*func)(arg);
        (void) splhigh();
    }
    splx(s);
}

/*
 * timeout --
 *      Execute a function after a specified length of time.
 *
 * untimeout --
 *      Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
    timeout_t ftn;
    void *arg;
    register int ticks;
{
    register struct callout *new, *p, *t;
    register int s;

    if (ticks <= 0)
        ticks = 1;

    /* Lock out the clock. */
    s = splhigh();

    /* Fill in the next free callout structure. */
    if (callfree == NULL)
        panic("timeout table full");
    new = callfree;
    callfree = new->c_next;
    new->c_arg = arg;
    new->c_func = ftn;

    /*
     * The time for each event is stored as a difference from the time
     * of the previous event on the queue.  Walk the queue, correcting
     * the ticks argument for queue entries passed.  Correct the ticks
     * value for the queue entry immediately after the insertion point
     * as well.  Watch out for negative c_time values; these represent
     * overdue events.
     */
    for (p = &calltodo;
        (t = p->c_next) != NULL && ticks > t->c_time; p = t)
        if (t->c_time > 0)
            ticks -= t->c_time;
    new->c_time = ticks;
    if (t != NULL)
        t->c_time -= ticks;

    /* Insert the new entry into the queue. */
    p->c_next = new;
    new->c_next = t;
    splx(s);
}

void
untimeout(ftn, arg)
    timeout_t ftn;
    void *arg;
{
    register struct callout *p, *t;
    register int s;

    s = splhigh();
    for (p = &calltodo; (t = p->c_next) != NULL; p = t)
        if (t->c_func == ftn && t->c_arg == arg) {
            /* Increment next entry's tick count. */
            if (t->c_next && t->c_time > 0)
                t->c_next->c_time += t->c_time;

            /* Move entry from callout queue to callfree queue. */
            p->c_next = t->c_next;
            t->c_next = callfree;
            callfree = t;
            break;
        }
    splx(s);
}
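
/*
 * The queue stores deltas, not absolute times: three pending callouts due in
 * 5, 8 and 14 ticks are kept as c_time values of 5, 3 and 6, so hardclock()
 * only ever decrements the head entry.  A typical (hypothetical) driver use
 * of the interface rearms a periodic handler with timeout() and cancels it
 * on detach with the same (function, argument) pair; mydrv_watchdog and its
 * softc below are made up for the example and are not part of this file.
 */
#if 0   /* illustrative sketch only, not compiled */
static void
mydrv_watchdog(arg)
    void *arg;
{
    struct mydrv_softc *sc = arg;

    mydrv_check(sc);                        /* hypothetical device poll */
    timeout(mydrv_watchdog, sc, hz);        /* run again in one second */
}
/* on detach: untimeout(mydrv_watchdog, sc); */
#endif
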
/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
    struct timeval *tv;
{
    register unsigned long ticks;
    register long sec, usec;
    int s;

    /*
     * If the number of usecs in the whole seconds part of the time
     * difference fits in a long, then the total number of usecs will
     * fit in an unsigned long.  Compute the total and convert it to
     * ticks, rounding up and adding 1 to allow for the current tick
     * to expire.  Rounding also depends on unsigned long arithmetic
     * to avoid overflow.
     *
     * Otherwise, if the number of ticks in the whole seconds part of
     * the time difference fits in a long, then convert the parts to
     * ticks separately and add, using similar rounding methods and
     * overflow avoidance.  This method would work in the previous
     * case but it is slightly slower and assumes that hz is integral.
     *
     * Otherwise, round the time difference down to the maximum
     * representable value.
     *
     * If ints have 32 bits, then the maximum value for any timeout in
     * 10ms ticks is 248 days.
     */
    s = splclock();
    sec = tv->tv_sec - time.tv_sec;
    usec = tv->tv_usec - time.tv_usec;
    splx(s);
    if (usec < 0) {
        sec--;
        usec += 1000000;
    }
    if (sec < 0) {
#ifdef DIAGNOSTIC
        printf("hzto: negative time difference %ld sec %ld usec\n",
            sec, usec);
#endif
        ticks = 1;
    } else if (sec <= LONG_MAX / 1000000)
        ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
            / tick + 1;
    else if (sec <= LONG_MAX / hz)
        ticks = sec * hz
            + ((unsigned long)usec + (tick - 1)) / tick + 1;
    else
        ticks = LONG_MAX;
    if (ticks > INT_MAX)
        ticks = INT_MAX;
    return (ticks);
}
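
/*
 * A hedged example of the intended pairing with timeout(): to fire a
 * (hypothetical) handler at an absolute wall-clock instant `when', convert
 * the absolute time to a relative tick count first.  The extra tick added
 * by hzto() is there so the callout does not fire early within the
 * currently elapsing tick.
 */
#if 0   /* illustrative sketch only, not compiled */
    struct timeval when;        /* absolute deadline, filled in elsewhere */

    timeout(mydrv_alarm, sc, hzto(&when));
#endif
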
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
    register struct proc *p;
{
    int s;

    if ((p->p_flag & P_PROFIL) == 0) {
        p->p_flag |= P_PROFIL;
        if (++profprocs == 1 && stathz != 0) {
            s = splstatclock();
            psdiv = pscnt = psratio;
            setstatclockrate(profhz);
            splx(s);
        }
    }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
    register struct proc *p;
{
    int s;

    if (p->p_flag & P_PROFIL) {
        p->p_flag &= ~P_PROFIL;
        if (--profprocs == 0 && stathz != 0) {
            s = splstatclock();
            psdiv = pscnt = 1;
            setstatclockrate(stathz);
            splx(s);
        }
    }
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
    register struct clockframe *frame;
{
#ifdef GPROF
    register struct gmonparam *g;
#endif
    register struct proc *p = curproc;
    register int i;

    if (p) {
        struct pstats *pstats;
        struct rusage *ru;
        struct vmspace *vm;

        /* bump the resource usage of integral space use */
        if ((pstats = p->p_stats) && (ru = &pstats->p_ru) &&
            (vm = p->p_vmspace)) {
            ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
            ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
            ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
            if ((vm->vm_pmap.pm_stats.resident_count * PAGE_SIZE / 1024) >
                ru->ru_maxrss) {
                ru->ru_maxrss =
                    vm->vm_pmap.pm_stats.resident_count * PAGE_SIZE / 1024;
            }
        }
    }

    if (CLKF_USERMODE(frame)) {
        if (p->p_flag & P_PROFIL)
            addupc_intr(p, CLKF_PC(frame), 1);
        if (--pscnt > 0)
            return;
        /*
         * Came from user mode; CPU was in user state.
         * If this process is being profiled, record the tick.
         */
        p->p_uticks++;
        if (p->p_nice > NZERO)
            cp_time[CP_NICE]++;
        else
            cp_time[CP_USER]++;
    } else {
#ifdef GPROF
        /*
         * Kernel statistics are just like addupc_intr, only easier.
         */
        g = &_gmonparam;
        if (g->state == GMON_PROF_ON) {
            i = CLKF_PC(frame) - g->lowpc;
            if (i < g->textsize) {
                i /= HISTFRACTION * sizeof(*g->kcount);
                g->kcount[i]++;
            }
        }
#endif
        if (--pscnt > 0)
            return;
        /*
         * Came from kernel mode, so we were:
         * - handling an interrupt,
         * - doing syscall or trap work on behalf of the current
         *   user process, or
         * - spinning in the idle loop.
         * Whichever it is, charge the time as appropriate.
         * Note that we charge interrupts to the current process,
         * regardless of whether they are ``for'' that process,
         * so that we know how much of its real time was spent
         * in ``non-process'' (i.e., interrupt) work.
         */
        if (CLKF_INTR(frame)) {
            if (p != NULL)
                p->p_iticks++;
            cp_time[CP_INTR]++;
        } else if (p != NULL) {
            p->p_sticks++;
            cp_time[CP_SYS]++;
        } else
            cp_time[CP_IDLE]++;
    }
    pscnt = psdiv;

    /*
     * We maintain statistics shown by user-level statistics
     * programs: the amount of time in each cpu state, and
     * the amount of time each of DK_NDRIVE ``drives'' is busy.
     *
     * XXX should either run linked list of drives, or (better)
     * grab timestamps in the start & done code.
     */
    for (i = 0; i < DK_NDRIVE; i++)
        if (dk_busy & (1 << i))
            dk_time[i]++;

    /*
     * We adjust the priority of the current process.  The priority of
     * a process gets worse as it accumulates CPU time.  The cpu usage
     * estimator (p_estcpu) is increased here.  The formula for computing
     * priorities (in kern_synch.c) will compute a different value each
     * time p_estcpu increases by 4.  The cpu usage estimator ramps up
     * quite quickly when the process is running (linearly), and decays
     * away exponentially, at a rate which is proportionally slower when
     * the system is busy.  The basic principle is that the system will
     * 90% forget that the process used a lot of CPU time in 5 * loadav
     * seconds.  This causes the system to favor processes which haven't
     * run much recently, and to round-robin among other processes.
     */
    if (p != NULL) {
        p->p_cpticks++;
        if (++p->p_estcpu == 0)
            p->p_estcpu--;
        if ((p->p_estcpu & 3) == 0) {
            resetpriority(p);
            if (p->p_priority >= PUSER)
                p->p_priority = p->p_usrpri;
        }
    }
}
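
/*
 * The saturating increment and the (p_estcpu & 3) test above can be read as
 * a small worked example: p_estcpu climbs by one per statistics tick, sticks
 * at its maximum rather than wrapping to zero, and only every fourth value
 * (4, 8, 12, ...) triggers resetpriority(), matching the "different value
 * each time p_estcpu increases by 4" remark in the comment.  The decay
 * itself happens in kern_synch.c, not here.
 */
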
/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
    struct clockinfo clkinfo;

    /*
     * Construct clockinfo structure.
     */
    clkinfo.hz = hz;
    clkinfo.tick = tick;
    clkinfo.profhz = profhz;
    clkinfo.stathz = stathz ? stathz : hz;
    return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo", "");
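
/*
 * From userland this shows up as the kern.clockrate sysctl.  A minimal
 * sketch of a consumer, using the documented sysctl(3) interface (error
 * handling omitted); it is not part of the kernel and is only illustrative.
 */
#if 0   /* illustrative sketch only, not compiled into the kernel */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <stdio.h>

int
main(void)
{
    struct clockinfo ci;
    int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
    size_t len = sizeof(ci);

    if (sysctl(mib, 2, &ci, &len, NULL, 0) == 0)
        printf("hz=%d tick=%d stathz=%d profhz=%d\n",
            ci.hz, ci.tick, ci.stathz, ci.profhz);
    return (0);
}
#endif
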
/*#ifdef PPS_SYNC*/
#if 0
/*
 * This code is completely bogus; if anybody ever wants to use it, get
 * the current version from Dave Mills.
 */

/*
 * hardpps() - discipline CPU clock oscillator to external pps signal
 *
 * This routine is called at each PPS interrupt in order to discipline
 * the CPU clock oscillator to the PPS signal.  It integrates successive
 * phase differences between the two oscillators and calculates the
 * frequency offset.  This is used in hardclock() to discipline the CPU
 * clock oscillator so that intrinsic frequency error is cancelled out.
 * The code requires the caller to capture the time and hardware
 * counter value at the designated PPS signal transition.
 */
void
hardpps(tvp, usec)
    struct timeval *tvp;        /* time at PPS */
    long usec;                  /* hardware counter at PPS */
{
    long u_usec, v_usec, bigtick;
    long cal_sec, cal_usec;

    /*
     * During the calibration interval adjust the starting time when
     * the tick overflows.  At the end of the interval compute the
     * duration of the interval and the difference of the hardware
     * counters at the beginning and end of the interval.  This code
     * is deliciously complicated by the fact that valid differences may
     * exceed the value of tick when using long calibration
     * intervals and small ticks.  Note that the counter can be
     * greater than tick if caught at just the wrong instant, but
     * the values returned and used here are correct.
     */
    bigtick = (long)tick << SHIFT_USEC;
    pps_usec -= ntp_pll.ybar;
    if (pps_usec >= bigtick)
        pps_usec -= bigtick;
    if (pps_usec < 0)
        pps_usec += bigtick;
    pps_time.tv_sec++;
    pps_count++;
    if (pps_count < (1 << pps_shift))
        return;
    pps_count = 0;
    ntp_pll.calcnt++;
    u_usec = usec << SHIFT_USEC;
    v_usec = pps_usec - u_usec;
    if (v_usec >= bigtick >> 1)
        v_usec -= bigtick;
    if (v_usec < -(bigtick >> 1))
        v_usec += bigtick;
    if (v_usec < 0)
        v_usec = -(-v_usec >> ntp_pll.shift);
    else
        v_usec = v_usec >> ntp_pll.shift;
    pps_usec = u_usec;
    cal_sec = tvp->tv_sec;
    cal_usec = tvp->tv_usec;
    cal_sec -= pps_time.tv_sec;
    cal_usec -= pps_time.tv_usec;
    if (cal_usec < 0) {
        cal_usec += 1000000;
        cal_sec--;
    }
    pps_time = *tvp;

    /*
     * Check for lost interrupts, noise, excessive jitter and
     * excessive frequency error.  The number of timer ticks during
     * the interval may vary +-1 tick.  Add to this a margin of one
     * tick for the PPS signal jitter and maximum frequency
     * deviation.  If the limits are exceeded, the calibration
     * interval is reset to the minimum and we start over.
     */
    u_usec = (long)tick << 1;
    if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
        || (cal_sec == 0 && cal_usec < u_usec))
        || v_usec > ntp_pll.tolerance || v_usec < -ntp_pll.tolerance) {
        ntp_pll.jitcnt++;
        ntp_pll.shift = NTP_PLL.SHIFT;
        pps_dispinc = PPS_DISPINC;
        ntp_pll.intcnt = 0;
        return;
    }

    /*
     * A three-stage median filter is used to help deglitch the pps
     * signal.  The median sample becomes the offset estimate; the
     * difference between the other two samples becomes the
     * dispersion estimate.
     */
    pps_mf[2] = pps_mf[1];
    pps_mf[1] = pps_mf[0];
    pps_mf[0] = v_usec;
    if (pps_mf[0] > pps_mf[1]) {
        if (pps_mf[1] > pps_mf[2]) {
            u_usec = pps_mf[1];                 /* 0 1 2 */
            v_usec = pps_mf[0] - pps_mf[2];
        } else if (pps_mf[2] > pps_mf[0]) {
            u_usec = pps_mf[0];                 /* 2 0 1 */
            v_usec = pps_mf[2] - pps_mf[1];
        } else {
            u_usec = pps_mf[2];                 /* 0 2 1 */
            v_usec = pps_mf[0] - pps_mf[1];
        }
    } else {
        if (pps_mf[1] < pps_mf[2]) {
            u_usec = pps_mf[1];                 /* 2 1 0 */
            v_usec = pps_mf[2] - pps_mf[0];
        } else if (pps_mf[2] < pps_mf[0]) {
            u_usec = pps_mf[0];                 /* 1 0 2 */
            v_usec = pps_mf[1] - pps_mf[2];
        } else {
            u_usec = pps_mf[2];                 /* 1 2 0 */
            v_usec = pps_mf[1] - pps_mf[0];
        }
    }
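
    /*
     * A quick worked example of the median selection above: with samples
     * pps_mf[] = { 3, -7, 2 } (most recent first), the "0 2 1" branch is
     * taken, so u_usec (the offset estimate) becomes 2, the median, and
     * v_usec (the dispersion estimate) becomes 3 - (-7) = 10, the spread
     * of the two outliers.  This only restates the code for readers
     * tracing the six orderings.
     */
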
1120 */ 1121 v_usec = (v_usec >> 1) - ntp_pll.disp; 1122 if (v_usec < 0) 1123 ntp_pll.disp -= -v_usec >> PPS_AVG; 1124 else 1125 ntp_pll.disp += v_usec >> PPS_AVG; 1126 if (ntp_pll.disp > pps_dispmax) { 1127 ntp_pll.discnt++; 1128 return; 1129 } 1130 if (u_usec < 0) { 1131 ntp_pll.ybar -= -u_usec >> PPS_AVG; 1132 if (ntp_pll.ybar < -ntp_pll.tolerance) 1133 ntp_pll.ybar = -ntp_pll.tolerance; 1134 u_usec = -u_usec; 1135 } else { 1136 ntp_pll.ybar += u_usec >> PPS_AVG; 1137 if (ntp_pll.ybar > ntp_pll.tolerance) 1138 ntp_pll.ybar = ntp_pll.tolerance; 1139 } 1140 1141 /* 1142 * Here the calibration interval is adjusted. If the maximum 1143 * time difference is greater than tick/4, reduce the interval 1144 * by half. If this is not the case for four consecutive 1145 * intervals, double the interval. 1146 */ 1147 if (u_usec << ntp_pll.shift > bigtick >> 2) { 1148 ntp_pll.intcnt = 0; 1149 if (ntp_pll.shift > NTP_PLL.SHIFT) { 1150 ntp_pll.shift--; 1151 pps_dispinc <<= 1; 1152 } 1153 } else if (ntp_pll.intcnt >= 4) { 1154 ntp_pll.intcnt = 0; 1155 if (ntp_pll.shift < NTP_PLL.SHIFTMAX) { 1156 ntp_pll.shift++; 1157 pps_dispinc >>= 1; 1158 } 1159 } else 1160 ntp_pll.intcnt++; 1161 } 1162 #endif /* PPS_SYNC */ 1163