1df8bae1dSRodney W. Grimes /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * (c) UNIX System Laboratories, Inc. 5df8bae1dSRodney W. Grimes * All or some portions of this file are derived from material licensed 6df8bae1dSRodney W. Grimes * to the University of California by American Telephone and Telegraph 7df8bae1dSRodney W. Grimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8df8bae1dSRodney W. Grimes * the permission of UNIX System Laboratories, Inc. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 19df8bae1dSRodney W. Grimes * must display the following acknowledgement: 20df8bae1dSRodney W. Grimes * This product includes software developed by the University of 21df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 22df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 23df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 24df8bae1dSRodney W. Grimes * without specific prior written permission. 
25df8bae1dSRodney W. Grimes * 26df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36df8bae1dSRodney W. Grimes * SUCH DAMAGE. 37df8bae1dSRodney W. Grimes * 38df8bae1dSRodney W. Grimes * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 39eae8fc2cSSteve Passe * $Id: kern_clock.c,v 1.45 1997/11/24 15:15:27 bde Exp $ 40df8bae1dSRodney W. Grimes */ 41df8bae1dSRodney W. Grimes 423f31c649SGarrett Wollman /* Portions of this software are covered by the following: */ 433f31c649SGarrett Wollman /****************************************************************************** 443f31c649SGarrett Wollman * * 453f31c649SGarrett Wollman * Copyright (c) David L. 
Mills 1993, 1994 * 463f31c649SGarrett Wollman * * 473f31c649SGarrett Wollman * Permission to use, copy, modify, and distribute this software and its * 483f31c649SGarrett Wollman * documentation for any purpose and without fee is hereby granted, provided * 493f31c649SGarrett Wollman * that the above copyright notice appears in all copies and that both the * 503f31c649SGarrett Wollman * copyright notice and this permission notice appear in supporting * 513f31c649SGarrett Wollman * documentation, and that the name University of Delaware not be used in * 523f31c649SGarrett Wollman * advertising or publicity pertaining to distribution of the software * 533f31c649SGarrett Wollman * without specific, written prior permission. The University of Delaware * 543f31c649SGarrett Wollman * makes no representations about the suitability this software for any * 553f31c649SGarrett Wollman * purpose. It is provided "as is" without express or implied warranty. * 563f31c649SGarrett Wollman * * 573f31c649SGarrett Wollman *****************************************************************************/ 583f31c649SGarrett Wollman 59df8bae1dSRodney W. Grimes #include <sys/param.h> 60df8bae1dSRodney W. Grimes #include <sys/systm.h> 61df8bae1dSRodney W. Grimes #include <sys/dkstat.h> 62df8bae1dSRodney W. Grimes #include <sys/callout.h> 63df8bae1dSRodney W. Grimes #include <sys/kernel.h> 64df8bae1dSRodney W. Grimes #include <sys/proc.h> 65df8bae1dSRodney W. Grimes #include <sys/resourcevar.h> 66797f2d22SPoul-Henning Kamp #include <sys/signalvar.h> 673f31c649SGarrett Wollman #include <sys/timex.h> 688a129caeSDavid Greenman #include <vm/vm.h> 69996c772fSJohn Dyson #include <sys/lock.h> 70efeaf95aSDavid Greenman #include <vm/pmap.h> 71efeaf95aSDavid Greenman #include <vm/vm_map.h> 72797f2d22SPoul-Henning Kamp #include <sys/sysctl.h> 73df8bae1dSRodney W. Grimes 74df8bae1dSRodney W. 
Grimes #include <machine/cpu.h> 75835bd1ceSBruce Evans #define CLOCK_HAIR /* XXX */ 763f31c649SGarrett Wollman #include <machine/clock.h> 77b1037dcdSBruce Evans #include <machine/limits.h> 78df8bae1dSRodney W. Grimes 79df8bae1dSRodney W. Grimes #ifdef GPROF 80df8bae1dSRodney W. Grimes #include <sys/gmon.h> 81df8bae1dSRodney W. Grimes #endif 82df8bae1dSRodney W. Grimes 83eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK) 84eae8fc2cSSteve Passe #include <machine/smp.h> 85eae8fc2cSSteve Passe #endif 86eae8fc2cSSteve Passe 87d841aaa7SBruce Evans static void initclocks __P((void *dummy)); 882b14f991SJulian Elischer SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) 892b14f991SJulian Elischer 90cc3d5226SBruce Evans /* Exported to machdep.c. */ 91ab36c067SJustin T. Gibbs struct callout *callout; 92ab36c067SJustin T. Gibbs struct callout_list callfree; 93ab36c067SJustin T. Gibbs int callwheelsize, callwheelbits, callwheelmask; 94ab36c067SJustin T. Gibbs struct callout_tailq *callwheel; 95f23b4c91SGarrett Wollman 96cc3d5226SBruce Evans 97f23b4c91SGarrett Wollman /* Some of these don't belong here, but it's easiest to concentrate them. 
*/ 98eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK) 99eae8fc2cSSteve Passe long cp_time[CPUSTATES]; 100eae8fc2cSSteve Passe #else 10127a0b398SPoul-Henning Kamp static long cp_time[CPUSTATES]; 102eae8fc2cSSteve Passe #endif 103f23b4c91SGarrett Wollman long dk_seek[DK_NDRIVE]; 104bea0f0beSBruce Evans static long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */ 105f23b4c91SGarrett Wollman long dk_wds[DK_NDRIVE]; 106f23b4c91SGarrett Wollman long dk_wpms[DK_NDRIVE]; 107f23b4c91SGarrett Wollman long dk_xfer[DK_NDRIVE]; 108f23b4c91SGarrett Wollman 109f23b4c91SGarrett Wollman int dk_busy; 1108478cabaSGarrett Wollman int dk_ndrive = 0; 1118478cabaSGarrett Wollman char dk_names[DK_NDRIVE][DK_NAMELEN]; 112f23b4c91SGarrett Wollman 113f23b4c91SGarrett Wollman long tk_cancc; 114f23b4c91SGarrett Wollman long tk_nin; 115f23b4c91SGarrett Wollman long tk_nout; 116f23b4c91SGarrett Wollman long tk_rawcc; 117f23b4c91SGarrett Wollman 118df8bae1dSRodney W. Grimes /* 119df8bae1dSRodney W. Grimes * Clock handling routines. 120df8bae1dSRodney W. Grimes * 121df8bae1dSRodney W. Grimes * This code is written to operate with two timers that run independently of 122df8bae1dSRodney W. Grimes * each other. The main clock, running hz times per second, is used to keep 123df8bae1dSRodney W. Grimes * track of real time. The second timer handles kernel and user profiling, 124df8bae1dSRodney W. Grimes * and does resource use estimation. If the second timer is programmable, 125df8bae1dSRodney W. Grimes * it is randomized to avoid aliasing between the two clocks. For example, 126df8bae1dSRodney W. Grimes * the randomization prevents an adversary from always giving up the cpu 127df8bae1dSRodney W. Grimes * just before its quantum expires. Otherwise, it would never accumulate 128df8bae1dSRodney W. Grimes * cpu ticks. The mean frequency of the second timer is stathz. 129df8bae1dSRodney W. Grimes * 130df8bae1dSRodney W. 
Grimes * If no second timer exists, stathz will be zero; in this case we drive 131df8bae1dSRodney W. Grimes * profiling and statistics off the main clock. This WILL NOT be accurate; 132df8bae1dSRodney W. Grimes * do not do it unless absolutely necessary. 133df8bae1dSRodney W. Grimes * 134df8bae1dSRodney W. Grimes * The statistics clock may (or may not) be run at a higher rate while 135df8bae1dSRodney W. Grimes * profiling. This profile clock runs at profhz. We require that profhz 136df8bae1dSRodney W. Grimes * be an integral multiple of stathz. 137df8bae1dSRodney W. Grimes * 138df8bae1dSRodney W. Grimes * If the statistics clock is running fast, it must be divided by the ratio 139df8bae1dSRodney W. Grimes * profhz/stathz for statistics. (For profiling, every tick counts.) 140df8bae1dSRodney W. Grimes */ 141df8bae1dSRodney W. Grimes 142df8bae1dSRodney W. Grimes /* 143df8bae1dSRodney W. Grimes * TODO: 144df8bae1dSRodney W. Grimes * allocate more timeout table slots when table overflows. 145df8bae1dSRodney W. Grimes */ 146df8bae1dSRodney W. Grimes 147df8bae1dSRodney W. Grimes /* 148df8bae1dSRodney W. Grimes * Bump a timeval by a small number of usec's. 149df8bae1dSRodney W. Grimes */ 150df8bae1dSRodney W. Grimes #define BUMPTIME(t, usec) { \ 151df8bae1dSRodney W. Grimes register volatile struct timeval *tp = (t); \ 152df8bae1dSRodney W. Grimes register long us; \ 153df8bae1dSRodney W. Grimes \ 154df8bae1dSRodney W. Grimes tp->tv_usec = us = tp->tv_usec + (usec); \ 155df8bae1dSRodney W. Grimes if (us >= 1000000) { \ 156df8bae1dSRodney W. Grimes tp->tv_usec = us - 1000000; \ 157df8bae1dSRodney W. Grimes tp->tv_sec++; \ 158df8bae1dSRodney W. Grimes } \ 159df8bae1dSRodney W. Grimes } 160df8bae1dSRodney W. Grimes 161df8bae1dSRodney W. Grimes int stathz; 162df8bae1dSRodney W. Grimes int profhz; 163cc3d5226SBruce Evans static int profprocs; 164df8bae1dSRodney W. Grimes int ticks; 165ab36c067SJustin T. Gibbs static int softticks; /* Like ticks, but for softclock(). 
*/ 166ab36c067SJustin T. Gibbs static struct callout *nextsoftcheck; /* Next callout to be checked. */ 167df8bae1dSRodney W. Grimes static int psdiv, pscnt; /* prof => stat divider */ 168cc3d5226SBruce Evans int psratio; /* ratio: prof / stat */ 169df8bae1dSRodney W. Grimes 170df8bae1dSRodney W. Grimes volatile struct timeval time; 171df8bae1dSRodney W. Grimes volatile struct timeval mono_time; 172df8bae1dSRodney W. Grimes 173df8bae1dSRodney W. Grimes /* 174885bd8e4SJohn Hay * Phase/frequency-lock loop (PLL/FLL) definitions 1753f31c649SGarrett Wollman * 1763f31c649SGarrett Wollman * The following variables are read and set by the ntp_adjtime() system 1773f31c649SGarrett Wollman * call. 1783f31c649SGarrett Wollman * 1793f31c649SGarrett Wollman * time_state shows the state of the system clock, with values defined 1803f31c649SGarrett Wollman * in the timex.h header file. 1813f31c649SGarrett Wollman * 1823f31c649SGarrett Wollman * time_status shows the status of the system clock, with bits defined 1833f31c649SGarrett Wollman * in the timex.h header file. 1843f31c649SGarrett Wollman * 185885bd8e4SJohn Hay * time_offset is used by the PLL/FLL to adjust the system time in small 1863f31c649SGarrett Wollman * increments. 1873f31c649SGarrett Wollman * 1883f31c649SGarrett Wollman * time_constant determines the bandwidth or "stiffness" of the PLL. 1893f31c649SGarrett Wollman * 1903f31c649SGarrett Wollman * time_tolerance determines maximum frequency error or tolerance of the 1913f31c649SGarrett Wollman * CPU clock oscillator and is a property of the architecture; however, 1923f31c649SGarrett Wollman * in principle it could change as result of the presence of external 1933f31c649SGarrett Wollman * discipline signals, for instance. 
1943f31c649SGarrett Wollman * 1953f31c649SGarrett Wollman * time_precision is usually equal to the kernel tick variable; however, 1963f31c649SGarrett Wollman * in cases where a precision clock counter or external clock is 1973f31c649SGarrett Wollman * available, the resolution can be much less than this and depend on 1983f31c649SGarrett Wollman * whether the external clock is working or not. 1993f31c649SGarrett Wollman * 2003f31c649SGarrett Wollman * time_maxerror is initialized by a ntp_adjtime() call and increased by 2013f31c649SGarrett Wollman * the kernel once each second to reflect the maximum error 2023f31c649SGarrett Wollman * bound growth. 2033f31c649SGarrett Wollman * 2043f31c649SGarrett Wollman * time_esterror is set and read by the ntp_adjtime() call, but 2053f31c649SGarrett Wollman * otherwise not used by the kernel. 2063f31c649SGarrett Wollman */ 2073f31c649SGarrett Wollman int time_status = STA_UNSYNC; /* clock status bits */ 2083f31c649SGarrett Wollman int time_state = TIME_OK; /* clock state */ 2093f31c649SGarrett Wollman long time_offset = 0; /* time offset (us) */ 2103f31c649SGarrett Wollman long time_constant = 0; /* pll time constant */ 2113f31c649SGarrett Wollman long time_tolerance = MAXFREQ; /* frequency tolerance (scaled ppm) */ 2123f31c649SGarrett Wollman long time_precision = 1; /* clock precision (us) */ 2133f31c649SGarrett Wollman long time_maxerror = MAXPHASE; /* maximum error (us) */ 2143f31c649SGarrett Wollman long time_esterror = MAXPHASE; /* estimated error (us) */ 2153f31c649SGarrett Wollman 2163f31c649SGarrett Wollman /* 217885bd8e4SJohn Hay * The following variables establish the state of the PLL/FLL and the 2183f31c649SGarrett Wollman * residual time and frequency offset of the local clock. The scale 2193f31c649SGarrett Wollman * factors are defined in the timex.h header file. 
2203f31c649SGarrett Wollman * 2213f31c649SGarrett Wollman * time_phase and time_freq are the phase increment and the frequency 2223f31c649SGarrett Wollman * increment, respectively, of the kernel time variable at each tick of 2233f31c649SGarrett Wollman * the clock. 2243f31c649SGarrett Wollman * 2253f31c649SGarrett Wollman * time_freq is set via ntp_adjtime() from a value stored in a file when 2263f31c649SGarrett Wollman * the synchronization daemon is first started. Its value is retrieved 2273f31c649SGarrett Wollman * via ntp_adjtime() and written to the file about once per hour by the 2283f31c649SGarrett Wollman * daemon. 2293f31c649SGarrett Wollman * 2303f31c649SGarrett Wollman * time_adj is the adjustment added to the value of tick at each timer 231885bd8e4SJohn Hay * interrupt and is recomputed from time_phase and time_freq at each 232885bd8e4SJohn Hay * seconds rollover. 2333f31c649SGarrett Wollman * 2343f31c649SGarrett Wollman * time_reftime is the second's portion of the system time on the last 2353f31c649SGarrett Wollman * call to ntp_adjtime(). It is used to adjust the time_freq variable 2363f31c649SGarrett Wollman * and to increase the time_maxerror as the time since last update 2373f31c649SGarrett Wollman * increases. 2383f31c649SGarrett Wollman */ 23927a0b398SPoul-Henning Kamp static long time_phase = 0; /* phase offset (scaled us) */ 2403f31c649SGarrett Wollman long time_freq = 0; /* frequency offset (scaled ppm) */ 24127a0b398SPoul-Henning Kamp static long time_adj = 0; /* tick adjust (scaled 1 / hz) */ 24227a0b398SPoul-Henning Kamp static long time_reftime = 0; /* time at last adjustment (s) */ 2433f31c649SGarrett Wollman 2443f31c649SGarrett Wollman #ifdef PPS_SYNC 2453f31c649SGarrett Wollman /* 246885bd8e4SJohn Hay * The following variables are used only if the kernel PPS discipline 247885bd8e4SJohn Hay * code is configured (PPS_SYNC). The scale factors are defined in the 248885bd8e4SJohn Hay * timex.h header file. 
2493f31c649SGarrett Wollman * 2503f31c649SGarrett Wollman * pps_time contains the time at each calibration interval, as read by 251885bd8e4SJohn Hay * microtime(). pps_count counts the seconds of the calibration 252885bd8e4SJohn Hay * interval, the duration of which is nominally pps_shift in powers of 253885bd8e4SJohn Hay * two. 2543f31c649SGarrett Wollman * 2553f31c649SGarrett Wollman * pps_offset is the time offset produced by the time median filter 256885bd8e4SJohn Hay * pps_tf[], while pps_jitter is the dispersion (jitter) measured by 257885bd8e4SJohn Hay * this filter. 2583f31c649SGarrett Wollman * 2593f31c649SGarrett Wollman * pps_freq is the frequency offset produced by the frequency median 260885bd8e4SJohn Hay * filter pps_ff[], while pps_stabil is the dispersion (wander) measured 261885bd8e4SJohn Hay * by this filter. 2623f31c649SGarrett Wollman * 2633f31c649SGarrett Wollman * pps_usec is latched from a high resolution counter or external clock 2643f31c649SGarrett Wollman * at pps_time. Here we want the hardware counter contents only, not the 2653f31c649SGarrett Wollman * contents plus the time_tv.usec as usual. 2663f31c649SGarrett Wollman * 2673f31c649SGarrett Wollman * pps_valid counts the number of seconds since the last PPS update. It 2683f31c649SGarrett Wollman * is used as a watchdog timer to disable the PPS discipline should the 2693f31c649SGarrett Wollman * PPS signal be lost. 2703f31c649SGarrett Wollman * 2713f31c649SGarrett Wollman * pps_glitch counts the number of seconds since the beginning of an 2723f31c649SGarrett Wollman * offset burst more than tick/2 from current nominal offset. It is used 2733f31c649SGarrett Wollman * mainly to suppress error bursts due to priority conflicts between the 2743f31c649SGarrett Wollman * PPS interrupt and timer interrupt. 2753f31c649SGarrett Wollman * 2763f31c649SGarrett Wollman * pps_intcnt counts the calibration intervals for use in the interval- 2773f31c649SGarrett Wollman * adaptation algorithm. 
It's just too complicated for words. 2783f31c649SGarrett Wollman */ 2793f31c649SGarrett Wollman struct timeval pps_time; /* kernel time at last interval */ 2803f31c649SGarrett Wollman long pps_offset = 0; /* pps time offset (us) */ 2813f31c649SGarrett Wollman long pps_jitter = MAXTIME; /* pps time dispersion (jitter) (us) */ 2823f31c649SGarrett Wollman long pps_tf[] = {0, 0, 0}; /* pps time offset median filter (us) */ 2833f31c649SGarrett Wollman long pps_freq = 0; /* frequency offset (scaled ppm) */ 2843f31c649SGarrett Wollman long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ 2853f31c649SGarrett Wollman long pps_ff[] = {0, 0, 0}; /* frequency offset median filter */ 2863f31c649SGarrett Wollman long pps_usec = 0; /* microsec counter at last interval */ 2873f31c649SGarrett Wollman long pps_valid = PPS_VALID; /* pps signal watchdog counter */ 2883f31c649SGarrett Wollman int pps_glitch = 0; /* pps signal glitch counter */ 2893f31c649SGarrett Wollman int pps_count = 0; /* calibration interval counter (s) */ 2903f31c649SGarrett Wollman int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ 2913f31c649SGarrett Wollman int pps_intcnt = 0; /* intervals at current duration */ 2923f31c649SGarrett Wollman 2933f31c649SGarrett Wollman /* 2943f31c649SGarrett Wollman * PPS signal quality monitors 2953f31c649SGarrett Wollman * 2963f31c649SGarrett Wollman * pps_jitcnt counts the seconds that have been discarded because the 2973f31c649SGarrett Wollman * jitter measured by the time median filter exceeds the limit MAXTIME 2983f31c649SGarrett Wollman * (100 us). 2993f31c649SGarrett Wollman * 3003f31c649SGarrett Wollman * pps_calcnt counts the frequency calibration intervals, which are 3013f31c649SGarrett Wollman * variable from 4 s to 256 s. 
3023f31c649SGarrett Wollman * 3033f31c649SGarrett Wollman * pps_errcnt counts the calibration intervals which have been discarded 3043f31c649SGarrett Wollman * because the wander exceeds the limit MAXFREQ (100 ppm) or where the 3053f31c649SGarrett Wollman * calibration interval jitter exceeds two ticks. 3063f31c649SGarrett Wollman * 3073f31c649SGarrett Wollman * pps_stbcnt counts the calibration intervals that have been discarded 3083f31c649SGarrett Wollman * because the frequency wander exceeds the limit MAXFREQ / 4 (25 us). 3093f31c649SGarrett Wollman */ 3103f31c649SGarrett Wollman long pps_jitcnt = 0; /* jitter limit exceeded */ 3113f31c649SGarrett Wollman long pps_calcnt = 0; /* calibration intervals */ 3123f31c649SGarrett Wollman long pps_errcnt = 0; /* calibration errors */ 3133f31c649SGarrett Wollman long pps_stbcnt = 0; /* stability limit exceeded */ 3143f31c649SGarrett Wollman #endif /* PPS_SYNC */ 3153f31c649SGarrett Wollman 3163f31c649SGarrett Wollman /* XXX none of this stuff works under FreeBSD */ 3173f31c649SGarrett Wollman #ifdef EXT_CLOCK 3183f31c649SGarrett Wollman /* 3193f31c649SGarrett Wollman * External clock definitions 3203f31c649SGarrett Wollman * 3213f31c649SGarrett Wollman * The following definitions and declarations are used only if an 3223f31c649SGarrett Wollman * external clock (HIGHBALL or TPRO) is configured on the system. 3233f31c649SGarrett Wollman */ 3243f31c649SGarrett Wollman #define CLOCK_INTERVAL 30 /* CPU clock update interval (s) */ 3253f31c649SGarrett Wollman 3263f31c649SGarrett Wollman /* 3273f31c649SGarrett Wollman * The clock_count variable is set to CLOCK_INTERVAL at each PPS 3283f31c649SGarrett Wollman * interrupt and decremented once each second. 
3293f31c649SGarrett Wollman */ 3303f31c649SGarrett Wollman int clock_count = 0; /* CPU clock counter */ 3313f31c649SGarrett Wollman 3323f31c649SGarrett Wollman #ifdef HIGHBALL 3333f31c649SGarrett Wollman /* 3343f31c649SGarrett Wollman * The clock_offset and clock_cpu variables are used by the HIGHBALL 3353f31c649SGarrett Wollman * interface. The clock_offset variable defines the offset between 3363f31c649SGarrett Wollman * system time and the HIGHBALL counters. The clock_cpu variable contains 3373f31c649SGarrett Wollman * the offset between the system clock and the HIGHBALL clock for use in 3383f31c649SGarrett Wollman * disciplining the kernel time variable. 3393f31c649SGarrett Wollman */ 3403f31c649SGarrett Wollman extern struct timeval clock_offset; /* Highball clock offset */ 3413f31c649SGarrett Wollman long clock_cpu = 0; /* CPU clock adjust */ 3423f31c649SGarrett Wollman #endif /* HIGHBALL */ 3433f31c649SGarrett Wollman #endif /* EXT_CLOCK */ 3443f31c649SGarrett Wollman 3453f31c649SGarrett Wollman /* 3463f31c649SGarrett Wollman * hardupdate() - local clock update 3473f31c649SGarrett Wollman * 3483f31c649SGarrett Wollman * This routine is called by ntp_adjtime() to update the local clock 349885bd8e4SJohn Hay * phase and frequency. The implementation is of an adaptive-parameter, 350885bd8e4SJohn Hay * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new 351885bd8e4SJohn Hay * time and frequency offset estimates for each call. If the kernel PPS 3523f31c649SGarrett Wollman * discipline code is configured (PPS_SYNC), the PPS signal itself 3533f31c649SGarrett Wollman * determines the new time offset, instead of the calling argument. 3543f31c649SGarrett Wollman * Presumably, calls to ntp_adjtime() occur only when the caller 3553f31c649SGarrett Wollman * believes the local clock is valid within some bound (+-128 ms with 3563f31c649SGarrett Wollman * NTP). 
If the caller's time is far different than the PPS time, an 3573f31c649SGarrett Wollman * argument will ensue, and it's not clear who will lose. 3583f31c649SGarrett Wollman * 359885bd8e4SJohn Hay * For uncompensated quartz crystal oscillators and nominal update 360885bd8e4SJohn Hay * intervals less than 1024 s, operation should be in phase-lock mode 361885bd8e4SJohn Hay * (STA_FLL = 0), where the loop is disciplined to phase. For update 362885bd8e4SJohn Hay * intervals greater than this, operation should be in frequency-lock 363885bd8e4SJohn Hay * mode (STA_FLL = 1), where the loop is disciplined to frequency. 3643f31c649SGarrett Wollman * * The offset argument is the phase offset supplied by the caller; * nothing is updated unless STA_PLL or STA_PPSTIME is set in * time_status. * 3653f31c649SGarrett Wollman * Note: splclock() is in effect. 3663f31c649SGarrett Wollman */ 3673f31c649SGarrett Wollman void 3683f31c649SGarrett Wollman hardupdate(offset) 3693f31c649SGarrett Wollman long offset; 3703f31c649SGarrett Wollman { 3713f31c649SGarrett Wollman long ltemp, mtemp; 3723f31c649SGarrett Wollman 3733f31c649SGarrett Wollman if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME)) 3743f31c649SGarrett Wollman return; /* neither PLL nor PPS time discipline is enabled */ 3753f31c649SGarrett Wollman ltemp = offset; 3763f31c649SGarrett Wollman #ifdef PPS_SYNC 3773f31c649SGarrett Wollman if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) 3783f31c649SGarrett Wollman ltemp = pps_offset; 3793f31c649SGarrett Wollman #endif /* PPS_SYNC */ 380885bd8e4SJohn Hay 381885bd8e4SJohn Hay /* 382885bd8e4SJohn Hay * Scale the phase adjustment and clamp to the operating range. 
383885bd8e4SJohn Hay */ 3843f31c649SGarrett Wollman if (ltemp > MAXPHASE) 3853f31c649SGarrett Wollman time_offset = MAXPHASE << SHIFT_UPDATE; 3863f31c649SGarrett Wollman else if (ltemp < -MAXPHASE) 3873f31c649SGarrett Wollman time_offset = -(MAXPHASE << SHIFT_UPDATE); 3883f31c649SGarrett Wollman else 3893f31c649SGarrett Wollman time_offset = ltemp << SHIFT_UPDATE; 390885bd8e4SJohn Hay 391885bd8e4SJohn Hay /* 392885bd8e4SJohn Hay * Select whether the frequency is to be controlled and in which 393885bd8e4SJohn Hay * mode (PLL or FLL). Clamp to the operating range. Ugly 394885bd8e4SJohn Hay * multiply/divide should be replaced someday. 395885bd8e4SJohn Hay */ 396885bd8e4SJohn Hay if (time_status & STA_FREQHOLD || time_reftime == 0) 397885bd8e4SJohn Hay time_reftime = time.tv_sec; 3983f31c649SGarrett Wollman mtemp = time.tv_sec - time_reftime; /* seconds since the previous update; 0 if time_reftime was just reset above */ 3993f31c649SGarrett Wollman time_reftime = time.tv_sec; 400885bd8e4SJohn Hay if (time_status & STA_FLL) { 401885bd8e4SJohn Hay if (mtemp >= MINSEC) { 402885bd8e4SJohn Hay ltemp = ((time_offset / mtemp) << (SHIFT_USEC - 403885bd8e4SJohn Hay SHIFT_UPDATE)); 4043f31c649SGarrett Wollman if (ltemp < 0) 405885bd8e4SJohn Hay time_freq -= -ltemp >> SHIFT_KH; /* shift the magnitude, then subtract, so a negative value is never right-shifted */ 4063f31c649SGarrett Wollman else 407885bd8e4SJohn Hay time_freq += ltemp >> SHIFT_KH; 408885bd8e4SJohn Hay } 409885bd8e4SJohn Hay } else { 410885bd8e4SJohn Hay if (mtemp < MAXSEC) { 411885bd8e4SJohn Hay ltemp *= mtemp; 412885bd8e4SJohn Hay if (ltemp < 0) 413885bd8e4SJohn Hay time_freq -= -ltemp >> (time_constant + 414885bd8e4SJohn Hay time_constant + SHIFT_KF - 415885bd8e4SJohn Hay SHIFT_USEC); 416885bd8e4SJohn Hay else 417885bd8e4SJohn Hay time_freq += ltemp >> (time_constant + 418885bd8e4SJohn Hay time_constant + SHIFT_KF - 419885bd8e4SJohn Hay SHIFT_USEC); 420885bd8e4SJohn Hay } 421885bd8e4SJohn Hay } 4223f31c649SGarrett Wollman if 
(time_freq > time_tolerance) 4233f31c649SGarrett Wollman time_freq = time_tolerance; 4243f31c649SGarrett Wollman else if (time_freq < -time_tolerance) 
4253f31c649SGarrett Wollman time_freq = -time_tolerance; 4263f31c649SGarrett Wollman } 4273f31c649SGarrett Wollman 4283f31c649SGarrett Wollman 4293f31c649SGarrett Wollman 4303f31c649SGarrett Wollman /* 431df8bae1dSRodney W. Grimes * Initialize clock frequencies and start both clocks running. 432df8bae1dSRodney W. Grimes */ 4332b14f991SJulian Elischer /* ARGSUSED*/ 4342b14f991SJulian Elischer static void 435d841aaa7SBruce Evans initclocks(dummy) 436d841aaa7SBruce Evans void *dummy; 437df8bae1dSRodney W. Grimes { 438df8bae1dSRodney W. Grimes register int i; 439df8bae1dSRodney W. Grimes 440df8bae1dSRodney W. Grimes /* 441df8bae1dSRodney W. Grimes * Set divisors to 1 (normal case) and let the machine-specific 442df8bae1dSRodney W. Grimes * code do its bit. 443df8bae1dSRodney W. Grimes */ 444df8bae1dSRodney W. Grimes psdiv = pscnt = 1; 445df8bae1dSRodney W. Grimes cpu_initclocks(); 446df8bae1dSRodney W. Grimes 447df8bae1dSRodney W. Grimes /* 448df8bae1dSRodney W. Grimes * Compute profhz/stathz, and fix profhz if needed. 449df8bae1dSRodney W. Grimes */ 450df8bae1dSRodney W. Grimes i = stathz ? stathz : hz; /* statistics rate: stathz, or hz when stathz is 0 */ 451df8bae1dSRodney W. Grimes if (profhz == 0) 452df8bae1dSRodney W. Grimes profhz = i; 453df8bae1dSRodney W. Grimes psratio = profhz / i; 454df8bae1dSRodney W. Grimes } 455df8bae1dSRodney W. Grimes 456df8bae1dSRodney W. Grimes /* 457df8bae1dSRodney W. Grimes * The real-time timer, interrupting hz times per second. 458df8bae1dSRodney W. Grimes */ 459df8bae1dSRodney W. Grimes void 460df8bae1dSRodney W. Grimes hardclock(frame) 461df8bae1dSRodney W. Grimes register struct clockframe *frame; 462df8bae1dSRodney W. Grimes { 463df8bae1dSRodney W. Grimes register struct proc *p; 464df8bae1dSRodney W. Grimes 465df8bae1dSRodney W. Grimes p = curproc; 466df8bae1dSRodney W. Grimes if (p) { 467df8bae1dSRodney W. Grimes register struct pstats *pstats; 468df8bae1dSRodney W. Grimes 469df8bae1dSRodney W. Grimes /* 470df8bae1dSRodney W. 
Grimes * Run current process's virtual and profile time, as needed. 471df8bae1dSRodney W. Grimes */ 472df8bae1dSRodney W. Grimes pstats = p->p_stats; 473df8bae1dSRodney W. Grimes if (CLKF_USERMODE(frame) && 474df8bae1dSRodney W. Grimes timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 475df8bae1dSRodney W. Grimes itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 476df8bae1dSRodney W. Grimes psignal(p, SIGVTALRM); 477df8bae1dSRodney W. Grimes if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 478df8bae1dSRodney W. Grimes itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 479df8bae1dSRodney W. Grimes psignal(p, SIGPROF); 480df8bae1dSRodney W. Grimes } 481df8bae1dSRodney W. Grimes 482eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK) 483eae8fc2cSSteve Passe forward_hardclock(pscnt); 484eae8fc2cSSteve Passe #endif 485df8bae1dSRodney W. Grimes /* 486df8bae1dSRodney W. Grimes * If no separate statistics clock is available, run it from here. 487df8bae1dSRodney W. Grimes */ 488df8bae1dSRodney W. Grimes if (stathz == 0) 489df8bae1dSRodney W. Grimes statclock(frame); 490df8bae1dSRodney W. Grimes 491df8bae1dSRodney W. Grimes /* 4923f31c649SGarrett Wollman * Increment the time-of-day. 493df8bae1dSRodney W. Grimes */ 494df8bae1dSRodney W. Grimes ticks++; 4953f31c649SGarrett Wollman { 4963f31c649SGarrett Wollman int time_update; 4973f31c649SGarrett Wollman struct timeval newtime = time; 4983f31c649SGarrett Wollman long ltemp; 4993f31c649SGarrett Wollman 5003f31c649SGarrett Wollman if (timedelta == 0) { 50107e3b0c2SGarrett Wollman time_update = CPU_THISTICKLEN(tick); 5023f31c649SGarrett Wollman } else { 50307e3b0c2SGarrett Wollman time_update = CPU_THISTICKLEN(tick) + tickdelta; 504df8bae1dSRodney W. Grimes timedelta -= tickdelta; 505df8bae1dSRodney W. Grimes } 5063f31c649SGarrett Wollman BUMPTIME(&mono_time, time_update); 5073f31c649SGarrett Wollman 5083f31c649SGarrett Wollman /* 5093f31c649SGarrett Wollman * Compute the phase adjustment. 
If the low-order bits 5103f31c649SGarrett Wollman * (time_phase) of the update overflow, bump the high-order bits 5113f31c649SGarrett Wollman * (time_update). 5123f31c649SGarrett Wollman */ 5133f31c649SGarrett Wollman time_phase += time_adj; 5143f31c649SGarrett Wollman if (time_phase <= -FINEUSEC) { 5153f31c649SGarrett Wollman ltemp = -time_phase >> SHIFT_SCALE; 5163f31c649SGarrett Wollman time_phase += ltemp << SHIFT_SCALE; 5173f31c649SGarrett Wollman time_update -= ltemp; 5183f31c649SGarrett Wollman } 5193f31c649SGarrett Wollman else if (time_phase >= FINEUSEC) { 5203f31c649SGarrett Wollman ltemp = time_phase >> SHIFT_SCALE; 5213f31c649SGarrett Wollman time_phase -= ltemp << SHIFT_SCALE; 5223f31c649SGarrett Wollman time_update += ltemp; 5233f31c649SGarrett Wollman } 5243f31c649SGarrett Wollman 5253f31c649SGarrett Wollman newtime.tv_usec += time_update; 5263f31c649SGarrett Wollman /* 5273f31c649SGarrett Wollman * On rollover of the second the phase adjustment to be used for 5283f31c649SGarrett Wollman * the next second is calculated. Also, the maximum error is 5293f31c649SGarrett Wollman * increased by the tolerance. If the PPS frequency discipline 5303f31c649SGarrett Wollman * code is present, the phase is increased to compensate for the 5313f31c649SGarrett Wollman * CPU clock oscillator frequency error. 5323f31c649SGarrett Wollman * 533885bd8e4SJohn Hay * On a 32-bit machine and given parameters in the timex.h 534885bd8e4SJohn Hay * header file, the maximum phase adjustment is +-512 ms and 535885bd8e4SJohn Hay * maximum frequency offset is (a tad less than) +-512 ppm. On a 536885bd8e4SJohn Hay * 64-bit machine, you shouldn't need to ask. 
5373f31c649SGarrett Wollman */ 5383f31c649SGarrett Wollman if (newtime.tv_usec >= 1000000) { 5393f31c649SGarrett Wollman newtime.tv_usec -= 1000000; 5403f31c649SGarrett Wollman newtime.tv_sec++; 5413f31c649SGarrett Wollman time_maxerror += time_tolerance >> SHIFT_USEC; 542885bd8e4SJohn Hay 5433f31c649SGarrett Wollman /* 544885bd8e4SJohn Hay * Compute the phase adjustment for the next second. In 545885bd8e4SJohn Hay * PLL mode, the offset is reduced by a fixed factor 546885bd8e4SJohn Hay * times the time constant. In FLL mode the offset is 547885bd8e4SJohn Hay * used directly. In either mode, the maximum phase 548885bd8e4SJohn Hay * adjustment for each second is clamped so as to spread 549885bd8e4SJohn Hay * the adjustment over not more than the number of 550885bd8e4SJohn Hay * seconds between updates. 5513f31c649SGarrett Wollman */ 552885bd8e4SJohn Hay if (time_offset < 0) { 553885bd8e4SJohn Hay ltemp = -time_offset; 554885bd8e4SJohn Hay if (!(time_status & STA_FLL)) 555885bd8e4SJohn Hay ltemp >>= SHIFT_KG + time_constant; 556885bd8e4SJohn Hay if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) 557885bd8e4SJohn Hay ltemp = (MAXPHASE / MINSEC) << 558885bd8e4SJohn Hay SHIFT_UPDATE; 559885bd8e4SJohn Hay time_offset += ltemp; 560885bd8e4SJohn Hay time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - 561885bd8e4SJohn Hay SHIFT_UPDATE); 562885bd8e4SJohn Hay } else { 563885bd8e4SJohn Hay ltemp = time_offset; 564885bd8e4SJohn Hay if (!(time_status & STA_FLL)) 565885bd8e4SJohn Hay ltemp >>= SHIFT_KG + time_constant; 566885bd8e4SJohn Hay if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) 567885bd8e4SJohn Hay ltemp = (MAXPHASE / MINSEC) << 568885bd8e4SJohn Hay SHIFT_UPDATE; 569885bd8e4SJohn Hay time_offset -= ltemp; 570885bd8e4SJohn Hay time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - 571885bd8e4SJohn Hay SHIFT_UPDATE); 572885bd8e4SJohn Hay } 573885bd8e4SJohn Hay 574885bd8e4SJohn Hay /* 575885bd8e4SJohn Hay * Compute the frequency estimate and additional phase 576885bd8e4SJohn Hay * 
adjustment due to frequency error for the next 577885bd8e4SJohn Hay * second. When the PPS signal is engaged, gnaw on the 578885bd8e4SJohn Hay * watchdog counter and update the frequency computed by 579885bd8e4SJohn Hay * the pll and the PPS signal. 580885bd8e4SJohn Hay */ 581885bd8e4SJohn Hay #ifdef PPS_SYNC 5823f31c649SGarrett Wollman pps_valid++; 5833f31c649SGarrett Wollman if (pps_valid == PPS_VALID) { 5843f31c649SGarrett Wollman pps_jitter = MAXTIME; 5853f31c649SGarrett Wollman pps_stabil = MAXFREQ; 5863f31c649SGarrett Wollman time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | 5873f31c649SGarrett Wollman STA_PPSWANDER | STA_PPSERROR); 5883f31c649SGarrett Wollman } 5893f31c649SGarrett Wollman ltemp = time_freq + pps_freq; 5903f31c649SGarrett Wollman #else 5913f31c649SGarrett Wollman ltemp = time_freq; 5923f31c649SGarrett Wollman #endif /* PPS_SYNC */ 5933f31c649SGarrett Wollman if (ltemp < 0) 5943f31c649SGarrett Wollman time_adj -= -ltemp >> 5953f31c649SGarrett Wollman (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); 5963f31c649SGarrett Wollman else 5973f31c649SGarrett Wollman time_adj += ltemp >> 5983f31c649SGarrett Wollman (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); 5993f31c649SGarrett Wollman 600885bd8e4SJohn Hay #if SHIFT_HZ == 7 6013f31c649SGarrett Wollman /* 6023f31c649SGarrett Wollman * When the CPU clock oscillator frequency is not a 6033f31c649SGarrett Wollman * power of two in Hz, the SHIFT_HZ is only an 6043f31c649SGarrett Wollman * approximate scale factor. In the SunOS kernel, this 6053f31c649SGarrett Wollman * results in a PLL gain factor of 1/1.28 = 0.78 what it 6063f31c649SGarrett Wollman * should be. In the following code the overall gain is 6073f31c649SGarrett Wollman * increased by a factor of 1.25, which results in a 6083f31c649SGarrett Wollman * residual error less than 3 percent. 
6093f31c649SGarrett Wollman */ 6103f31c649SGarrett Wollman /* Same thing applies for FreeBSD --GAW */ 6113f31c649SGarrett Wollman if (hz == 100) { 6123f31c649SGarrett Wollman if (time_adj < 0) 6133f31c649SGarrett Wollman time_adj -= -time_adj >> 2; 6143f31c649SGarrett Wollman else 6153f31c649SGarrett Wollman time_adj += time_adj >> 2; 6163f31c649SGarrett Wollman } 617885bd8e4SJohn Hay #endif /* SHIFT_HZ */ 6183f31c649SGarrett Wollman 6193f31c649SGarrett Wollman /* XXX - this is really bogus, but can't be fixed until 6203f31c649SGarrett Wollman xntpd's idea of the system clock is fixed to know how 6213f31c649SGarrett Wollman the user wants leap seconds handled; in the mean time, 6223f31c649SGarrett Wollman we assume that users of NTP are running without proper 6233f31c649SGarrett Wollman leap second support (this is now the default anyway) */ 6243f31c649SGarrett Wollman /* 6253f31c649SGarrett Wollman * Leap second processing. If in leap-insert state at 6263f31c649SGarrett Wollman * the end of the day, the system clock is set back one 6273f31c649SGarrett Wollman * second; if in leap-delete state, the system clock is 6283f31c649SGarrett Wollman * set ahead one second. The microtime() routine or 6293f31c649SGarrett Wollman * external clock driver will insure that reported time 6303f31c649SGarrett Wollman * is always monotonic. The ugly divides should be 6313f31c649SGarrett Wollman * replaced. 
6323f31c649SGarrett Wollman */ 6333f31c649SGarrett Wollman switch (time_state) { 6343f31c649SGarrett Wollman 6353f31c649SGarrett Wollman case TIME_OK: 6363f31c649SGarrett Wollman if (time_status & STA_INS) 6373f31c649SGarrett Wollman time_state = TIME_INS; 6383f31c649SGarrett Wollman else if (time_status & STA_DEL) 6393f31c649SGarrett Wollman time_state = TIME_DEL; 6403f31c649SGarrett Wollman break; 6413f31c649SGarrett Wollman 6423f31c649SGarrett Wollman case TIME_INS: 6433f31c649SGarrett Wollman if (newtime.tv_sec % 86400 == 0) { 6443f31c649SGarrett Wollman newtime.tv_sec--; 6453f31c649SGarrett Wollman time_state = TIME_OOP; 6463f31c649SGarrett Wollman } 6473f31c649SGarrett Wollman break; 6483f31c649SGarrett Wollman 6493f31c649SGarrett Wollman case TIME_DEL: 6503f31c649SGarrett Wollman if ((newtime.tv_sec + 1) % 86400 == 0) { 6513f31c649SGarrett Wollman newtime.tv_sec++; 6523f31c649SGarrett Wollman time_state = TIME_WAIT; 6533f31c649SGarrett Wollman } 6543f31c649SGarrett Wollman break; 6553f31c649SGarrett Wollman 6563f31c649SGarrett Wollman case TIME_OOP: 6573f31c649SGarrett Wollman time_state = TIME_WAIT; 6583f31c649SGarrett Wollman break; 6593f31c649SGarrett Wollman 6603f31c649SGarrett Wollman case TIME_WAIT: 6613f31c649SGarrett Wollman if (!(time_status & (STA_INS | STA_DEL))) 6623f31c649SGarrett Wollman time_state = TIME_OK; 6633f31c649SGarrett Wollman } 6643f31c649SGarrett Wollman } 6653f31c649SGarrett Wollman CPU_CLOCKUPDATE(&time, &newtime); 6663f31c649SGarrett Wollman } 667df8bae1dSRodney W. Grimes 668df8bae1dSRodney W. Grimes /* 669df8bae1dSRodney W. Grimes * Process callouts at a very low cpu priority, so we don't keep the 670df8bae1dSRodney W. Grimes * relatively high clock interrupt priority any longer than necessary. 671df8bae1dSRodney W. Grimes */ 672ab36c067SJustin T. Gibbs if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 673df8bae1dSRodney W. Grimes if (CLKF_BASEPRI(frame)) { 674df8bae1dSRodney W. Grimes /* 675df8bae1dSRodney W. 
Grimes * Save the overhead of a software interrupt; 676df8bae1dSRodney W. Grimes * it will happen as soon as we return, so do it now. 677df8bae1dSRodney W. Grimes */ 678df8bae1dSRodney W. Grimes (void)splsoftclock(); 679df8bae1dSRodney W. Grimes softclock(); 680df8bae1dSRodney W. Grimes } else 681df8bae1dSRodney W. Grimes setsoftclock(); 682ab36c067SJustin T. Gibbs } else if (softticks + 1 == ticks) { 683ab36c067SJustin T. Gibbs ++softticks; 684df8bae1dSRodney W. Grimes } 685df8bae1dSRodney W. Grimes } 686df8bae1dSRodney W. Grimes 687df8bae1dSRodney W. Grimes /* 688ab36c067SJustin T. Gibbs * The callout mechanism is based on the work of Adam M. Costello and 689ab36c067SJustin T. Gibbs * George Varghese, published in a technical report entitled "Redesigning 690ab36c067SJustin T. Gibbs * the BSD Callout and Timer Facilities" and modified slightly for inclusion 691ab36c067SJustin T. Gibbs * in FreeBSD by Justin T. Gibbs. The original work on the data structures 692ab36c067SJustin T. Gibbs * used in this implementation was published by G.Varghese and A. Lauck in 693ab36c067SJustin T. Gibbs * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 694ab36c067SJustin T. Gibbs * the Efficient Implementation of a Timer Facility" in the Proceedings of 695ab36c067SJustin T. Gibbs * the 11th ACM Annual Symposium on Operating Systems Principles, 696ab36c067SJustin T. Gibbs * Austin, Texas Nov 1987. 697ab36c067SJustin T. Gibbs */ 698ab36c067SJustin T. Gibbs /* 699df8bae1dSRodney W. Grimes * Software (low priority) clock interrupt. 700df8bae1dSRodney W. Grimes * Run periodic events from timeout queue. 701df8bae1dSRodney W. Grimes */ 702df8bae1dSRodney W. Grimes /*ARGSUSED*/ 703df8bae1dSRodney W. Grimes void 704df8bae1dSRodney W. Grimes softclock() 705df8bae1dSRodney W. Grimes { 706df8bae1dSRodney W. Grimes register struct callout *c; 70745327611SJustin T. Gibbs register struct callout_tailq *bucket; 708df8bae1dSRodney W. Grimes register int s; 70945327611SJustin T. 
Gibbs register int curticks; 710ab36c067SJustin T. Gibbs register int steps; /* 711ab36c067SJustin T. Gibbs * Number of steps taken since 712ab36c067SJustin T. Gibbs * we last allowed interrupts. 713ab36c067SJustin T. Gibbs */ 714df8bae1dSRodney W. Grimes 715ab36c067SJustin T. Gibbs #ifndef MAX_SOFTCLOCK_STEPS 716ab36c067SJustin T. Gibbs #define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ 717ab36c067SJustin T. Gibbs #endif /* MAX_SOFTCLOCK_STEPS */ 718ab36c067SJustin T. Gibbs 719ab36c067SJustin T. Gibbs steps = 0; 720df8bae1dSRodney W. Grimes s = splhigh(); 721ab36c067SJustin T. Gibbs while (softticks != ticks) { 72245327611SJustin T. Gibbs softticks++; 72345327611SJustin T. Gibbs /* 72445327611SJustin T. Gibbs * softticks may be modified by hard clock, so cache 72545327611SJustin T. Gibbs * it while we work on a given bucket. 72645327611SJustin T. Gibbs */ 72745327611SJustin T. Gibbs curticks = softticks; 72845327611SJustin T. Gibbs bucket = &callwheel[curticks & callwheelmask]; 72945327611SJustin T. Gibbs c = TAILQ_FIRST(bucket); 730ab36c067SJustin T. Gibbs while (c) { 73145327611SJustin T. Gibbs if (c->c_time != curticks) { 732ab36c067SJustin T. Gibbs c = TAILQ_NEXT(c, c_links.tqe); 733ab36c067SJustin T. Gibbs ++steps; 734ab36c067SJustin T. Gibbs if (steps >= MAX_SOFTCLOCK_STEPS) { 735ab36c067SJustin T. Gibbs nextsoftcheck = c; 73645327611SJustin T. Gibbs /* Give interrupts a chance. */ 737df8bae1dSRodney W. Grimes splx(s); 738ab36c067SJustin T. Gibbs s = splhigh(); 739ab36c067SJustin T. Gibbs c = nextsoftcheck; 740ab36c067SJustin T. Gibbs steps = 0; 741df8bae1dSRodney W. Grimes } 742ab36c067SJustin T. Gibbs } else { 743ab36c067SJustin T. Gibbs void (*c_func)(void *); 744ab36c067SJustin T. Gibbs void *c_arg; 745ab36c067SJustin T. Gibbs 746ab36c067SJustin T. Gibbs nextsoftcheck = TAILQ_NEXT(c, c_links.tqe); 74745327611SJustin T. Gibbs TAILQ_REMOVE(bucket, c, c_links.tqe); 748ab36c067SJustin T. Gibbs c_func = c->c_func; 749ab36c067SJustin T. 
Gibbs c_arg = c->c_arg; 750ab36c067SJustin T. Gibbs c->c_func = NULL; 751ab36c067SJustin T. Gibbs SLIST_INSERT_HEAD(&callfree, c, c_links.sle); 752ab36c067SJustin T. Gibbs splx(s); 753ab36c067SJustin T. Gibbs c_func(c_arg); 754ab36c067SJustin T. Gibbs s = splhigh(); 755ab36c067SJustin T. Gibbs steps = 0; 756ab36c067SJustin T. Gibbs c = nextsoftcheck; 757ab36c067SJustin T. Gibbs } 758ab36c067SJustin T. Gibbs } 759ab36c067SJustin T. Gibbs } 760ab36c067SJustin T. Gibbs nextsoftcheck = NULL; 761df8bae1dSRodney W. Grimes splx(s); 762df8bae1dSRodney W. Grimes } 763df8bae1dSRodney W. Grimes 764df8bae1dSRodney W. Grimes /* 765df8bae1dSRodney W. Grimes * timeout -- 766df8bae1dSRodney W. Grimes * Execute a function after a specified length of time. 767df8bae1dSRodney W. Grimes * 768df8bae1dSRodney W. Grimes * untimeout -- 769df8bae1dSRodney W. Grimes * Cancel previous timeout function call. 770df8bae1dSRodney W. Grimes * 771ab36c067SJustin T. Gibbs * callout_handle_init -- 772ab36c067SJustin T. Gibbs * Initialize a handle so that using it with untimeout is benign. 773ab36c067SJustin T. Gibbs * 774df8bae1dSRodney W. Grimes * See AT&T BCI Driver Reference Manual for specification. This 775ab36c067SJustin T. Gibbs * implementation differs from that one in that although an 776ab36c067SJustin T. Gibbs * identification value is returned from timeout, the original 777ab36c067SJustin T. Gibbs * arguments to timeout as well as the identifier are used to 778ab36c067SJustin T. Gibbs * identify entries for untimeout. 779df8bae1dSRodney W. Grimes */ 780ab36c067SJustin T. Gibbs struct callout_handle 781ab36c067SJustin T. Gibbs timeout(ftn, arg, to_ticks) 782f23b4c91SGarrett Wollman timeout_t ftn; 783df8bae1dSRodney W. Grimes void *arg; 784ab36c067SJustin T. Gibbs register int to_ticks; 785df8bae1dSRodney W. Grimes { 786ab36c067SJustin T. Gibbs int s; 787ab36c067SJustin T. Gibbs struct callout *new; 788ab36c067SJustin T. Gibbs struct callout_handle handle; 789df8bae1dSRodney W. 
Grimes 790ab36c067SJustin T. Gibbs if (to_ticks <= 0) 791ab36c067SJustin T. Gibbs to_ticks = 1; 792df8bae1dSRodney W. Grimes 793df8bae1dSRodney W. Grimes /* Lock out the clock. */ 794df8bae1dSRodney W. Grimes s = splhigh(); 795df8bae1dSRodney W. Grimes 796df8bae1dSRodney W. Grimes /* Fill in the next free callout structure. */ 797ab36c067SJustin T. Gibbs new = SLIST_FIRST(&callfree); 798ab36c067SJustin T. Gibbs if (new == NULL) 799ab36c067SJustin T. Gibbs /* XXX Attempt to malloc first */ 800df8bae1dSRodney W. Grimes panic("timeout table full"); 801ab36c067SJustin T. Gibbs 802ab36c067SJustin T. Gibbs SLIST_REMOVE_HEAD(&callfree, c_links.sle); 803df8bae1dSRodney W. Grimes new->c_arg = arg; 804df8bae1dSRodney W. Grimes new->c_func = ftn; 80545327611SJustin T. Gibbs new->c_time = ticks + to_ticks; 80645327611SJustin T. Gibbs TAILQ_INSERT_TAIL(&callwheel[new->c_time & callwheelmask], 80745327611SJustin T. Gibbs new, c_links.tqe); 808df8bae1dSRodney W. Grimes 809df8bae1dSRodney W. Grimes splx(s); 810ab36c067SJustin T. Gibbs handle.callout = new; 811ab36c067SJustin T. Gibbs return (handle); 812df8bae1dSRodney W. Grimes } 813df8bae1dSRodney W. Grimes 814df8bae1dSRodney W. Grimes void 815ab36c067SJustin T. Gibbs untimeout(ftn, arg, handle) 816f23b4c91SGarrett Wollman timeout_t ftn; 817df8bae1dSRodney W. Grimes void *arg; 818ab36c067SJustin T. Gibbs struct callout_handle handle; 819df8bae1dSRodney W. Grimes { 820df8bae1dSRodney W. Grimes register int s; 821df8bae1dSRodney W. Grimes 822ab36c067SJustin T. Gibbs /* 823ab36c067SJustin T. Gibbs * Check for a handle that was initialized 824ab36c067SJustin T. Gibbs * by callout_handle_init, but never used 825ab36c067SJustin T. Gibbs * for a real timeout. 826ab36c067SJustin T. Gibbs */ 827ab36c067SJustin T. Gibbs if (handle.callout == NULL) 828ab36c067SJustin T. Gibbs return; 829df8bae1dSRodney W. Grimes 830ab36c067SJustin T. Gibbs s = splhigh(); 831ab36c067SJustin T. Gibbs if ((handle.callout->c_func == ftn) 832ab36c067SJustin T. 
Gibbs && (handle.callout->c_arg == arg)) { 833ab36c067SJustin T. Gibbs if (nextsoftcheck == handle.callout) { 834ab36c067SJustin T. Gibbs nextsoftcheck = TAILQ_NEXT(handle.callout, c_links.tqe); 835ab36c067SJustin T. Gibbs } 83645327611SJustin T. Gibbs TAILQ_REMOVE(&callwheel[handle.callout->c_time & callwheelmask], 837ab36c067SJustin T. Gibbs handle.callout, c_links.tqe); 838ab36c067SJustin T. Gibbs handle.callout->c_func = NULL; 839ab36c067SJustin T. Gibbs SLIST_INSERT_HEAD(&callfree, handle.callout, c_links.sle); 840df8bae1dSRodney W. Grimes } 841df8bae1dSRodney W. Grimes splx(s); 842df8bae1dSRodney W. Grimes } 843df8bae1dSRodney W. Grimes 8443c816944SBruce Evans void 845ab36c067SJustin T. Gibbs callout_handle_init(struct callout_handle *handle) 846ab36c067SJustin T. Gibbs { 847ab36c067SJustin T. Gibbs handle->callout = NULL; 848ab36c067SJustin T. Gibbs } 849ab36c067SJustin T. Gibbs 850ab36c067SJustin T. Gibbs void 8513c816944SBruce Evans gettime(struct timeval *tvp) 8523c816944SBruce Evans { 8533c816944SBruce Evans int s; 8543c816944SBruce Evans 8553c816944SBruce Evans s = splclock(); 8569a8f4a4cSMike Pritchard /* XXX should use microtime() iff tv_usec is used. */ 8573c816944SBruce Evans *tvp = time; 8583c816944SBruce Evans splx(s); 8593c816944SBruce Evans } 8603c816944SBruce Evans 861df8bae1dSRodney W. Grimes /* 862df8bae1dSRodney W. Grimes * Compute number of hz until specified time. Used to 863df8bae1dSRodney W. Grimes * compute third argument to timeout() from an absolute time. 864df8bae1dSRodney W. Grimes */ 865df8bae1dSRodney W. Grimes int 866df8bae1dSRodney W. Grimes hzto(tv) 867df8bae1dSRodney W. Grimes struct timeval *tv; 868df8bae1dSRodney W. Grimes { 8696976af69SBruce Evans register unsigned long ticks; 8706976af69SBruce Evans register long sec, usec; 871df8bae1dSRodney W. Grimes int s; 872df8bae1dSRodney W. Grimes 873df8bae1dSRodney W. 
Grimes /* 8746976af69SBruce Evans * If the number of usecs in the whole seconds part of the time 8756976af69SBruce Evans * difference fits in a long, then the total number of usecs will 8766976af69SBruce Evans * fit in an unsigned long. Compute the total and convert it to 8776976af69SBruce Evans * ticks, rounding up and adding 1 to allow for the current tick 8786976af69SBruce Evans * to expire. Rounding also depends on unsigned long arithmetic 8796976af69SBruce Evans * to avoid overflow. 880df8bae1dSRodney W. Grimes * 8816976af69SBruce Evans * Otherwise, if the number of ticks in the whole seconds part of 8826976af69SBruce Evans * the time difference fits in a long, then convert the parts to 8836976af69SBruce Evans * ticks separately and add, using similar rounding methods and 8846976af69SBruce Evans * overflow avoidance. This method would work in the previous 8856976af69SBruce Evans * case but it is slightly slower and assumes that hz is integral. 8866976af69SBruce Evans * 8876976af69SBruce Evans * Otherwise, round the time difference down to the maximum 8886976af69SBruce Evans * representable value. 8896976af69SBruce Evans * 8906976af69SBruce Evans * If ints have 32 bits, then the maximum value for any timeout in 8916976af69SBruce Evans * 10ms ticks is 248 days. 892df8bae1dSRodney W. Grimes */ 8936976af69SBruce Evans s = splclock(); 894df8bae1dSRodney W. Grimes sec = tv->tv_sec - time.tv_sec; 8956976af69SBruce Evans usec = tv->tv_usec - time.tv_usec; 896df8bae1dSRodney W. 
Grimes splx(s); 8976976af69SBruce Evans if (usec < 0) { 8986976af69SBruce Evans sec--; 8996976af69SBruce Evans usec += 1000000; 9006976af69SBruce Evans } 9016976af69SBruce Evans if (sec < 0) { 9026976af69SBruce Evans #ifdef DIAGNOSTIC 9036976af69SBruce Evans printf("hzto: negative time difference %ld sec %ld usec\n", 9046976af69SBruce Evans sec, usec); 9056976af69SBruce Evans #endif 9066976af69SBruce Evans ticks = 1; 9076976af69SBruce Evans } else if (sec <= LONG_MAX / 1000000) 9086976af69SBruce Evans ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 9096976af69SBruce Evans / tick + 1; 9106976af69SBruce Evans else if (sec <= LONG_MAX / hz) 9116976af69SBruce Evans ticks = sec * hz 9126976af69SBruce Evans + ((unsigned long)usec + (tick - 1)) / tick + 1; 9136976af69SBruce Evans else 9146976af69SBruce Evans ticks = LONG_MAX; 9156976af69SBruce Evans if (ticks > INT_MAX) 9166976af69SBruce Evans ticks = INT_MAX; 917df8bae1dSRodney W. Grimes return (ticks); 918df8bae1dSRodney W. Grimes } 919df8bae1dSRodney W. Grimes 920df8bae1dSRodney W. Grimes /* 921df8bae1dSRodney W. Grimes * Start profiling on a process. 922df8bae1dSRodney W. Grimes * 923df8bae1dSRodney W. Grimes * Kernel profiling passes proc0 which never exits and hence 924df8bae1dSRodney W. Grimes * keeps the profile clock running constantly. 925df8bae1dSRodney W. Grimes */ 926df8bae1dSRodney W. Grimes void 927df8bae1dSRodney W. Grimes startprofclock(p) 928df8bae1dSRodney W. Grimes register struct proc *p; 929df8bae1dSRodney W. Grimes { 930df8bae1dSRodney W. Grimes int s; 931df8bae1dSRodney W. Grimes 932df8bae1dSRodney W. Grimes if ((p->p_flag & P_PROFIL) == 0) { 933df8bae1dSRodney W. Grimes p->p_flag |= P_PROFIL; 934df8bae1dSRodney W. Grimes if (++profprocs == 1 && stathz != 0) { 935df8bae1dSRodney W. Grimes s = splstatclock(); 936df8bae1dSRodney W. Grimes psdiv = pscnt = psratio; 937df8bae1dSRodney W. Grimes setstatclockrate(profhz); 938df8bae1dSRodney W. Grimes splx(s); 939df8bae1dSRodney W. 
Grimes } 940df8bae1dSRodney W. Grimes } 941df8bae1dSRodney W. Grimes } 942df8bae1dSRodney W. Grimes 943df8bae1dSRodney W. Grimes /* 944df8bae1dSRodney W. Grimes * Stop profiling on a process. 945df8bae1dSRodney W. Grimes */ 946df8bae1dSRodney W. Grimes void 947df8bae1dSRodney W. Grimes stopprofclock(p) 948df8bae1dSRodney W. Grimes register struct proc *p; 949df8bae1dSRodney W. Grimes { 950df8bae1dSRodney W. Grimes int s; 951df8bae1dSRodney W. Grimes 952df8bae1dSRodney W. Grimes if (p->p_flag & P_PROFIL) { 953df8bae1dSRodney W. Grimes p->p_flag &= ~P_PROFIL; 954df8bae1dSRodney W. Grimes if (--profprocs == 0 && stathz != 0) { 955df8bae1dSRodney W. Grimes s = splstatclock(); 956df8bae1dSRodney W. Grimes psdiv = pscnt = 1; 957df8bae1dSRodney W. Grimes setstatclockrate(stathz); 958df8bae1dSRodney W. Grimes splx(s); 959df8bae1dSRodney W. Grimes } 960df8bae1dSRodney W. Grimes } 961df8bae1dSRodney W. Grimes } 962df8bae1dSRodney W. Grimes 963df8bae1dSRodney W. Grimes /* 964df8bae1dSRodney W. Grimes * Statistics clock. Grab profile sample, and if divider reaches 0, 965df8bae1dSRodney W. Grimes * do process and kernel statistics. 966df8bae1dSRodney W. Grimes */ 967df8bae1dSRodney W. Grimes void 968df8bae1dSRodney W. Grimes statclock(frame) 969df8bae1dSRodney W. Grimes register struct clockframe *frame; 970df8bae1dSRodney W. Grimes { 971df8bae1dSRodney W. Grimes #ifdef GPROF 972df8bae1dSRodney W. Grimes register struct gmonparam *g; 973df8bae1dSRodney W. Grimes #endif 974f5e9e8ecSBruce Evans register struct proc *p; 975df8bae1dSRodney W. Grimes register int i; 9768a129caeSDavid Greenman struct pstats *pstats; 977f5e9e8ecSBruce Evans long rss; 9788a129caeSDavid Greenman struct rusage *ru; 9798a129caeSDavid Greenman struct vmspace *vm; 9808a129caeSDavid Greenman 981df8bae1dSRodney W. Grimes if (CLKF_USERMODE(frame)) { 982f5e9e8ecSBruce Evans p = curproc; 983df8bae1dSRodney W. Grimes if (p->p_flag & P_PROFIL) 984df8bae1dSRodney W. 
Grimes addupc_intr(p, CLKF_PC(frame), 1); 985eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK) 986eae8fc2cSSteve Passe if (stathz != 0) 987eae8fc2cSSteve Passe forward_statclock(pscnt); 988eae8fc2cSSteve Passe #endif 989df8bae1dSRodney W. Grimes if (--pscnt > 0) 990df8bae1dSRodney W. Grimes return; 991df8bae1dSRodney W. Grimes /* 992df8bae1dSRodney W. Grimes * Came from user mode; CPU was in user state. 993df8bae1dSRodney W. Grimes * If this process is being profiled record the tick. 994df8bae1dSRodney W. Grimes */ 995df8bae1dSRodney W. Grimes p->p_uticks++; 996df8bae1dSRodney W. Grimes if (p->p_nice > NZERO) 997df8bae1dSRodney W. Grimes cp_time[CP_NICE]++; 998df8bae1dSRodney W. Grimes else 999df8bae1dSRodney W. Grimes cp_time[CP_USER]++; 1000df8bae1dSRodney W. Grimes } else { 1001df8bae1dSRodney W. Grimes #ifdef GPROF 1002df8bae1dSRodney W. Grimes /* 1003df8bae1dSRodney W. Grimes * Kernel statistics are just like addupc_intr, only easier. 1004df8bae1dSRodney W. Grimes */ 1005df8bae1dSRodney W. Grimes g = &_gmonparam; 1006df8bae1dSRodney W. Grimes if (g->state == GMON_PROF_ON) { 1007df8bae1dSRodney W. Grimes i = CLKF_PC(frame) - g->lowpc; 1008df8bae1dSRodney W. Grimes if (i < g->textsize) { 1009df8bae1dSRodney W. Grimes i /= HISTFRACTION * sizeof(*g->kcount); 1010df8bae1dSRodney W. Grimes g->kcount[i]++; 1011df8bae1dSRodney W. Grimes } 1012df8bae1dSRodney W. Grimes } 1013df8bae1dSRodney W. Grimes #endif 1014eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK) 1015eae8fc2cSSteve Passe if (stathz != 0) 1016eae8fc2cSSteve Passe forward_statclock(pscnt); 1017eae8fc2cSSteve Passe #endif 1018df8bae1dSRodney W. Grimes if (--pscnt > 0) 1019df8bae1dSRodney W. Grimes return; 1020df8bae1dSRodney W. Grimes /* 1021df8bae1dSRodney W. Grimes * Came from kernel mode, so we were: 1022df8bae1dSRodney W. Grimes * - handling an interrupt, 1023df8bae1dSRodney W. Grimes * - doing syscall or trap work on behalf of the current 1024df8bae1dSRodney W. 
Grimes * user process, or 1025df8bae1dSRodney W. Grimes * - spinning in the idle loop. 1026df8bae1dSRodney W. Grimes * Whichever it is, charge the time as appropriate. 1027df8bae1dSRodney W. Grimes * Note that we charge interrupts to the current process, 1028df8bae1dSRodney W. Grimes * regardless of whether they are ``for'' that process, 1029df8bae1dSRodney W. Grimes * so that we know how much of its real time was spent 1030df8bae1dSRodney W. Grimes * in ``non-process'' (i.e., interrupt) work. 1031df8bae1dSRodney W. Grimes */ 1032f5e9e8ecSBruce Evans p = curproc; 1033df8bae1dSRodney W. Grimes if (CLKF_INTR(frame)) { 1034df8bae1dSRodney W. Grimes if (p != NULL) 1035df8bae1dSRodney W. Grimes p->p_iticks++; 1036df8bae1dSRodney W. Grimes cp_time[CP_INTR]++; 1037b672aa4bSBruce Evans } else if (p != NULL) { 1038df8bae1dSRodney W. Grimes p->p_sticks++; 1039df8bae1dSRodney W. Grimes cp_time[CP_SYS]++; 1040df8bae1dSRodney W. Grimes } else 1041df8bae1dSRodney W. Grimes cp_time[CP_IDLE]++; 1042df8bae1dSRodney W. Grimes } 1043df8bae1dSRodney W. Grimes pscnt = psdiv; 1044df8bae1dSRodney W. Grimes 1045df8bae1dSRodney W. Grimes /* 1046df8bae1dSRodney W. Grimes * We maintain statistics shown by user-level statistics 1047df8bae1dSRodney W. Grimes * programs: the amount of time in each cpu state, and 1048df8bae1dSRodney W. Grimes * the amount of time each of DK_NDRIVE ``drives'' is busy. 1049df8bae1dSRodney W. Grimes * 1050df8bae1dSRodney W. Grimes * XXX should either run linked list of drives, or (better) 1051df8bae1dSRodney W. Grimes * grab timestamps in the start & done code. 1052df8bae1dSRodney W. Grimes */ 1053df8bae1dSRodney W. Grimes for (i = 0; i < DK_NDRIVE; i++) 1054df8bae1dSRodney W. Grimes if (dk_busy & (1 << i)) 1055df8bae1dSRodney W. Grimes dk_time[i]++; 1056df8bae1dSRodney W. Grimes 1057df8bae1dSRodney W. Grimes /* 1058df8bae1dSRodney W. Grimes * We adjust the priority of the current process. The priority of 1059df8bae1dSRodney W. 
Grimes * a process gets worse as it accumulates CPU time. The cpu usage 1060df8bae1dSRodney W. Grimes * estimator (p_estcpu) is increased here. The formula for computing 1061df8bae1dSRodney W. Grimes * priorities (in kern_synch.c) will compute a different value each 1062df8bae1dSRodney W. Grimes * time p_estcpu increases by 4. The cpu usage estimator ramps up 1063df8bae1dSRodney W. Grimes * quite quickly when the process is running (linearly), and decays 1064df8bae1dSRodney W. Grimes * away exponentially, at a rate which is proportionally slower when 1065df8bae1dSRodney W. Grimes * the system is busy. The basic principal is that the system will 1066df8bae1dSRodney W. Grimes * 90% forget that the process used a lot of CPU time in 5 * loadav 1067df8bae1dSRodney W. Grimes * seconds. This causes the system to favor processes which haven't 1068df8bae1dSRodney W. Grimes * run much recently, and to round-robin among other processes. 1069df8bae1dSRodney W. Grimes */ 1070df8bae1dSRodney W. Grimes if (p != NULL) { 1071df8bae1dSRodney W. Grimes p->p_cpticks++; 1072df8bae1dSRodney W. Grimes if (++p->p_estcpu == 0) 1073df8bae1dSRodney W. Grimes p->p_estcpu--; 1074df8bae1dSRodney W. Grimes if ((p->p_estcpu & 3) == 0) { 1075df8bae1dSRodney W. Grimes resetpriority(p); 1076df8bae1dSRodney W. Grimes if (p->p_priority >= PUSER) 1077df8bae1dSRodney W. Grimes p->p_priority = p->p_usrpri; 1078df8bae1dSRodney W. Grimes } 1079f5e9e8ecSBruce Evans 1080f5e9e8ecSBruce Evans /* Update resource usage integrals and maximums. 
*/ 1081f5e9e8ecSBruce Evans if ((pstats = p->p_stats) != NULL && 1082f5e9e8ecSBruce Evans (ru = &pstats->p_ru) != NULL && 1083f5e9e8ecSBruce Evans (vm = p->p_vmspace) != NULL) { 1084f5e9e8ecSBruce Evans ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; 1085f5e9e8ecSBruce Evans ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; 1086f5e9e8ecSBruce Evans ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; 1087f5e9e8ecSBruce Evans rss = vm->vm_pmap.pm_stats.resident_count * 1088f5e9e8ecSBruce Evans PAGE_SIZE / 1024; 1089f5e9e8ecSBruce Evans if (ru->ru_maxrss < rss) 1090f5e9e8ecSBruce Evans ru->ru_maxrss = rss; 1091f5e9e8ecSBruce Evans } 1092df8bae1dSRodney W. Grimes } 1093df8bae1dSRodney W. Grimes } 1094df8bae1dSRodney W. Grimes 1095df8bae1dSRodney W. Grimes /* 1096df8bae1dSRodney W. Grimes * Return information about system clocks. 1097df8bae1dSRodney W. Grimes */ 1098787d58f2SPoul-Henning Kamp static int 1099787d58f2SPoul-Henning Kamp sysctl_kern_clockrate SYSCTL_HANDLER_ARGS 1100df8bae1dSRodney W. Grimes { 1101df8bae1dSRodney W. Grimes struct clockinfo clkinfo; 1102df8bae1dSRodney W. Grimes /* 1103df8bae1dSRodney W. Grimes * Construct clockinfo structure. 1104df8bae1dSRodney W. Grimes */ 1105df8bae1dSRodney W. Grimes clkinfo.hz = hz; 1106df8bae1dSRodney W. Grimes clkinfo.tick = tick; 11075faa3121SJohn Hay clkinfo.tickadj = tickadj; 1108df8bae1dSRodney W. Grimes clkinfo.profhz = profhz; 1109df8bae1dSRodney W. Grimes clkinfo.stathz = stathz ? stathz : hz; 1110ae0eb976SPoul-Henning Kamp return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); 1111df8bae1dSRodney W. 
Grimes } 11123f31c649SGarrett Wollman 1113946bb7a2SPoul-Henning Kamp SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 111465d0bc13SPoul-Henning Kamp 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); 1115787d58f2SPoul-Henning Kamp 1116885bd8e4SJohn Hay #ifdef PPS_SYNC 11173f31c649SGarrett Wollman /* 1118885bd8e4SJohn Hay * hardpps() - discipline CPU clock oscillator to external PPS signal 11193f31c649SGarrett Wollman * 11203f31c649SGarrett Wollman * This routine is called at each PPS interrupt in order to discipline 1121885bd8e4SJohn Hay * the CPU clock oscillator to the PPS signal. It measures the PPS phase 1122885bd8e4SJohn Hay * and leaves it in a handy spot for the hardclock() routine. It 1123885bd8e4SJohn Hay * integrates successive PPS phase differences and calculates the 11243f31c649SGarrett Wollman * frequency offset. This is used in hardclock() to discipline the CPU 11253f31c649SGarrett Wollman * clock oscillator so that intrinsic frequency error is cancelled out. 1126885bd8e4SJohn Hay * The code requires the caller to capture the time and hardware counter 1127885bd8e4SJohn Hay * value at the on-time PPS signal transition. 1128885bd8e4SJohn Hay * 1129885bd8e4SJohn Hay * Note that, on some Unix systems, this routine runs at an interrupt 1130885bd8e4SJohn Hay * priority level higher than the timer interrupt routine hardclock(). 1131885bd8e4SJohn Hay * Therefore, the variables used are distinct from the hardclock() 1132885bd8e4SJohn Hay * variables, except for certain exceptions: The PPS frequency pps_freq 1133885bd8e4SJohn Hay * and phase pps_offset variables are determined by this routine and 1134885bd8e4SJohn Hay * updated atomically. The time_tolerance variable can be considered a 1135885bd8e4SJohn Hay * constant, since it is infrequently changed, and then only when the 1136885bd8e4SJohn Hay * PPS signal is disabled. 
The watchdog counter pps_valid is updated 1137885bd8e4SJohn Hay * once per second by hardclock() and is atomically cleared in this 1138885bd8e4SJohn Hay * routine. 11393f31c649SGarrett Wollman */ 11403f31c649SGarrett Wollman void 11413f31c649SGarrett Wollman hardpps(tvp, usec) 11423f31c649SGarrett Wollman struct timeval *tvp; /* time at PPS */ 11433f31c649SGarrett Wollman long usec; /* hardware counter at PPS */ 11443f31c649SGarrett Wollman { 11453f31c649SGarrett Wollman long u_usec, v_usec, bigtick; 11463f31c649SGarrett Wollman long cal_sec, cal_usec; 11473f31c649SGarrett Wollman 11483f31c649SGarrett Wollman /* 1149885bd8e4SJohn Hay * An occasional glitch can be produced when the PPS interrupt 1150885bd8e4SJohn Hay * occurs in the hardclock() routine before the time variable is 1151885bd8e4SJohn Hay * updated. Here the offset is discarded when the difference 1152885bd8e4SJohn Hay * between it and the last one is greater than tick/2, but not 1153885bd8e4SJohn Hay * if the interval since the first discard exceeds 30 s. 
1154885bd8e4SJohn Hay */ 1155885bd8e4SJohn Hay time_status |= STA_PPSSIGNAL; 1156885bd8e4SJohn Hay time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); 1157885bd8e4SJohn Hay pps_valid = 0; 1158885bd8e4SJohn Hay u_usec = -tvp->tv_usec; 1159885bd8e4SJohn Hay if (u_usec < -500000) 1160885bd8e4SJohn Hay u_usec += 1000000; 1161885bd8e4SJohn Hay v_usec = pps_offset - u_usec; 1162885bd8e4SJohn Hay if (v_usec < 0) 1163885bd8e4SJohn Hay v_usec = -v_usec; 1164885bd8e4SJohn Hay if (v_usec > (tick >> 1)) { 1165885bd8e4SJohn Hay if (pps_glitch > MAXGLITCH) { 1166885bd8e4SJohn Hay pps_glitch = 0; 1167885bd8e4SJohn Hay pps_tf[2] = u_usec; 1168885bd8e4SJohn Hay pps_tf[1] = u_usec; 1169885bd8e4SJohn Hay } else { 1170885bd8e4SJohn Hay pps_glitch++; 1171885bd8e4SJohn Hay u_usec = pps_offset; 1172885bd8e4SJohn Hay } 1173885bd8e4SJohn Hay } else 1174885bd8e4SJohn Hay pps_glitch = 0; 1175885bd8e4SJohn Hay 1176885bd8e4SJohn Hay /* 1177885bd8e4SJohn Hay * A three-stage median filter is used to help deglitch the pps 1178885bd8e4SJohn Hay * time. The median sample becomes the time offset estimate; the 1179885bd8e4SJohn Hay * difference between the other two samples becomes the time 1180885bd8e4SJohn Hay * dispersion (jitter) estimate. 
1181885bd8e4SJohn Hay */ 1182885bd8e4SJohn Hay pps_tf[2] = pps_tf[1]; 1183885bd8e4SJohn Hay pps_tf[1] = pps_tf[0]; 1184885bd8e4SJohn Hay pps_tf[0] = u_usec; 1185885bd8e4SJohn Hay if (pps_tf[0] > pps_tf[1]) { 1186885bd8e4SJohn Hay if (pps_tf[1] > pps_tf[2]) { 1187885bd8e4SJohn Hay pps_offset = pps_tf[1]; /* 0 1 2 */ 1188885bd8e4SJohn Hay v_usec = pps_tf[0] - pps_tf[2]; 1189885bd8e4SJohn Hay } else if (pps_tf[2] > pps_tf[0]) { 1190885bd8e4SJohn Hay pps_offset = pps_tf[0]; /* 2 0 1 */ 1191885bd8e4SJohn Hay v_usec = pps_tf[2] - pps_tf[1]; 1192885bd8e4SJohn Hay } else { 1193885bd8e4SJohn Hay pps_offset = pps_tf[2]; /* 0 2 1 */ 1194885bd8e4SJohn Hay v_usec = pps_tf[0] - pps_tf[1]; 1195885bd8e4SJohn Hay } 1196885bd8e4SJohn Hay } else { 1197885bd8e4SJohn Hay if (pps_tf[1] < pps_tf[2]) { 1198885bd8e4SJohn Hay pps_offset = pps_tf[1]; /* 2 1 0 */ 1199885bd8e4SJohn Hay v_usec = pps_tf[2] - pps_tf[0]; 1200885bd8e4SJohn Hay } else if (pps_tf[2] < pps_tf[0]) { 1201885bd8e4SJohn Hay pps_offset = pps_tf[0]; /* 1 0 2 */ 1202885bd8e4SJohn Hay v_usec = pps_tf[1] - pps_tf[2]; 1203885bd8e4SJohn Hay } else { 1204885bd8e4SJohn Hay pps_offset = pps_tf[2]; /* 1 2 0 */ 1205885bd8e4SJohn Hay v_usec = pps_tf[1] - pps_tf[0]; 1206885bd8e4SJohn Hay } 1207885bd8e4SJohn Hay } 1208885bd8e4SJohn Hay if (v_usec > MAXTIME) 1209885bd8e4SJohn Hay pps_jitcnt++; 1210885bd8e4SJohn Hay v_usec = (v_usec << PPS_AVG) - pps_jitter; 1211885bd8e4SJohn Hay if (v_usec < 0) 1212885bd8e4SJohn Hay pps_jitter -= -v_usec >> PPS_AVG; 1213885bd8e4SJohn Hay else 1214885bd8e4SJohn Hay pps_jitter += v_usec >> PPS_AVG; 1215885bd8e4SJohn Hay if (pps_jitter > (MAXTIME >> 1)) 1216885bd8e4SJohn Hay time_status |= STA_PPSJITTER; 1217885bd8e4SJohn Hay 1218885bd8e4SJohn Hay /* 12193f31c649SGarrett Wollman * During the calibration interval adjust the starting time when 12203f31c649SGarrett Wollman * the tick overflows. 
At the end of the interval compute the 12213f31c649SGarrett Wollman * duration of the interval and the difference of the hardware 12223f31c649SGarrett Wollman * counters at the beginning and end of the interval. This code 12233f31c649SGarrett Wollman * is deliciously complicated by the fact valid differences may 12243f31c649SGarrett Wollman * exceed the value of tick when using long calibration 12253f31c649SGarrett Wollman * intervals and small ticks. Note that the counter can be 12263f31c649SGarrett Wollman * greater than tick if caught at just the wrong instant, but 12273f31c649SGarrett Wollman * the values returned and used here are correct. 12283f31c649SGarrett Wollman */ 12293f31c649SGarrett Wollman bigtick = (long)tick << SHIFT_USEC; 1230885bd8e4SJohn Hay pps_usec -= pps_freq; 12313f31c649SGarrett Wollman if (pps_usec >= bigtick) 12323f31c649SGarrett Wollman pps_usec -= bigtick; 12333f31c649SGarrett Wollman if (pps_usec < 0) 12343f31c649SGarrett Wollman pps_usec += bigtick; 12353f31c649SGarrett Wollman pps_time.tv_sec++; 12363f31c649SGarrett Wollman pps_count++; 12373f31c649SGarrett Wollman if (pps_count < (1 << pps_shift)) 12383f31c649SGarrett Wollman return; 12393f31c649SGarrett Wollman pps_count = 0; 1240885bd8e4SJohn Hay pps_calcnt++; 12413f31c649SGarrett Wollman u_usec = usec << SHIFT_USEC; 12423f31c649SGarrett Wollman v_usec = pps_usec - u_usec; 12433f31c649SGarrett Wollman if (v_usec >= bigtick >> 1) 12443f31c649SGarrett Wollman v_usec -= bigtick; 12453f31c649SGarrett Wollman if (v_usec < -(bigtick >> 1)) 12463f31c649SGarrett Wollman v_usec += bigtick; 12473f31c649SGarrett Wollman if (v_usec < 0) 1248885bd8e4SJohn Hay v_usec = -(-v_usec >> pps_shift); 12493f31c649SGarrett Wollman else 1250885bd8e4SJohn Hay v_usec = v_usec >> pps_shift; 12513f31c649SGarrett Wollman pps_usec = u_usec; 12523f31c649SGarrett Wollman cal_sec = tvp->tv_sec; 12533f31c649SGarrett Wollman cal_usec = tvp->tv_usec; 12543f31c649SGarrett Wollman cal_sec -= pps_time.tv_sec; 
12553f31c649SGarrett Wollman cal_usec -= pps_time.tv_usec; 12563f31c649SGarrett Wollman if (cal_usec < 0) { 12573f31c649SGarrett Wollman cal_usec += 1000000; 12583f31c649SGarrett Wollman cal_sec--; 12593f31c649SGarrett Wollman } 12603f31c649SGarrett Wollman pps_time = *tvp; 12613f31c649SGarrett Wollman 12623f31c649SGarrett Wollman /* 12633f31c649SGarrett Wollman * Check for lost interrupts, noise, excessive jitter and 12643f31c649SGarrett Wollman * excessive frequency error. The number of timer ticks during 12653f31c649SGarrett Wollman * the interval may vary +-1 tick. Add to this a margin of one 12663f31c649SGarrett Wollman * tick for the PPS signal jitter and maximum frequency 12673f31c649SGarrett Wollman * deviation. If the limits are exceeded, the calibration 12683f31c649SGarrett Wollman * interval is reset to the minimum and we start over. 12693f31c649SGarrett Wollman */ 12703f31c649SGarrett Wollman u_usec = (long)tick << 1; 12713f31c649SGarrett Wollman if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec)) 12723f31c649SGarrett Wollman || (cal_sec == 0 && cal_usec < u_usec)) 1273885bd8e4SJohn Hay || v_usec > time_tolerance || v_usec < -time_tolerance) { 1274885bd8e4SJohn Hay pps_errcnt++; 1275885bd8e4SJohn Hay pps_shift = PPS_SHIFT; 1276885bd8e4SJohn Hay pps_intcnt = 0; 1277885bd8e4SJohn Hay time_status |= STA_PPSERROR; 12783f31c649SGarrett Wollman return; 12793f31c649SGarrett Wollman } 12803f31c649SGarrett Wollman 12813f31c649SGarrett Wollman /* 12823f31c649SGarrett Wollman * A three-stage median filter is used to help deglitch the pps 1283885bd8e4SJohn Hay * frequency. The median sample becomes the frequency offset 1284885bd8e4SJohn Hay * estimate; the difference between the other two samples 1285885bd8e4SJohn Hay * becomes the frequency dispersion (stability) estimate. 
12863f31c649SGarrett Wollman */ 1287885bd8e4SJohn Hay pps_ff[2] = pps_ff[1]; 1288885bd8e4SJohn Hay pps_ff[1] = pps_ff[0]; 1289885bd8e4SJohn Hay pps_ff[0] = v_usec; 1290885bd8e4SJohn Hay if (pps_ff[0] > pps_ff[1]) { 1291885bd8e4SJohn Hay if (pps_ff[1] > pps_ff[2]) { 1292885bd8e4SJohn Hay u_usec = pps_ff[1]; /* 0 1 2 */ 1293885bd8e4SJohn Hay v_usec = pps_ff[0] - pps_ff[2]; 1294885bd8e4SJohn Hay } else if (pps_ff[2] > pps_ff[0]) { 1295885bd8e4SJohn Hay u_usec = pps_ff[0]; /* 2 0 1 */ 1296885bd8e4SJohn Hay v_usec = pps_ff[2] - pps_ff[1]; 12973f31c649SGarrett Wollman } else { 1298885bd8e4SJohn Hay u_usec = pps_ff[2]; /* 0 2 1 */ 1299885bd8e4SJohn Hay v_usec = pps_ff[0] - pps_ff[1]; 13003f31c649SGarrett Wollman } 13013f31c649SGarrett Wollman } else { 1302885bd8e4SJohn Hay if (pps_ff[1] < pps_ff[2]) { 1303885bd8e4SJohn Hay u_usec = pps_ff[1]; /* 2 1 0 */ 1304885bd8e4SJohn Hay v_usec = pps_ff[2] - pps_ff[0]; 1305885bd8e4SJohn Hay } else if (pps_ff[2] < pps_ff[0]) { 1306885bd8e4SJohn Hay u_usec = pps_ff[0]; /* 1 0 2 */ 1307885bd8e4SJohn Hay v_usec = pps_ff[1] - pps_ff[2]; 13083f31c649SGarrett Wollman } else { 1309885bd8e4SJohn Hay u_usec = pps_ff[2]; /* 1 2 0 */ 1310885bd8e4SJohn Hay v_usec = pps_ff[1] - pps_ff[0]; 13113f31c649SGarrett Wollman } 13123f31c649SGarrett Wollman } 13133f31c649SGarrett Wollman 13143f31c649SGarrett Wollman /* 1315885bd8e4SJohn Hay * Here the frequency dispersion (stability) is updated. If it 1316885bd8e4SJohn Hay * is less than one-fourth the maximum (MAXFREQ), the frequency 1317885bd8e4SJohn Hay * offset is updated as well, but clamped to the tolerance. It 1318885bd8e4SJohn Hay * will be processed later by the hardclock() routine. 
13193f31c649SGarrett Wollman */ 1320885bd8e4SJohn Hay v_usec = (v_usec >> 1) - pps_stabil; 13213f31c649SGarrett Wollman if (v_usec < 0) 1322885bd8e4SJohn Hay pps_stabil -= -v_usec >> PPS_AVG; 13233f31c649SGarrett Wollman else 1324885bd8e4SJohn Hay pps_stabil += v_usec >> PPS_AVG; 1325885bd8e4SJohn Hay if (pps_stabil > MAXFREQ >> 2) { 1326885bd8e4SJohn Hay pps_stbcnt++; 1327885bd8e4SJohn Hay time_status |= STA_PPSWANDER; 13283f31c649SGarrett Wollman return; 13293f31c649SGarrett Wollman } 1330885bd8e4SJohn Hay if (time_status & STA_PPSFREQ) { 13313f31c649SGarrett Wollman if (u_usec < 0) { 1332885bd8e4SJohn Hay pps_freq -= -u_usec >> PPS_AVG; 1333885bd8e4SJohn Hay if (pps_freq < -time_tolerance) 1334885bd8e4SJohn Hay pps_freq = -time_tolerance; 13353f31c649SGarrett Wollman u_usec = -u_usec; 13363f31c649SGarrett Wollman } else { 1337885bd8e4SJohn Hay pps_freq += u_usec >> PPS_AVG; 1338885bd8e4SJohn Hay if (pps_freq > time_tolerance) 1339885bd8e4SJohn Hay pps_freq = time_tolerance; 1340885bd8e4SJohn Hay } 13413f31c649SGarrett Wollman } 13423f31c649SGarrett Wollman 13433f31c649SGarrett Wollman /* 13443f31c649SGarrett Wollman * Here the calibration interval is adjusted. If the maximum 13453f31c649SGarrett Wollman * time difference is greater than tick / 4, reduce the interval 13463f31c649SGarrett Wollman * by half. If this is not the case for four consecutive 13473f31c649SGarrett Wollman * intervals, double the interval. 13483f31c649SGarrett Wollman */ 1349885bd8e4SJohn Hay if (u_usec << pps_shift > bigtick >> 2) { 1350885bd8e4SJohn Hay pps_intcnt = 0; 1351885bd8e4SJohn Hay if (pps_shift > PPS_SHIFT) 1352885bd8e4SJohn Hay pps_shift--; 1353885bd8e4SJohn Hay } else if (pps_intcnt >= 4) { 1354885bd8e4SJohn Hay pps_intcnt = 0; 1355885bd8e4SJohn Hay if (pps_shift < PPS_SHIFTMAX) 1356885bd8e4SJohn Hay pps_shift++; 13573f31c649SGarrett Wollman } else 1358885bd8e4SJohn Hay pps_intcnt++; 13593f31c649SGarrett Wollman } 13603f31c649SGarrett Wollman #endif /* PPS_SYNC */ 1361