xref: /freebsd/sys/kern/kern_clock.c (revision 1c6d46f93c3ed7c82f1d6c1145d1433804859a0a)
1df8bae1dSRodney W. Grimes /*-
27ec73f64SPoul-Henning Kamp  * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
3df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1991, 1993
4df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
5df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
6df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
7df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
8df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
9df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
10df8bae1dSRodney W. Grimes  *
11df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
12df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
13df8bae1dSRodney W. Grimes  * are met:
14df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
15df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
16df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
17df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
18df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
19df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
20df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
21df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
22df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
23df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
24df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
25df8bae1dSRodney W. Grimes  *    without specific prior written permission.
26df8bae1dSRodney W. Grimes  *
27df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
38df8bae1dSRodney W. Grimes  *
39df8bae1dSRodney W. Grimes  *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
401c6d46f9SLuoqi Chen  * $Id: kern_clock.c,v 1.87 1999/02/19 14:25:34 luoqi Exp $
41df8bae1dSRodney W. Grimes  */
42df8bae1dSRodney W. Grimes 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45df8bae1dSRodney W. Grimes #include <sys/dkstat.h>
46df8bae1dSRodney W. Grimes #include <sys/callout.h>
47df8bae1dSRodney W. Grimes #include <sys/kernel.h>
48df8bae1dSRodney W. Grimes #include <sys/proc.h>
493bac064fSPoul-Henning Kamp #include <sys/malloc.h>
50df8bae1dSRodney W. Grimes #include <sys/resourcevar.h>
51797f2d22SPoul-Henning Kamp #include <sys/signalvar.h>
523f31c649SGarrett Wollman #include <sys/timex.h>
538a129caeSDavid Greenman #include <vm/vm.h>
54996c772fSJohn Dyson #include <sys/lock.h>
55efeaf95aSDavid Greenman #include <vm/pmap.h>
56efeaf95aSDavid Greenman #include <vm/vm_map.h>
57797f2d22SPoul-Henning Kamp #include <sys/sysctl.h>
58df8bae1dSRodney W. Grimes 
59df8bae1dSRodney W. Grimes #include <machine/cpu.h>
60b1037dcdSBruce Evans #include <machine/limits.h>
61df8bae1dSRodney W. Grimes 
62df8bae1dSRodney W. Grimes #ifdef GPROF
63df8bae1dSRodney W. Grimes #include <sys/gmon.h>
64df8bae1dSRodney W. Grimes #endif
65df8bae1dSRodney W. Grimes 
66eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK)
67eae8fc2cSSteve Passe #include <machine/smp.h>
68eae8fc2cSSteve Passe #endif
69eae8fc2cSSteve Passe 
70c2906d55SPoul-Henning Kamp /* This is where the NTIMECOUNTER option hangs out */
71c2906d55SPoul-Henning Kamp #include "opt_ntp.h"
72c2906d55SPoul-Henning Kamp 
733bac064fSPoul-Henning Kamp /*
743bac064fSPoul-Henning Kamp  * Number of timecounters used to implement stable storage
753bac064fSPoul-Henning Kamp  */
763bac064fSPoul-Henning Kamp #ifndef NTIMECOUNTER
77c2906d55SPoul-Henning Kamp #define NTIMECOUNTER	5
783bac064fSPoul-Henning Kamp #endif
793bac064fSPoul-Henning Kamp 
803bac064fSPoul-Henning Kamp static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
813bac064fSPoul-Henning Kamp 	"Timecounter stable storage");
823bac064fSPoul-Henning Kamp 
83d841aaa7SBruce Evans static void initclocks __P((void *dummy));
842b14f991SJulian Elischer SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
852b14f991SJulian Elischer 
86c2906d55SPoul-Henning Kamp static void tco_forward __P((int force));
877ec73f64SPoul-Henning Kamp static void tco_setscales __P((struct timecounter *tc));
88a58f0f8eSPoul-Henning Kamp static __inline unsigned tco_delta __P((struct timecounter *tc));
897ec73f64SPoul-Henning Kamp 
90f23b4c91SGarrett Wollman /* Some of these don't belong here, but it's easiest to concentrate them. */
91eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK)
92eae8fc2cSSteve Passe long cp_time[CPUSTATES];
93eae8fc2cSSteve Passe #else
9427a0b398SPoul-Henning Kamp static long cp_time[CPUSTATES];
95eae8fc2cSSteve Passe #endif
96f23b4c91SGarrett Wollman 
97f23b4c91SGarrett Wollman long tk_cancc;
98f23b4c91SGarrett Wollman long tk_nin;
99f23b4c91SGarrett Wollman long tk_nout;
100f23b4c91SGarrett Wollman long tk_rawcc;
101f23b4c91SGarrett Wollman 
102227ee8a1SPoul-Henning Kamp time_t time_second;
103227ee8a1SPoul-Henning Kamp 
104df8bae1dSRodney W. Grimes /*
105510eb5b9SPoul-Henning Kamp  * Which update policy to use.
106510eb5b9SPoul-Henning Kamp  *   0 - every tick, bad hardware may fail with "calcru negative..."
107510eb5b9SPoul-Henning Kamp  *   1 - more resistant to the above hardware problem, but less efficient.
108510eb5b9SPoul-Henning Kamp  */
109510eb5b9SPoul-Henning Kamp static int tco_method;
110510eb5b9SPoul-Henning Kamp 
111510eb5b9SPoul-Henning Kamp /*
1123bac064fSPoul-Henning Kamp  * Implement a dummy timecounter which we can use until we get a real one
1133bac064fSPoul-Henning Kamp  * in the air.  This allows the console and other early stuff to use
1143bac064fSPoul-Henning Kamp  * timeservices.
1153bac064fSPoul-Henning Kamp  */
1163bac064fSPoul-Henning Kamp 
/*
 * Counter routine for the placeholder timecounter: there is no hardware
 * behind it, so each call simply reports a value one larger than the
 * previous call did.  The first call returns 1.  The tc argument is unused.
 */
static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned fake_count;

	fake_count += 1;
	return (fake_count);
}
1233bac064fSPoul-Henning Kamp 
1243bac064fSPoul-Henning Kamp static struct timecounter dummy_timecounter = {
1253bac064fSPoul-Henning Kamp 	dummy_get_timecount,	/* counter read routine (the incrementing stub above) */
1263bac064fSPoul-Henning Kamp 	0,	/* NOTE(review): second member not identifiable from this file -- confirm against struct timecounter */
1273bac064fSPoul-Henning Kamp 	~0u,	/* all bits set; presumably the counter mask (tc_counter_mask) -- confirm */
1283bac064fSPoul-Henning Kamp 	1000000,	/* presumably the nominal counter frequency -- confirm */
1293bac064fSPoul-Henning Kamp 	"dummy"	/* presumably the timecounter's name -- confirm */
1303bac064fSPoul-Henning Kamp };
1313bac064fSPoul-Henning Kamp 
1323bac064fSPoul-Henning Kamp struct timecounter *timecounter = &dummy_timecounter;	/* timecounter read by the time routines in this file; starts at the dummy */
1333bac064fSPoul-Henning Kamp 
1343bac064fSPoul-Henning Kamp /*
135df8bae1dSRodney W. Grimes  * Clock handling routines.
136df8bae1dSRodney W. Grimes  *
137b05dcf3cSPoul-Henning Kamp  * This code is written to operate with two timers that run independently of
138b05dcf3cSPoul-Henning Kamp  * each other.
1397ec73f64SPoul-Henning Kamp  *
140b05dcf3cSPoul-Henning Kamp  * The main timer, running hz times per second, is used to trigger interval
141b05dcf3cSPoul-Henning Kamp  * timers, timeouts and rescheduling as needed.
1427ec73f64SPoul-Henning Kamp  *
143b05dcf3cSPoul-Henning Kamp  * The second timer handles kernel and user profiling,
144b05dcf3cSPoul-Henning Kamp  * and does resource use estimation.  If the second timer is programmable,
145b05dcf3cSPoul-Henning Kamp  * it is randomized to avoid aliasing between the two clocks.  For example,
146b05dcf3cSPoul-Henning Kamp  * the randomization prevents an adversary from always giving up the cpu
147df8bae1dSRodney W. Grimes  * just before its quantum expires.  Otherwise, it would never accumulate
148df8bae1dSRodney W. Grimes  * cpu ticks.  The mean frequency of the second timer is stathz.
149b05dcf3cSPoul-Henning Kamp  *
150b05dcf3cSPoul-Henning Kamp  * If no second timer exists, stathz will be zero; in this case we drive
151b05dcf3cSPoul-Henning Kamp  * profiling and statistics off the main clock.  This WILL NOT be accurate;
152b05dcf3cSPoul-Henning Kamp  * do not do it unless absolutely necessary.
153b05dcf3cSPoul-Henning Kamp  *
154df8bae1dSRodney W. Grimes  * The statistics clock may (or may not) be run at a higher rate while
155b05dcf3cSPoul-Henning Kamp  * profiling.  This profile clock runs at profhz.  We require that profhz
156b05dcf3cSPoul-Henning Kamp  * be an integral multiple of stathz.
157b05dcf3cSPoul-Henning Kamp  *
158b05dcf3cSPoul-Henning Kamp  * If the statistics clock is running fast, it must be divided by the ratio
159b05dcf3cSPoul-Henning Kamp  * profhz/stathz for statistics.  (For profiling, every tick counts.)
160df8bae1dSRodney W. Grimes  *
1617ec73f64SPoul-Henning Kamp  * Time-of-day is maintained using a "timecounter", which may or may
1627ec73f64SPoul-Henning Kamp  * not be related to the hardware generating the above mentioned
1637ec73f64SPoul-Henning Kamp  * interrupts.
164df8bae1dSRodney W. Grimes  */
165df8bae1dSRodney W. Grimes 
166df8bae1dSRodney W. Grimes int	stathz;				/* statistics clock rate; 0 means no separate statistics clock */
167df8bae1dSRodney W. Grimes int	profhz;				/* profiling clock rate; initclocks() defaults it when left 0 */
168cc3d5226SBruce Evans static int profprocs;			/* processes with P_PROFIL set, see {start,stop}profclock() */
169df8bae1dSRodney W. Grimes int	ticks;				/* incremented once per hardclock() call */
170df8bae1dSRodney W. Grimes static int psdiv, pscnt;		/* prof => stat divider: reload value, and countdown used by statclock() */
171cc3d5226SBruce Evans int	psratio;			/* ratio: prof / stat, computed in initclocks() */
172df8bae1dSRodney W. Grimes 
173df8bae1dSRodney W. Grimes /*
174df8bae1dSRodney W. Grimes  * Initialize clock frequencies and start both clocks running.
175df8bae1dSRodney W. Grimes  */
1762b14f991SJulian Elischer /* ARGSUSED*/
1772b14f991SJulian Elischer static void
178d841aaa7SBruce Evans initclocks(dummy)
179d841aaa7SBruce Evans 	void *dummy;
180df8bae1dSRodney W. Grimes {
181df8bae1dSRodney W. Grimes 	register int i;
182df8bae1dSRodney W. Grimes 
183df8bae1dSRodney W. Grimes 	/*
184df8bae1dSRodney W. Grimes 	 * Set divisors to 1 (normal case) and let the machine-specific
185df8bae1dSRodney W. Grimes 	 * code do its bit.
186df8bae1dSRodney W. Grimes 	 */
187df8bae1dSRodney W. Grimes 	psdiv = pscnt = 1;
188df8bae1dSRodney W. Grimes 	cpu_initclocks();
189df8bae1dSRodney W. Grimes 
190df8bae1dSRodney W. Grimes 	/*
191df8bae1dSRodney W. Grimes 	 * Compute profhz/stathz, and fix profhz if needed.
192df8bae1dSRodney W. Grimes 	 */
193df8bae1dSRodney W. Grimes 	i = stathz ? stathz : hz;
194df8bae1dSRodney W. Grimes 	if (profhz == 0)
195df8bae1dSRodney W. Grimes 		profhz = i;
196df8bae1dSRodney W. Grimes 	psratio = profhz / i;
197df8bae1dSRodney W. Grimes }
198df8bae1dSRodney W. Grimes 
199df8bae1dSRodney W. Grimes /*
200df8bae1dSRodney W. Grimes  * The real-time timer, interrupting hz times per second.
201df8bae1dSRodney W. Grimes  */
202df8bae1dSRodney W. Grimes void
203df8bae1dSRodney W. Grimes hardclock(frame)
204df8bae1dSRodney W. Grimes 	register struct clockframe *frame;
205df8bae1dSRodney W. Grimes {
206df8bae1dSRodney W. Grimes 	register struct proc *p;
207df8bae1dSRodney W. Grimes 
208df8bae1dSRodney W. Grimes 	p = curproc;
209df8bae1dSRodney W. Grimes 	if (p) {
210df8bae1dSRodney W. Grimes 		register struct pstats *pstats;
211df8bae1dSRodney W. Grimes 
212df8bae1dSRodney W. Grimes 		/*
213df8bae1dSRodney W. Grimes 		 * Run current process's virtual and profile time, as needed.
214df8bae1dSRodney W. Grimes 		 */
215df8bae1dSRodney W. Grimes 		pstats = p->p_stats;
216df8bae1dSRodney W. Grimes 		if (CLKF_USERMODE(frame) &&
2174cf41af3SPoul-Henning Kamp 		    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
218df8bae1dSRodney W. Grimes 		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
219df8bae1dSRodney W. Grimes 			psignal(p, SIGVTALRM);
2204cf41af3SPoul-Henning Kamp 		if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
221df8bae1dSRodney W. Grimes 		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
222df8bae1dSRodney W. Grimes 			psignal(p, SIGPROF);
223df8bae1dSRodney W. Grimes 	}
224df8bae1dSRodney W. Grimes 
225eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK)
226eae8fc2cSSteve Passe 	forward_hardclock(pscnt);
227eae8fc2cSSteve Passe #endif
228b05dcf3cSPoul-Henning Kamp 
229df8bae1dSRodney W. Grimes 	/*
230df8bae1dSRodney W. Grimes 	 * If no separate statistics clock is available, run it from here.
231df8bae1dSRodney W. Grimes 	 */
232df8bae1dSRodney W. Grimes 	if (stathz == 0)
233df8bae1dSRodney W. Grimes 		statclock(frame);
234df8bae1dSRodney W. Grimes 
235c2906d55SPoul-Henning Kamp 	tco_forward(0);
236df8bae1dSRodney W. Grimes 	ticks++;
2373f31c649SGarrett Wollman 
238b05dcf3cSPoul-Henning Kamp 	/*
239b05dcf3cSPoul-Henning Kamp 	 * Process callouts at a very low cpu priority, so we don't keep the
240b05dcf3cSPoul-Henning Kamp 	 * relatively high clock interrupt priority any longer than necessary.
241b05dcf3cSPoul-Henning Kamp 	 */
242b05dcf3cSPoul-Henning Kamp 	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
243b05dcf3cSPoul-Henning Kamp 		if (CLKF_BASEPRI(frame)) {
244b05dcf3cSPoul-Henning Kamp 			/*
245b05dcf3cSPoul-Henning Kamp 			 * Save the overhead of a software interrupt;
246b05dcf3cSPoul-Henning Kamp 			 * it will happen as soon as we return, so do it now.
247b05dcf3cSPoul-Henning Kamp 			 */
248b05dcf3cSPoul-Henning Kamp 			(void)splsoftclock();
249b05dcf3cSPoul-Henning Kamp 			softclock();
250b05dcf3cSPoul-Henning Kamp 		} else
251eeb355f7SPoul-Henning Kamp 			setsoftclock();
252b05dcf3cSPoul-Henning Kamp 	} else if (softticks + 1 == ticks)
253b05dcf3cSPoul-Henning Kamp 		++softticks;
254ab36c067SJustin T. Gibbs }
255ab36c067SJustin T. Gibbs 
256df8bae1dSRodney W. Grimes /*
257227ee8a1SPoul-Henning Kamp  * Compute number of ticks in the specified amount of time.
258df8bae1dSRodney W. Grimes  */
259df8bae1dSRodney W. Grimes int
260227ee8a1SPoul-Henning Kamp tvtohz(tv)
261df8bae1dSRodney W. Grimes 	struct timeval *tv;
262df8bae1dSRodney W. Grimes {
2636976af69SBruce Evans 	register unsigned long ticks;
2646976af69SBruce Evans 	register long sec, usec;
265df8bae1dSRodney W. Grimes 
266df8bae1dSRodney W. Grimes 	/*
2676976af69SBruce Evans 	 * If the number of usecs in the whole seconds part of the time
2686976af69SBruce Evans 	 * difference fits in a long, then the total number of usecs will
2696976af69SBruce Evans 	 * fit in an unsigned long.  Compute the total and convert it to
2706976af69SBruce Evans 	 * ticks, rounding up and adding 1 to allow for the current tick
2716976af69SBruce Evans 	 * to expire.  Rounding also depends on unsigned long arithmetic
2726976af69SBruce Evans 	 * to avoid overflow.
273df8bae1dSRodney W. Grimes 	 *
2746976af69SBruce Evans 	 * Otherwise, if the number of ticks in the whole seconds part of
2756976af69SBruce Evans 	 * the time difference fits in a long, then convert the parts to
2766976af69SBruce Evans 	 * ticks separately and add, using similar rounding methods and
2776976af69SBruce Evans 	 * overflow avoidance.  This method would work in the previous
2786976af69SBruce Evans 	 * case but it is slightly slower and assumes that hz is integral.
2796976af69SBruce Evans 	 *
2806976af69SBruce Evans 	 * Otherwise, round the time difference down to the maximum
2816976af69SBruce Evans 	 * representable value.
2826976af69SBruce Evans 	 *
2836976af69SBruce Evans 	 * If ints have 32 bits, then the maximum value for any timeout in
2846976af69SBruce Evans 	 * 10ms ticks is 248 days.
285df8bae1dSRodney W. Grimes 	 */
286227ee8a1SPoul-Henning Kamp 	sec = tv->tv_sec;
287227ee8a1SPoul-Henning Kamp 	usec = tv->tv_usec;
2886976af69SBruce Evans 	if (usec < 0) {
2896976af69SBruce Evans 		sec--;
2906976af69SBruce Evans 		usec += 1000000;
2916976af69SBruce Evans 	}
2926976af69SBruce Evans 	if (sec < 0) {
2936976af69SBruce Evans #ifdef DIAGNOSTIC
294b05dcf3cSPoul-Henning Kamp 		if (usec > 0) {
2957ec73f64SPoul-Henning Kamp 			sec++;
2967ec73f64SPoul-Henning Kamp 			usec -= 1000000;
2977ec73f64SPoul-Henning Kamp 		}
298227ee8a1SPoul-Henning Kamp 		printf("tvotohz: negative time difference %ld sec %ld usec\n",
2996976af69SBruce Evans 		       sec, usec);
3006976af69SBruce Evans #endif
3016976af69SBruce Evans 		ticks = 1;
3026976af69SBruce Evans 	} else if (sec <= LONG_MAX / 1000000)
3036976af69SBruce Evans 		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
3046976af69SBruce Evans 			/ tick + 1;
3056976af69SBruce Evans 	else if (sec <= LONG_MAX / hz)
3066976af69SBruce Evans 		ticks = sec * hz
3076976af69SBruce Evans 			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
3086976af69SBruce Evans 	else
3096976af69SBruce Evans 		ticks = LONG_MAX;
3106976af69SBruce Evans 	if (ticks > INT_MAX)
3116976af69SBruce Evans 		ticks = INT_MAX;
312d6116663SAlexander Langer 	return ((int)ticks);
313df8bae1dSRodney W. Grimes }
314df8bae1dSRodney W. Grimes 
315df8bae1dSRodney W. Grimes /*
316df8bae1dSRodney W. Grimes  * Start profiling on a process.
317df8bae1dSRodney W. Grimes  *
318df8bae1dSRodney W. Grimes  * Kernel profiling passes proc0 which never exits and hence
319df8bae1dSRodney W. Grimes  * keeps the profile clock running constantly.
320df8bae1dSRodney W. Grimes  */
321df8bae1dSRodney W. Grimes void
322df8bae1dSRodney W. Grimes startprofclock(p)
323df8bae1dSRodney W. Grimes 	register struct proc *p;
324df8bae1dSRodney W. Grimes {
325df8bae1dSRodney W. Grimes 	int s;
326df8bae1dSRodney W. Grimes 
327df8bae1dSRodney W. Grimes 	if ((p->p_flag & P_PROFIL) == 0) {
328df8bae1dSRodney W. Grimes 		p->p_flag |= P_PROFIL;
329df8bae1dSRodney W. Grimes 		if (++profprocs == 1 && stathz != 0) {
330df8bae1dSRodney W. Grimes 			s = splstatclock();
331df8bae1dSRodney W. Grimes 			psdiv = pscnt = psratio;
332df8bae1dSRodney W. Grimes 			setstatclockrate(profhz);
333df8bae1dSRodney W. Grimes 			splx(s);
334df8bae1dSRodney W. Grimes 		}
335df8bae1dSRodney W. Grimes 	}
336df8bae1dSRodney W. Grimes }
337df8bae1dSRodney W. Grimes 
338df8bae1dSRodney W. Grimes /*
339df8bae1dSRodney W. Grimes  * Stop profiling on a process.
340df8bae1dSRodney W. Grimes  */
341df8bae1dSRodney W. Grimes void
342df8bae1dSRodney W. Grimes stopprofclock(p)
343df8bae1dSRodney W. Grimes 	register struct proc *p;
344df8bae1dSRodney W. Grimes {
345df8bae1dSRodney W. Grimes 	int s;
346df8bae1dSRodney W. Grimes 
347df8bae1dSRodney W. Grimes 	if (p->p_flag & P_PROFIL) {
348df8bae1dSRodney W. Grimes 		p->p_flag &= ~P_PROFIL;
349df8bae1dSRodney W. Grimes 		if (--profprocs == 0 && stathz != 0) {
350df8bae1dSRodney W. Grimes 			s = splstatclock();
351df8bae1dSRodney W. Grimes 			psdiv = pscnt = 1;
352df8bae1dSRodney W. Grimes 			setstatclockrate(stathz);
353df8bae1dSRodney W. Grimes 			splx(s);
354df8bae1dSRodney W. Grimes 		}
355df8bae1dSRodney W. Grimes 	}
356df8bae1dSRodney W. Grimes }
357df8bae1dSRodney W. Grimes 
358df8bae1dSRodney W. Grimes /*
359df8bae1dSRodney W. Grimes  * Statistics clock.  Grab profile sample, and if divider reaches 0,
360df8bae1dSRodney W. Grimes  * do process and kernel statistics.
361df8bae1dSRodney W. Grimes  */
362df8bae1dSRodney W. Grimes void
363df8bae1dSRodney W. Grimes statclock(frame)
364df8bae1dSRodney W. Grimes 	register struct clockframe *frame;
365df8bae1dSRodney W. Grimes {
366df8bae1dSRodney W. Grimes #ifdef GPROF
367df8bae1dSRodney W. Grimes 	register struct gmonparam *g;
368fffd686aSBruce Evans 	int i;
369df8bae1dSRodney W. Grimes #endif
370f5e9e8ecSBruce Evans 	register struct proc *p;
3718a129caeSDavid Greenman 	struct pstats *pstats;
372f5e9e8ecSBruce Evans 	long rss;
3738a129caeSDavid Greenman 	struct rusage *ru;
3748a129caeSDavid Greenman 	struct vmspace *vm;
3758a129caeSDavid Greenman 
3768843cc35SSøren Schmidt 	if (curproc != NULL && CLKF_USERMODE(frame)) {
377f5e9e8ecSBruce Evans 		p = curproc;
378df8bae1dSRodney W. Grimes 		if (p->p_flag & P_PROFIL)
379df8bae1dSRodney W. Grimes 			addupc_intr(p, CLKF_PC(frame), 1);
380eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK)
381eae8fc2cSSteve Passe 		if (stathz != 0)
382eae8fc2cSSteve Passe 			forward_statclock(pscnt);
383eae8fc2cSSteve Passe #endif
384df8bae1dSRodney W. Grimes 		if (--pscnt > 0)
385df8bae1dSRodney W. Grimes 			return;
386df8bae1dSRodney W. Grimes 		/*
387df8bae1dSRodney W. Grimes 		 * Came from user mode; CPU was in user state.
388df8bae1dSRodney W. Grimes 		 * If this process is being profiled record the tick.
389df8bae1dSRodney W. Grimes 		 */
390df8bae1dSRodney W. Grimes 		p->p_uticks++;
391df8bae1dSRodney W. Grimes 		if (p->p_nice > NZERO)
392df8bae1dSRodney W. Grimes 			cp_time[CP_NICE]++;
393df8bae1dSRodney W. Grimes 		else
394df8bae1dSRodney W. Grimes 			cp_time[CP_USER]++;
395df8bae1dSRodney W. Grimes 	} else {
396df8bae1dSRodney W. Grimes #ifdef GPROF
397df8bae1dSRodney W. Grimes 		/*
398df8bae1dSRodney W. Grimes 		 * Kernel statistics are just like addupc_intr, only easier.
399df8bae1dSRodney W. Grimes 		 */
400df8bae1dSRodney W. Grimes 		g = &_gmonparam;
401df8bae1dSRodney W. Grimes 		if (g->state == GMON_PROF_ON) {
402df8bae1dSRodney W. Grimes 			i = CLKF_PC(frame) - g->lowpc;
403df8bae1dSRodney W. Grimes 			if (i < g->textsize) {
404df8bae1dSRodney W. Grimes 				i /= HISTFRACTION * sizeof(*g->kcount);
405df8bae1dSRodney W. Grimes 				g->kcount[i]++;
406df8bae1dSRodney W. Grimes 			}
407df8bae1dSRodney W. Grimes 		}
408df8bae1dSRodney W. Grimes #endif
409eae8fc2cSSteve Passe #if defined(SMP) && defined(BETTER_CLOCK)
410eae8fc2cSSteve Passe 		if (stathz != 0)
411eae8fc2cSSteve Passe 			forward_statclock(pscnt);
412eae8fc2cSSteve Passe #endif
413df8bae1dSRodney W. Grimes 		if (--pscnt > 0)
414df8bae1dSRodney W. Grimes 			return;
415df8bae1dSRodney W. Grimes 		/*
416df8bae1dSRodney W. Grimes 		 * Came from kernel mode, so we were:
417df8bae1dSRodney W. Grimes 		 * - handling an interrupt,
418df8bae1dSRodney W. Grimes 		 * - doing syscall or trap work on behalf of the current
419df8bae1dSRodney W. Grimes 		 *   user process, or
420df8bae1dSRodney W. Grimes 		 * - spinning in the idle loop.
421df8bae1dSRodney W. Grimes 		 * Whichever it is, charge the time as appropriate.
422df8bae1dSRodney W. Grimes 		 * Note that we charge interrupts to the current process,
423df8bae1dSRodney W. Grimes 		 * regardless of whether they are ``for'' that process,
424df8bae1dSRodney W. Grimes 		 * so that we know how much of its real time was spent
425df8bae1dSRodney W. Grimes 		 * in ``non-process'' (i.e., interrupt) work.
426df8bae1dSRodney W. Grimes 		 */
427f5e9e8ecSBruce Evans 		p = curproc;
428df8bae1dSRodney W. Grimes 		if (CLKF_INTR(frame)) {
429df8bae1dSRodney W. Grimes 			if (p != NULL)
430df8bae1dSRodney W. Grimes 				p->p_iticks++;
431df8bae1dSRodney W. Grimes 			cp_time[CP_INTR]++;
432b672aa4bSBruce Evans 		} else if (p != NULL) {
433df8bae1dSRodney W. Grimes 			p->p_sticks++;
434df8bae1dSRodney W. Grimes 			cp_time[CP_SYS]++;
435df8bae1dSRodney W. Grimes 		} else
436df8bae1dSRodney W. Grimes 			cp_time[CP_IDLE]++;
437df8bae1dSRodney W. Grimes 	}
438df8bae1dSRodney W. Grimes 	pscnt = psdiv;
439df8bae1dSRodney W. Grimes 
440df8bae1dSRodney W. Grimes 	/*
441df8bae1dSRodney W. Grimes 	 * We maintain statistics shown by user-level statistics
4427ea97031SJustin T. Gibbs 	 * programs:  the amount of time in each cpu state.
443df8bae1dSRodney W. Grimes 	 */
444df8bae1dSRodney W. Grimes 
445df8bae1dSRodney W. Grimes 	/*
446df8bae1dSRodney W. Grimes 	 * We adjust the priority of the current process.  The priority of
447df8bae1dSRodney W. Grimes 	 * a process gets worse as it accumulates CPU time.  The cpu usage
448df8bae1dSRodney W. Grimes 	 * estimator (p_estcpu) is increased here.  The formula for computing
449df8bae1dSRodney W. Grimes 	 * priorities (in kern_synch.c) will compute a different value each
450df8bae1dSRodney W. Grimes 	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
451df8bae1dSRodney W. Grimes 	 * quite quickly when the process is running (linearly), and decays
452df8bae1dSRodney W. Grimes 	 * away exponentially, at a rate which is proportionally slower when
453df8bae1dSRodney W. Grimes 	 * the system is busy.  The basic principal is that the system will
454df8bae1dSRodney W. Grimes 	 * 90% forget that the process used a lot of CPU time in 5 * loadav
455df8bae1dSRodney W. Grimes 	 * seconds.  This causes the system to favor processes which haven't
456df8bae1dSRodney W. Grimes 	 * run much recently, and to round-robin among other processes.
457df8bae1dSRodney W. Grimes 	 */
458df8bae1dSRodney W. Grimes 	if (p != NULL) {
459df8bae1dSRodney W. Grimes 		p->p_cpticks++;
460df8bae1dSRodney W. Grimes 		if (++p->p_estcpu == 0)
461df8bae1dSRodney W. Grimes 			p->p_estcpu--;
462df8bae1dSRodney W. Grimes 		if ((p->p_estcpu & 3) == 0) {
463df8bae1dSRodney W. Grimes 			resetpriority(p);
464df8bae1dSRodney W. Grimes 			if (p->p_priority >= PUSER)
465df8bae1dSRodney W. Grimes 				p->p_priority = p->p_usrpri;
466df8bae1dSRodney W. Grimes 		}
467f5e9e8ecSBruce Evans 
468f5e9e8ecSBruce Evans 		/* Update resource usage integrals and maximums. */
469f5e9e8ecSBruce Evans 		if ((pstats = p->p_stats) != NULL &&
470f5e9e8ecSBruce Evans 		    (ru = &pstats->p_ru) != NULL &&
471f5e9e8ecSBruce Evans 		    (vm = p->p_vmspace) != NULL) {
4721c6d46f9SLuoqi Chen 			ru->ru_ixrss += pgtok(vm->vm_tsize);
4731c6d46f9SLuoqi Chen 			ru->ru_idrss += pgtok(vm->vm_dsize);
4741c6d46f9SLuoqi Chen 			ru->ru_isrss += pgtok(vm->vm_ssize);
4751c6d46f9SLuoqi Chen 			rss = pgtok(vmspace_resident_count(vm));
476f5e9e8ecSBruce Evans 			if (ru->ru_maxrss < rss)
477f5e9e8ecSBruce Evans 				ru->ru_maxrss = rss;
478f5e9e8ecSBruce Evans         	}
479df8bae1dSRodney W. Grimes 	}
480df8bae1dSRodney W. Grimes }
481df8bae1dSRodney W. Grimes 
482df8bae1dSRodney W. Grimes /*
483df8bae1dSRodney W. Grimes  * Return information about system clocks.
484df8bae1dSRodney W. Grimes  */
485787d58f2SPoul-Henning Kamp static int
486787d58f2SPoul-Henning Kamp sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
487df8bae1dSRodney W. Grimes {
488df8bae1dSRodney W. Grimes 	struct clockinfo clkinfo;
489df8bae1dSRodney W. Grimes 	/*
490df8bae1dSRodney W. Grimes 	 * Construct clockinfo structure.
491df8bae1dSRodney W. Grimes 	 */
492df8bae1dSRodney W. Grimes 	clkinfo.hz = hz;
493df8bae1dSRodney W. Grimes 	clkinfo.tick = tick;
4945faa3121SJohn Hay 	clkinfo.tickadj = tickadj;
495df8bae1dSRodney W. Grimes 	clkinfo.profhz = profhz;
496df8bae1dSRodney W. Grimes 	clkinfo.stathz = stathz ? stathz : hz;
497ae0eb976SPoul-Henning Kamp 	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
498df8bae1dSRodney W. Grimes }
4993f31c649SGarrett Wollman 
500946bb7a2SPoul-Henning Kamp SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
50165d0bc13SPoul-Henning Kamp 	0, 0, sysctl_kern_clockrate, "S,clockinfo","");
502787d58f2SPoul-Henning Kamp 
503e796e00dSPoul-Henning Kamp static __inline unsigned
504a58f0f8eSPoul-Henning Kamp tco_delta(struct timecounter *tc)
505e796e00dSPoul-Henning Kamp {
506e796e00dSPoul-Henning Kamp 
507a58f0f8eSPoul-Henning Kamp 	return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
508a58f0f8eSPoul-Henning Kamp 	    tc->tc_counter_mask);
509e796e00dSPoul-Henning Kamp }
510a0502b19SPoul-Henning Kamp 
511a0502b19SPoul-Henning Kamp /*
512a0502b19SPoul-Henning Kamp  * We have four functions for looking at the clock, two for microseconds
513a0502b19SPoul-Henning Kamp  * and two for nanoseconds.  For each there is fast but less precise
514a0502b19SPoul-Henning Kamp  * version "get{nano|micro}time" which will return a time which is up
515a0502b19SPoul-Henning Kamp  * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time"
516a0502b19SPoul-Henning Kamp  * will return a timestamp which is as precise as possible.
517a0502b19SPoul-Henning Kamp  */
518a0502b19SPoul-Henning Kamp 
519a0502b19SPoul-Henning Kamp void
520a0502b19SPoul-Henning Kamp getmicrotime(struct timeval *tvp)
521a0502b19SPoul-Henning Kamp {
522a0502b19SPoul-Henning Kamp 	struct timecounter *tc;
523a0502b19SPoul-Henning Kamp 
524510eb5b9SPoul-Henning Kamp 	if (!tco_method) {
525a0502b19SPoul-Henning Kamp 		tc = timecounter;
526a58f0f8eSPoul-Henning Kamp 		*tvp = tc->tc_microtime;
527510eb5b9SPoul-Henning Kamp 	} else {
528510eb5b9SPoul-Henning Kamp 		microtime(tvp);
529510eb5b9SPoul-Henning Kamp 	}
53000af9731SPoul-Henning Kamp }
53100af9731SPoul-Henning Kamp 
53200af9731SPoul-Henning Kamp void
53300af9731SPoul-Henning Kamp getnanotime(struct timespec *tsp)
53400af9731SPoul-Henning Kamp {
53500af9731SPoul-Henning Kamp 	struct timecounter *tc;
53600af9731SPoul-Henning Kamp 
537510eb5b9SPoul-Henning Kamp 	if (!tco_method) {
53800af9731SPoul-Henning Kamp 		tc = timecounter;
539a58f0f8eSPoul-Henning Kamp 		*tsp = tc->tc_nanotime;
540510eb5b9SPoul-Henning Kamp 	} else {
541510eb5b9SPoul-Henning Kamp 		nanotime(tsp);
542510eb5b9SPoul-Henning Kamp 	}
54300af9731SPoul-Henning Kamp }
54400af9731SPoul-Henning Kamp 
54500af9731SPoul-Henning Kamp void
54600af9731SPoul-Henning Kamp microtime(struct timeval *tv)
54700af9731SPoul-Henning Kamp {
54800af9731SPoul-Henning Kamp 	struct timecounter *tc;
54900af9731SPoul-Henning Kamp 
55000af9731SPoul-Henning Kamp 	tc = (struct timecounter *)timecounter;
551a58f0f8eSPoul-Henning Kamp 	tv->tv_sec = tc->tc_offset_sec;
552a58f0f8eSPoul-Henning Kamp 	tv->tv_usec = tc->tc_offset_micro;
553a58f0f8eSPoul-Henning Kamp 	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
55400af9731SPoul-Henning Kamp 	tv->tv_usec += boottime.tv_usec;
55500af9731SPoul-Henning Kamp 	tv->tv_sec += boottime.tv_sec;
55691ad39c6SPoul-Henning Kamp 	while (tv->tv_usec >= 1000000) {
55700af9731SPoul-Henning Kamp 		tv->tv_usec -= 1000000;
55800af9731SPoul-Henning Kamp 		tv->tv_sec++;
55900af9731SPoul-Henning Kamp 	}
56000af9731SPoul-Henning Kamp }
56100af9731SPoul-Henning Kamp 
56200af9731SPoul-Henning Kamp void
56348115288SPoul-Henning Kamp nanotime(struct timespec *ts)
56400af9731SPoul-Henning Kamp {
565579f4456SPoul-Henning Kamp 	unsigned count;
56600af9731SPoul-Henning Kamp 	u_int64_t delta;
56700af9731SPoul-Henning Kamp 	struct timecounter *tc;
56800af9731SPoul-Henning Kamp 
56900af9731SPoul-Henning Kamp 	tc = (struct timecounter *)timecounter;
570a58f0f8eSPoul-Henning Kamp 	ts->tv_sec = tc->tc_offset_sec;
571a58f0f8eSPoul-Henning Kamp 	count = tco_delta(tc);
572a58f0f8eSPoul-Henning Kamp 	delta = tc->tc_offset_nano;
573a58f0f8eSPoul-Henning Kamp 	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
57400af9731SPoul-Henning Kamp 	delta >>= 32;
575a58f0f8eSPoul-Henning Kamp 	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
57600af9731SPoul-Henning Kamp 	delta += boottime.tv_usec * 1000;
57748115288SPoul-Henning Kamp 	ts->tv_sec += boottime.tv_sec;
57891ad39c6SPoul-Henning Kamp 	while (delta >= 1000000000) {
57900af9731SPoul-Henning Kamp 		delta -= 1000000000;
58048115288SPoul-Henning Kamp 		ts->tv_sec++;
58100af9731SPoul-Henning Kamp 	}
58248115288SPoul-Henning Kamp 	ts->tv_nsec = delta;
58348115288SPoul-Henning Kamp }
58448115288SPoul-Henning Kamp 
58548115288SPoul-Henning Kamp void
58648115288SPoul-Henning Kamp timecounter_timespec(unsigned count, struct timespec *ts)
58748115288SPoul-Henning Kamp {
58848115288SPoul-Henning Kamp 	u_int64_t delta;
58948115288SPoul-Henning Kamp 	struct timecounter *tc;
59048115288SPoul-Henning Kamp 
59148115288SPoul-Henning Kamp 	tc = (struct timecounter *)timecounter;
592a58f0f8eSPoul-Henning Kamp 	ts->tv_sec = tc->tc_offset_sec;
593a58f0f8eSPoul-Henning Kamp 	count -= tc->tc_offset_count;
594a58f0f8eSPoul-Henning Kamp 	count &= tc->tc_counter_mask;
595a58f0f8eSPoul-Henning Kamp 	delta = tc->tc_offset_nano;
596a58f0f8eSPoul-Henning Kamp 	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
59748115288SPoul-Henning Kamp 	delta >>= 32;
598a58f0f8eSPoul-Henning Kamp 	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
59948115288SPoul-Henning Kamp 	delta += boottime.tv_usec * 1000;
60048115288SPoul-Henning Kamp 	ts->tv_sec += boottime.tv_sec;
60148115288SPoul-Henning Kamp 	while (delta >= 1000000000) {
60248115288SPoul-Henning Kamp 		delta -= 1000000000;
60348115288SPoul-Henning Kamp 		ts->tv_sec++;
60448115288SPoul-Henning Kamp 	}
60548115288SPoul-Henning Kamp 	ts->tv_nsec = delta;
60600af9731SPoul-Henning Kamp }
60700af9731SPoul-Henning Kamp 
60800af9731SPoul-Henning Kamp void
609c21410e1SPoul-Henning Kamp getmicrouptime(struct timeval *tvp)
61000af9731SPoul-Henning Kamp {
61100af9731SPoul-Henning Kamp 	struct timecounter *tc;
61200af9731SPoul-Henning Kamp 
613510eb5b9SPoul-Henning Kamp 	if (!tco_method) {
61400af9731SPoul-Henning Kamp 		tc = timecounter;
615a58f0f8eSPoul-Henning Kamp 		tvp->tv_sec = tc->tc_offset_sec;
616a58f0f8eSPoul-Henning Kamp 		tvp->tv_usec = tc->tc_offset_micro;
617510eb5b9SPoul-Henning Kamp 	} else {
618510eb5b9SPoul-Henning Kamp 		microuptime(tvp);
619510eb5b9SPoul-Henning Kamp 	}
620a0502b19SPoul-Henning Kamp }
621a0502b19SPoul-Henning Kamp 
622a0502b19SPoul-Henning Kamp void
623c21410e1SPoul-Henning Kamp getnanouptime(struct timespec *tsp)
624a0502b19SPoul-Henning Kamp {
625a0502b19SPoul-Henning Kamp 	struct timecounter *tc;
626a0502b19SPoul-Henning Kamp 
627510eb5b9SPoul-Henning Kamp 	if (!tco_method) {
628a0502b19SPoul-Henning Kamp 		tc = timecounter;
629a58f0f8eSPoul-Henning Kamp 		tsp->tv_sec = tc->tc_offset_sec;
630a58f0f8eSPoul-Henning Kamp 		tsp->tv_nsec = tc->tc_offset_nano >> 32;
631510eb5b9SPoul-Henning Kamp 	} else {
632510eb5b9SPoul-Henning Kamp 		nanouptime(tsp);
633510eb5b9SPoul-Henning Kamp 	}
634a0502b19SPoul-Henning Kamp }
635a0502b19SPoul-Henning Kamp 
636c7c9a816SPoul-Henning Kamp void
637c21410e1SPoul-Henning Kamp microuptime(struct timeval *tv)
638c7c9a816SPoul-Henning Kamp {
6397ec73f64SPoul-Henning Kamp 	struct timecounter *tc;
6407ec73f64SPoul-Henning Kamp 
6417ec73f64SPoul-Henning Kamp 	tc = (struct timecounter *)timecounter;
642a58f0f8eSPoul-Henning Kamp 	tv->tv_sec = tc->tc_offset_sec;
643a58f0f8eSPoul-Henning Kamp 	tv->tv_usec = tc->tc_offset_micro;
644a58f0f8eSPoul-Henning Kamp 	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
6457ec73f64SPoul-Henning Kamp 	if (tv->tv_usec >= 1000000) {
6467ec73f64SPoul-Henning Kamp 		tv->tv_usec -= 1000000;
6477ec73f64SPoul-Henning Kamp 		tv->tv_sec++;
648c7c9a816SPoul-Henning Kamp 	}
6497ec73f64SPoul-Henning Kamp }
6507ec73f64SPoul-Henning Kamp 
6517ec73f64SPoul-Henning Kamp void
652c2906d55SPoul-Henning Kamp nanouptime(struct timespec *ts)
6537ec73f64SPoul-Henning Kamp {
654e796e00dSPoul-Henning Kamp 	unsigned count;
6557ec73f64SPoul-Henning Kamp 	u_int64_t delta;
6567ec73f64SPoul-Henning Kamp 	struct timecounter *tc;
6577ec73f64SPoul-Henning Kamp 
6587ec73f64SPoul-Henning Kamp 	tc = (struct timecounter *)timecounter;
659c2906d55SPoul-Henning Kamp 	ts->tv_sec = tc->tc_offset_sec;
660a58f0f8eSPoul-Henning Kamp 	count = tco_delta(tc);
661a58f0f8eSPoul-Henning Kamp 	delta = tc->tc_offset_nano;
662a58f0f8eSPoul-Henning Kamp 	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
6637ec73f64SPoul-Henning Kamp 	delta >>= 32;
664a58f0f8eSPoul-Henning Kamp 	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
6657ec73f64SPoul-Henning Kamp 	if (delta >= 1000000000) {
6667ec73f64SPoul-Henning Kamp 		delta -= 1000000000;
667c2906d55SPoul-Henning Kamp 		ts->tv_sec++;
6687ec73f64SPoul-Henning Kamp 	}
669c2906d55SPoul-Henning Kamp 	ts->tv_nsec = delta;
6707ec73f64SPoul-Henning Kamp }
6717ec73f64SPoul-Henning Kamp 
6727ec73f64SPoul-Henning Kamp static void
6737ec73f64SPoul-Henning Kamp tco_setscales(struct timecounter *tc)
6747ec73f64SPoul-Henning Kamp {
6757ec73f64SPoul-Henning Kamp 	u_int64_t scale;
6767ec73f64SPoul-Henning Kamp 
6777ec73f64SPoul-Henning Kamp 	scale = 1000000000LL << 32;
678a58f0f8eSPoul-Henning Kamp 	if (tc->tc_adjustment > 0)
679a58f0f8eSPoul-Henning Kamp 		scale += (tc->tc_adjustment * 1000LL) << 10;
6807ec73f64SPoul-Henning Kamp 	else
681a58f0f8eSPoul-Henning Kamp 		scale -= (-tc->tc_adjustment * 1000LL) << 10;
682a58f0f8eSPoul-Henning Kamp 	scale /= tc->tc_frequency;
683a58f0f8eSPoul-Henning Kamp 	tc->tc_scale_micro = scale / 1000;
684a58f0f8eSPoul-Henning Kamp 	tc->tc_scale_nano_f = scale & 0xffffffff;
685a58f0f8eSPoul-Henning Kamp 	tc->tc_scale_nano_i = scale >> 32;
6867ec73f64SPoul-Henning Kamp }
6877ec73f64SPoul-Henning Kamp 
6887ec73f64SPoul-Henning Kamp void
6897ec73f64SPoul-Henning Kamp init_timecounter(struct timecounter *tc)
6907ec73f64SPoul-Henning Kamp {
691f5ef029eSPoul-Henning Kamp 	struct timespec ts1;
6923bac064fSPoul-Henning Kamp 	struct timecounter *t1, *t2, *t3;
6937ec73f64SPoul-Henning Kamp 	int i;
6947ec73f64SPoul-Henning Kamp 
695a58f0f8eSPoul-Henning Kamp 	tc->tc_adjustment = 0;
6967ec73f64SPoul-Henning Kamp 	tco_setscales(tc);
697a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_count = tc->tc_get_timecount(tc);
6983bac064fSPoul-Henning Kamp 	tc->tc_tweak = tc;
6993bac064fSPoul-Henning Kamp 	MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
7003bac064fSPoul-Henning Kamp 	*t1 = *tc;
7013bac064fSPoul-Henning Kamp 	t2 = t1;
7023bac064fSPoul-Henning Kamp 	for (i = 1; i < NTIMECOUNTER; i++) {
7033bac064fSPoul-Henning Kamp 		MALLOC(t3, struct timecounter *, sizeof *t3,
7043bac064fSPoul-Henning Kamp 		    M_TIMECOUNTER, M_WAITOK);
7053bac064fSPoul-Henning Kamp 		*t3 = *tc;
7063bac064fSPoul-Henning Kamp 		t3->tc_other = t2;
7073bac064fSPoul-Henning Kamp 		t2 = t3;
7083bac064fSPoul-Henning Kamp 	}
7093bac064fSPoul-Henning Kamp 	t1->tc_other = t3;
7103bac064fSPoul-Henning Kamp 	tc = t1;
7117ec73f64SPoul-Henning Kamp 
7123bac064fSPoul-Henning Kamp 	printf("Timecounter \"%s\"  frequency %lu Hz\n",
7133bac064fSPoul-Henning Kamp 	    tc->tc_name, (u_long)tc->tc_frequency);
7147ec73f64SPoul-Henning Kamp 
7157ec73f64SPoul-Henning Kamp 	/* XXX: For now always start using the counter. */
716a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_count = tc->tc_get_timecount(tc);
7176ca4ca24SPoul-Henning Kamp 	nanouptime(&ts1);
718a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
719a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_micro = ts1.tv_nsec / 1000;
720a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_sec = ts1.tv_sec;
7217ec73f64SPoul-Henning Kamp 	timecounter = tc;
7227ec73f64SPoul-Henning Kamp }
7237ec73f64SPoul-Henning Kamp 
7247ec73f64SPoul-Henning Kamp void
7257ec73f64SPoul-Henning Kamp set_timecounter(struct timespec *ts)
7267ec73f64SPoul-Henning Kamp {
72700af9731SPoul-Henning Kamp 	struct timespec ts2;
7287ec73f64SPoul-Henning Kamp 
729c21410e1SPoul-Henning Kamp 	nanouptime(&ts2);
73000af9731SPoul-Henning Kamp 	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
73100af9731SPoul-Henning Kamp 	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
73200af9731SPoul-Henning Kamp 	if (boottime.tv_usec < 0) {
73300af9731SPoul-Henning Kamp 		boottime.tv_usec += 1000000;
73400af9731SPoul-Henning Kamp 		boottime.tv_sec--;
73500af9731SPoul-Henning Kamp 	}
73600af9731SPoul-Henning Kamp 	/* fiddle all the little crinkly bits around the fiords... */
737c2906d55SPoul-Henning Kamp 	tco_forward(1);
7387ec73f64SPoul-Henning Kamp }
7397ec73f64SPoul-Henning Kamp 
74000af9731SPoul-Henning Kamp 
#if 0 /* Currently unused */
/*
 * Make 'newtc' the active timecounter.
 *
 * Nothing is done if 'newtc' is already the active timecounter or its
 * tc_other.  Otherwise 'newtc' is seeded with the current uptime and a
 * fresh counter reading before being installed.  The whole operation runs
 * between splclock() and splx().
 */
void
switch_timecounter(struct timecounter *newtc)
{
	struct timecounter *cur;
	struct timespec now;
	int s;

	s = splclock();
	cur = timecounter;
	if (newtc != cur && newtc != cur->tc_other) {
		nanouptime(&now);
		newtc->tc_offset_sec = now.tv_sec;
		newtc->tc_offset_nano = (u_int64_t)now.tv_nsec << 32;
		newtc->tc_offset_micro = now.tv_nsec / 1000;
		newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
		timecounter = newtc;
	}
	splx(s);
}
#endif
764b05dcf3cSPoul-Henning Kamp 
7657ec73f64SPoul-Henning Kamp static struct timecounter *
766b05dcf3cSPoul-Henning Kamp sync_other_counter(void)
7677ec73f64SPoul-Henning Kamp {
7680edd53d2SPoul-Henning Kamp 	struct timecounter *tc, *tcn, *tco;
769579f4456SPoul-Henning Kamp 	unsigned delta;
7707ec73f64SPoul-Henning Kamp 
7710edd53d2SPoul-Henning Kamp 	tco = timecounter;
7720edd53d2SPoul-Henning Kamp 	tc = tco->tc_other;
7730edd53d2SPoul-Henning Kamp 	tcn = tc->tc_other;
7740edd53d2SPoul-Henning Kamp 	*tc = *tco;
7750edd53d2SPoul-Henning Kamp 	tc->tc_other = tcn;
776a58f0f8eSPoul-Henning Kamp 	delta = tco_delta(tc);
777a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_count += delta;
778a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_count &= tc->tc_counter_mask;
779a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
780a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
7817ec73f64SPoul-Henning Kamp 	return (tc);
7827ec73f64SPoul-Henning Kamp }
7837ec73f64SPoul-Henning Kamp 
7847ec73f64SPoul-Henning Kamp static void
785c2906d55SPoul-Henning Kamp tco_forward(int force)
7867ec73f64SPoul-Henning Kamp {
78752f8e5d6SPoul-Henning Kamp 	struct timecounter *tc, *tco;
7887ec73f64SPoul-Henning Kamp 
78952f8e5d6SPoul-Henning Kamp 	tco = timecounter;
790b05dcf3cSPoul-Henning Kamp 	tc = sync_other_counter();
7910edd53d2SPoul-Henning Kamp 	/*
7920edd53d2SPoul-Henning Kamp 	 * We may be inducing a tiny error here, the tc_poll_pps() may
7930edd53d2SPoul-Henning Kamp 	 * process a latched count which happens after the tco_delta()
7940edd53d2SPoul-Henning Kamp 	 * in sync_other_counter(), which would extend the previous
7950edd53d2SPoul-Henning Kamp 	 * counters parameters into the domain of this new one.
7960edd53d2SPoul-Henning Kamp 	 * Since the timewindow is very small for this, the error is
7970edd53d2SPoul-Henning Kamp 	 * going to be only a few weenieseconds (as Dave Mills would
7980edd53d2SPoul-Henning Kamp 	 * say), so lets just not talk more about it, OK ?
7990edd53d2SPoul-Henning Kamp 	 */
80052f8e5d6SPoul-Henning Kamp 	if (tco->tc_poll_pps)
80152f8e5d6SPoul-Henning Kamp 		tco->tc_poll_pps(tco);
802b05dcf3cSPoul-Henning Kamp 	if (timedelta != 0) {
803a58f0f8eSPoul-Henning Kamp 		tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
8047ec73f64SPoul-Henning Kamp 		timedelta -= tickdelta;
805c2906d55SPoul-Henning Kamp 		force++;
8067ec73f64SPoul-Henning Kamp 	}
807b05dcf3cSPoul-Henning Kamp 
808a58f0f8eSPoul-Henning Kamp 	while (tc->tc_offset_nano >= 1000000000ULL << 32) {
809a58f0f8eSPoul-Henning Kamp 		tc->tc_offset_nano -= 1000000000ULL << 32;
810a58f0f8eSPoul-Henning Kamp 		tc->tc_offset_sec++;
811a58f0f8eSPoul-Henning Kamp 		tc->tc_frequency = tc->tc_tweak->tc_frequency;
812a58f0f8eSPoul-Henning Kamp 		tc->tc_adjustment = tc->tc_tweak->tc_adjustment;
8137ec73f64SPoul-Henning Kamp 		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
8147ec73f64SPoul-Henning Kamp 		tco_setscales(tc);
815c2906d55SPoul-Henning Kamp 		force++;
8167ec73f64SPoul-Henning Kamp 	}
817b05dcf3cSPoul-Henning Kamp 
818510eb5b9SPoul-Henning Kamp 	if (tco_method && !force)
819c2906d55SPoul-Henning Kamp 		return;
820c2906d55SPoul-Henning Kamp 
821a58f0f8eSPoul-Henning Kamp 	tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;
822b05dcf3cSPoul-Henning Kamp 
82300af9731SPoul-Henning Kamp 	/* Figure out the wall-clock time */
824a58f0f8eSPoul-Henning Kamp 	tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
825a58f0f8eSPoul-Henning Kamp 	tc->tc_nanotime.tv_nsec =
826a58f0f8eSPoul-Henning Kamp 	    (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
827a58f0f8eSPoul-Henning Kamp 	tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
828a58f0f8eSPoul-Henning Kamp 	if (tc->tc_nanotime.tv_nsec >= 1000000000) {
829a58f0f8eSPoul-Henning Kamp 		tc->tc_nanotime.tv_nsec -= 1000000000;
830a58f0f8eSPoul-Henning Kamp 		tc->tc_microtime.tv_usec -= 1000000;
831a58f0f8eSPoul-Henning Kamp 		tc->tc_nanotime.tv_sec++;
83200af9731SPoul-Henning Kamp 	}
833a58f0f8eSPoul-Henning Kamp 	time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;
83400af9731SPoul-Henning Kamp 
8357ec73f64SPoul-Henning Kamp 	timecounter = tc;
8367ec73f64SPoul-Henning Kamp }
8377ec73f64SPoul-Henning Kamp 
8387ec73f64SPoul-Henning Kamp static int
8397ec73f64SPoul-Henning Kamp sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
8407ec73f64SPoul-Henning Kamp {
841b05dcf3cSPoul-Henning Kamp 
842a58f0f8eSPoul-Henning Kamp 	return (sysctl_handle_opaque(oidp,
843a58f0f8eSPoul-Henning Kamp 	    &timecounter->tc_tweak->tc_frequency,
844a58f0f8eSPoul-Henning Kamp 	    sizeof(timecounter->tc_tweak->tc_frequency), req));
8457ec73f64SPoul-Henning Kamp }
8467ec73f64SPoul-Henning Kamp 
8477ec73f64SPoul-Henning Kamp static int
8487ec73f64SPoul-Henning Kamp sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
8497ec73f64SPoul-Henning Kamp {
850b05dcf3cSPoul-Henning Kamp 
851a58f0f8eSPoul-Henning Kamp 	return (sysctl_handle_opaque(oidp,
852a58f0f8eSPoul-Henning Kamp 	    &timecounter->tc_tweak->tc_adjustment,
853a58f0f8eSPoul-Henning Kamp 	    sizeof(timecounter->tc_tweak->tc_adjustment), req));
8547ec73f64SPoul-Henning Kamp }
8557ec73f64SPoul-Henning Kamp 
8567ec73f64SPoul-Henning Kamp SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
8577ec73f64SPoul-Henning Kamp 
858510eb5b9SPoul-Henning Kamp SYSCTL_INT(_kern_timecounter, KERN_ARGMAX, method, CTLFLAG_RW, &tco_method, 0,
859510eb5b9SPoul-Henning Kamp     "This variable determines the method used for updating timecounters. "
860510eb5b9SPoul-Henning Kamp     "If the default algorithm (0) fails with \"calcru negative...\" messages "
861510eb5b9SPoul-Henning Kamp     "try the alternate algorithm (1) which handles bad hardware better."
862510eb5b9SPoul-Henning Kamp 
863510eb5b9SPoul-Henning Kamp );
864510eb5b9SPoul-Henning Kamp 
8657ec73f64SPoul-Henning Kamp SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
8667ec73f64SPoul-Henning Kamp     0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");
8677ec73f64SPoul-Henning Kamp 
8687ec73f64SPoul-Henning Kamp SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
8697ec73f64SPoul-Henning Kamp     0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");
870