xref: /freebsd/sys/kern/kern_clocksource.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
/*-
 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Common routines to manage event timer hardware.
 */

#include "opt_device_polling.h"
#include "opt_kdtrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>
#include <sys/timetc.h>

#include <machine/atomic.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
cyclic_clock_func_t	cyclic_clock_func = NULL;
#endif

int			cpu_disable_deep_sleep = 0; /* Timer dies in C3. */

static void		setuptimer(void);
static void		loadtimer(struct bintime *now, int first);
static int		doconfigtimer(void);
static void		configtimer(int start);
static int		round_freq(struct eventtimer *et, int freq);

static void		getnextcpuevent(struct bintime *event, int idle);
static void		getnextevent(struct bintime *event);
static int		handleevents(struct bintime *now, int fake);
#ifdef SMP
static void		cpu_new_callout(int cpu, int ticks);
#endif

static struct mtx	et_hw_mtx;

#define	ET_HW_LOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_lock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_lock_spin(&et_hw_mtx);			\
	}

#define	ET_HW_UNLOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_unlock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_unlock_spin(&et_hw_mtx);			\
	}
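
/*
 * Per-CPU event timers are serialized by their own state's spin mutex;
 * a single global timer is serialized by the global et_hw_mtx instead.
 */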

static struct eventtimer *timer = NULL;
static struct bintime	timerperiod;	/* Timer period for periodic mode. */
static struct bintime	hardperiod;	/* hardclock() events period. */
static struct bintime	statperiod;	/* statclock() events period. */
static struct bintime	profperiod;	/* profclock() events period. */
static struct bintime	nexttick;	/* Next global timer tick time. */
static u_int		busy = 0;	/* Reconfiguration is in progress. */
static int		profiling = 0;	/* Profiling events enabled. */

static char		timername[32];	/* Wanted timer. */
TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));

static int		singlemul = 0;	/* Multiplier for periodic mode. */
TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
    0, "Multiplier for periodic mode");

static u_int		idletick = 0;	/* Idle mode allowed. */
TUNABLE_INT("kern.eventtimer.idletick", &idletick);
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
    0, "Run periodic events when idle");

static int		periodic = 0;	/* Periodic or one-shot mode. */
static int		want_periodic = 0; /* What mode to prefer. */
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);

struct pcpu_state {
	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
	u_int		action;		/* Reconfiguration requests. */
	u_int		handle;		/* Immediate handle requests. */
	struct bintime	now;		/* Last tick time. */
	struct bintime	nextevent;	/* Next scheduled event on this CPU. */
	struct bintime	nexttick;	/* Next timer tick time. */
	struct bintime	nexthard;	/* Next hardclock() event. */
	struct bintime	nextstat;	/* Next statclock() event. */
	struct bintime	nextprof;	/* Next profclock() event. */
#ifdef KDTRACE_HOOKS
	struct bintime	nextcyc;	/* Next OpenSolaris cyclics event. */
#endif
	int		ipi;		/* This CPU needs IPI. */
	int		idle;		/* This CPU is in idle mode. */
};

static DPCPU_DEFINE(struct pcpu_state, timerstate);

#define FREQ2BT(freq, bt)						\
{									\
	(bt)->sec = 0;							\
	(bt)->frac = ((uint64_t)0x8000000000000000  / (freq)) << 1;	\
}
#define BT2FREQ(bt)							\
	(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) /		\
	    ((bt)->frac >> 1))
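
/*
 * A bintime is a count of seconds plus a 64-bit binary fraction of a
 * second, so FREQ2BT() stores 2^64 / freq as the length of one period:
 * e.g. freq = 1000 yields frac = 2^64 / 1000, i.e. one millisecond.
 * BT2FREQ() inverts this; adding (frac >> 2) to 2^63 before dividing by
 * (frac >> 1) makes the result round to nearest.
 */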

/*
 * Timer broadcast IPI handler.
 */
int
hardclockintr(void)
{
	struct bintime now;
	struct pcpu_state *state;
	int done;

	if (doconfigtimer() || busy)
		return (FILTER_HANDLED);
	state = DPCPU_PTR(timerstate);
	now = state->now;
	CTR4(KTR_SPARE2, "ipi  at %d:    now  %d.%08x%08x",
	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
			     (unsigned int)(now.frac & 0xffffffff));
	done = handleevents(&now, 0);
	return (done ? FILTER_HANDLED : FILTER_STRAY);
}

/*
 * Handle all events for the specified time on this CPU.
 */
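/*
 * The 'fake' argument describes the calling context: 0 -- a real timer
 * interrupt (a trap frame is available); 1 -- a catch-up call from
 * cpu_activeclock(), with no frame, so profclock() and cyclics are
 * skipped; 2 -- AP startup, where only the per-CPU bookkeeping is
 * advanced and neither clock handlers nor hardware are touched.
 */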
static int
handleevents(struct bintime *now, int fake)
{
	struct bintime t;
	struct trapframe *frame;
	struct pcpu_state *state;
	uintfptr_t pc;
	int usermode;
	int done, runs;

	CTR4(KTR_SPARE2, "handle at %d:  now  %d.%08x%08x",
	    curcpu, now->sec, (unsigned int)(now->frac >> 32),
		     (unsigned int)(now->frac & 0xffffffff));
	done = 0;
	if (fake) {
		frame = NULL;
		usermode = 0;
		pc = 0;
	} else {
		frame = curthread->td_intr_frame;
		usermode = TRAPF_USERMODE(frame);
		pc = TRAPF_PC(frame);
	}

	runs = 0;
	state = DPCPU_PTR(timerstate);

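	/* Batch all overdue hardclock() periods into a single call. */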
	while (bintime_cmp(now, &state->nexthard, >=)) {
		bintime_add(&state->nexthard, &hardperiod);
		runs++;
	}
	if (runs && fake < 2) {
		hardclock_anycpu(runs, usermode);
		done = 1;
	}
	while (bintime_cmp(now, &state->nextstat, >=)) {
		if (fake < 2)
			statclock(usermode);
		bintime_add(&state->nextstat, &statperiod);
		done = 1;
	}
	if (profiling) {
		while (bintime_cmp(now, &state->nextprof, >=)) {
			if (!fake)
				profclock(usermode, pc);
			bintime_add(&state->nextprof, &profperiod);
			done = 1;
		}
	} else
		state->nextprof = state->nextstat;

#ifdef KDTRACE_HOOKS
	if (fake == 0 && cyclic_clock_func != NULL &&
	    state->nextcyc.sec != -1 &&
	    bintime_cmp(now, &state->nextcyc, >=)) {
		state->nextcyc.sec = -1;
		(*cyclic_clock_func)(frame);
	}
#endif

	getnextcpuevent(&t, 0);
	if (fake == 2) {
		state->nextevent = t;
		return (done);
	}
	ET_HW_LOCK(state);
	if (!busy) {
		state->idle = 0;
		state->nextevent = t;
		loadtimer(now, 0);
	}
	ET_HW_UNLOCK(state);
	return (done);
}

/*
 * Compute the binuptime of the next event on the current CPU.
 */
static void
getnextcpuevent(struct bintime *event, int idle)
{
	struct bintime tmp;
	struct pcpu_state *state;
	int skip;

	state = DPCPU_PTR(timerstate);
	*event = state->nexthard;
	if (idle) { /* If CPU is idle - ask callouts for how long. */
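		/*
		 * Limit how far ahead we may sleep: the first CPU must
		 * still wake at least tc_min_ticktock_freq times per
		 * second so the timecounter is read before it can wrap.
		 */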
		skip = 4;
		if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip)
			skip = tc_min_ticktock_freq;
		skip = callout_tickstofirst(hz / skip) - 1;
		CTR2(KTR_SPARE2, "skip   at %d: %d", curcpu, skip);
		tmp = hardperiod;
		bintime_mul(&tmp, skip);
		bintime_add(event, &tmp);
	} else { /* If CPU is active - handle all types of events. */
		if (bintime_cmp(event, &state->nextstat, >))
			*event = state->nextstat;
		if (profiling && bintime_cmp(event, &state->nextprof, >))
			*event = state->nextprof;
	}
#ifdef KDTRACE_HOOKS
	if (state->nextcyc.sec != -1 && bintime_cmp(event, &state->nextcyc, >))
		*event = state->nextcyc;
#endif
}

/*
 * Compute the binuptime of the next event on all CPUs.
 */
static void
getnextevent(struct bintime *event)
{
	struct pcpu_state *state;
#ifdef SMP
	int	cpu;
#endif
	int	c;

	state = DPCPU_PTR(timerstate);
	*event = state->nextevent;
	c = curcpu;
#ifdef SMP
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (bintime_cmp(event, &state->nextevent, >)) {
				*event = state->nextevent;
				c = cpu;
			}
		}
	}
#endif
	CTR5(KTR_SPARE2, "next at %d:    next %d.%08x%08x by %d",
	    curcpu, event->sec, (unsigned int)(event->frac >> 32),
			     (unsigned int)(event->frac & 0xffffffff), c);
}

/* Hardware timer callback function. */
static void
timercb(struct eventtimer *et, void *arg)
{
	struct bintime now;
	struct bintime *next;
	struct pcpu_state *state;
#ifdef SMP
	int cpu, bcast;
#endif

	/* Do not touch anything if somebody is reconfiguring timers. */
	if (busy)
		return;
	/* Update present and next tick times. */
	state = DPCPU_PTR(timerstate);
	if (et->et_flags & ET_FLAGS_PERCPU) {
		next = &state->nexttick;
	} else
		next = &nexttick;
	if (periodic) {
		now = *next;	/* Ex-next tick time becomes present time. */
		bintime_add(next, &timerperiod); /* Next tick in 1 period. */
	} else {
		binuptime(&now);	/* Get present time from hardware. */
		next->sec = -1;		/* Next tick is not scheduled yet. */
	}
	state->now = now;
	CTR4(KTR_SPARE2, "intr at %d:    now  %d.%08x%08x",
	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
			     (unsigned int)(now.frac & 0xffffffff));

#ifdef SMP
	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
	bcast = 0;
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			ET_HW_LOCK(state);
			state->now = now;
			if (bintime_cmp(&now, &state->nextevent, >=)) {
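				/*
				 * Push nextevent ahead so getnextevent()
				 * does not pick this CPU again before it
				 * has handled the broadcast IPI.
				 */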
				state->nextevent.sec++;
				if (curcpu != cpu) {
					state->ipi = 1;
					bcast = 1;
				}
			}
			ET_HW_UNLOCK(state);
		}
	}
#endif

	/* Handle events for this time on this CPU. */
	handleevents(&now, 0);

#ifdef SMP
	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
	if (bcast) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (state->ipi) {
				state->ipi = 0;
				ipi_cpu(cpu, IPI_HARDCLOCK);
			}
		}
	}
#endif
}

/*
 * Load new value into hardware timer.
 */
static void
loadtimer(struct bintime *now, int start)
{
	struct pcpu_state *state;
	struct bintime new;
	struct bintime *next;
	uint64_t tmp;
	int eq;

	if (timer->et_flags & ET_FLAGS_PERCPU) {
		state = DPCPU_PTR(timerstate);
		next = &state->nexttick;
	} else
		next = &nexttick;
	if (periodic) {
		if (start) {
			/*
			 * Try to start all periodic timers aligned
			 * to period to make events synchronous.
			 */
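			/*
			 * Fold the bintime into 28.36 fixed point (28 bits
			 * of seconds, the top 36 bits of the fraction) so
			 * the phase within the period can be taken with one
			 * 64-bit modulo; "<< 28" scales it back to a frac.
			 */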
			tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28);
			tmp = (tmp % (timerperiod.frac >> 28)) << 28;
			new.sec = 0;
			new.frac = timerperiod.frac - tmp;
			if (new.frac < tmp)	/* Less left than passed. */
				bintime_add(&new, &timerperiod);
			CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in %d.%08x",
			    curcpu, now->sec, (unsigned int)(now->frac >> 32),
			    new.sec, (unsigned int)(new.frac >> 32));
			*next = new;
			bintime_add(next, now);
			et_start(timer, &new, &timerperiod);
		}
	} else {
		getnextevent(&new);
		eq = bintime_cmp(&new, next, ==);
		CTR5(KTR_SPARE2, "load at %d:    next %d.%08x%08x eq %d",
		    curcpu, new.sec, (unsigned int)(new.frac >> 32),
			     (unsigned int)(new.frac & 0xffffffff),
			     eq);
		if (!eq) {
			*next = new;
			bintime_sub(&new, now);
			et_start(timer, &new, NULL);
		}
	}
}

/*
 * Prepare event timer parameters after configuration changes.
 */
static void
setuptimer(void)
{
	int freq;

	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	singlemul = MIN(MAX(singlemul, 1), 20);
	freq = hz * singlemul;
	while (freq < (profiling ? profhz : stathz))
		freq += hz;
	freq = round_freq(timer, freq);
	FREQ2BT(freq, &timerperiod);
}

/*
 * Reconfigure the specified per-CPU timer on another CPU.  Called from
 * the IPI handler.
 */
static int
doconfigtimer(void)
{
	struct bintime now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
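	/* action: 1 -- (re)start the timer, 2 -- stop it (see configtimer()). */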
	switch (atomic_load_acq_int(&state->action)) {
	case 1:
		binuptime(&now);
		ET_HW_LOCK(state);
		loadtimer(&now, 1);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	case 2:
		ET_HW_LOCK(state);
		et_stop(timer);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	}
	if (atomic_readandclear_int(&state->handle) && !busy) {
		binuptime(&now);
		handleevents(&now, 0);
		return (1);
	}
	return (0);
}

/*
 * Reconfigure the specified timer.
 * For per-CPU timers, use an IPI to make the other CPUs reconfigure.
 */
static void
configtimer(int start)
{
	struct bintime now, next;
	struct pcpu_state *state;
	int cpu;

	if (start) {
		setuptimer();
		binuptime(&now);
	}
	critical_enter();
	ET_HW_LOCK(DPCPU_PTR(timerstate));
	if (start) {
		/* Initialize time machine parameters. */
		next = now;
		bintime_add(&next, &timerperiod);
		if (periodic)
			nexttick = next;
		else
			nexttick.sec = -1;
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			state->now = now;
			state->nextevent = next;
			if (periodic)
				state->nexttick = next;
			else
				state->nexttick.sec = -1;
			state->nexthard = next;
			state->nextstat = next;
			state->nextprof = next;
			hardclock_sync(cpu);
		}
		busy = 0;
		/* Start global timer or per-CPU timer of this CPU. */
		loadtimer(&now, 1);
	} else {
		busy = 1;
		/* Stop global timer or per-CPU timer of this CPU. */
		et_stop(timer);
	}
	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
	/* If timer is global or there are no other CPUs yet - we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
		critical_exit();
		return;
	}
	/* Set reconfigure flags for other CPUs. */
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		atomic_store_rel_int(&state->action,
		    (cpu == curcpu) ? 0 : (start ? 1 : 2));
	}
	/* Broadcast reconfigure IPI. */
	ipi_all_but_self(IPI_HARDCLOCK);
	/* Wait for reconfiguration to complete. */
restart:
	cpu_spinwait();
	CPU_FOREACH(cpu) {
		if (cpu == curcpu)
			continue;
		state = DPCPU_ID_PTR(cpu, timerstate);
		if (atomic_load_acq_int(&state->action))
			goto restart;
	}
#endif
	critical_exit();
}

/*
 * Calculate the nearest frequency supported by the hardware timer.
 */
static int
round_freq(struct eventtimer *et, int freq)
{
	uint64_t div;

	if (et->et_frequency != 0) {
		div = lmax((et->et_frequency + freq / 2) / freq, 1);
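		/* With a power-of-2-only divider, round div to a nearby power of two. */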
		if (et->et_flags & ET_FLAGS_POW2DIV)
			div = 1 << (flsl(div + div / 2) - 1);
		freq = (et->et_frequency + div / 2) / div;
	}
	if (et->et_min_period.sec > 0)
		freq = 0;
	else if (et->et_min_period.frac != 0)
		freq = min(freq, BT2FREQ(&et->et_min_period));
	if (et->et_max_period.sec == 0 && et->et_max_period.frac != 0)
		freq = max(freq, BT2FREQ(&et->et_max_period));
	return (freq);
}

/*
 * Configure and start event timers (BSP part).
 */
void
cpu_initclocks_bsp(void)
{
	struct pcpu_state *state;
	int base, div, cpu;

	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
#ifdef KDTRACE_HOOKS
		state->nextcyc.sec = -1;
#endif
	}
#ifdef SMP
	callout_new_inserted = cpu_new_callout;
#endif
	periodic = want_periodic;
	/* Grab the requested timer or the best one present. */
	if (timername[0])
		timer = et_find(timername, 0, 0);
	if (timer == NULL && periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL) {
		timer = et_find(NULL,
		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
	}
	if (timer == NULL && !periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL)
		panic("No usable event timer found!");
	et_init(timer, timercb, NULL, NULL);

	/* Adapt to timer capabilities. */
	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep++;

	/*
	 * We honor the requested 'hz' value.
	 * We want to run stathz in the neighborhood of 128 Hz.
	 * We would like profhz to run as often as possible.
	 */
	if (singlemul <= 0 || singlemul > 20) {
		if (hz >= 1500 || (hz % 128) == 0)
			singlemul = 1;
		else if (hz >= 750)
			singlemul = 2;
		else
			singlemul = 4;
	}
	if (periodic) {
		base = round_freq(timer, hz * singlemul);
		singlemul = max((base + hz / 2) / hz, 1);
		hz = (base + singlemul / 2) / singlemul;
		if (base <= 128)
			stathz = base;
		else {
			div = base / 128;
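			/*
			 * Avoid a divisor that is a multiple of singlemul,
			 * so statclock() events do not always coincide with
			 * hardclock() events and sampling stays unbiased.
			 */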
			if (div >= singlemul && (div % singlemul) == 0)
				div++;
			stathz = base / div;
		}
		profhz = stathz;
		while ((profhz + stathz) <= 128 * 64)
			profhz += stathz;
		profhz = round_freq(timer, profhz);
	} else {
		hz = round_freq(timer, hz);
		stathz = round_freq(timer, 127);
		profhz = round_freq(timer, stathz * 64);
	}
	tick = 1000000 / hz;
	FREQ2BT(hz, &hardperiod);
	FREQ2BT(stathz, &statperiod);
	FREQ2BT(profhz, &profperiod);
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}

/*
 * Start per-CPU event timers on APs.
 */
void
cpu_initclocks_ap(void)
{
	struct bintime now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
	binuptime(&now);
	ET_HW_LOCK(state);
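	/*
	 * With a global periodic timer the BSP has already set up the tick
	 * grid; derive this CPU's "now" from the scheduled next tick so the
	 * AP joins the same grid.
	 */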
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 && periodic) {
		state->now = nexttick;
		bintime_sub(&state->now, &timerperiod);
	} else
		state->now = now;
	hardclock_sync(curcpu);
	handleevents(&state->now, 2);
	if (timer->et_flags & ET_FLAGS_PERCPU)
		loadtimer(&now, 1);
	ET_HW_UNLOCK(state);
}

/*
 * Switch to profiling clock rates.
 */
void
cpu_startprofclock(void)
{

	ET_LOCK();
	if (periodic) {
		configtimer(0);
		profiling = 1;
		configtimer(1);
	} else
		profiling = 1;
	ET_UNLOCK();
}

/*
 * Switch to regular clock rates.
 */
void
cpu_stopprofclock(void)
{

	ET_LOCK();
	if (periodic) {
		configtimer(0);
		profiling = 0;
		configtimer(1);
	} else
		profiling = 0;
	ET_UNLOCK();
}

/*
 * Switch to idle mode (all ticks handled).
 */
void
cpu_idleclock(void)
{
	struct bintime now, t;
	struct pcpu_state *state;

	if (idletick || busy ||
	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
	    || curcpu == CPU_FIRST()
#endif
	    )
		return;
	state = DPCPU_PTR(timerstate);
	if (periodic)
		now = state->now;
	else
		binuptime(&now);
	CTR4(KTR_SPARE2, "idle at %d:    now  %d.%08x%08x",
	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
			     (unsigned int)(now.frac & 0xffffffff));
	getnextcpuevent(&t, 1);
	ET_HW_LOCK(state);
	state->idle = 1;
	state->nextevent = t;
	if (!periodic)
		loadtimer(&now, 0);
	ET_HW_UNLOCK(state);
}

/*
 * Switch to active mode (skip empty ticks).
 */
void
cpu_activeclock(void)
{
	struct bintime now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	if (state->idle == 0 || busy)
		return;
	if (periodic)
		now = state->now;
	else
		binuptime(&now);
	CTR4(KTR_SPARE2, "active at %d:  now  %d.%08x%08x",
	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
			     (unsigned int)(now.frac & 0xffffffff));
	spinlock_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(&now, 1);
	td->td_intr_nesting_level--;
	spinlock_exit();
}

#ifdef KDTRACE_HOOKS
void
clocksource_cyc_set(const struct bintime *t)
{
	struct bintime now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
	if (periodic)
		now = state->now;
	else
		binuptime(&now);

	CTR4(KTR_SPARE2, "set_cyc at %d:  now  %d.%08x%08x",
	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
			     (unsigned int)(now.frac & 0xffffffff));
	CTR4(KTR_SPARE2, "set_cyc at %d:  t  %d.%08x%08x",
	    curcpu, t->sec, (unsigned int)(t->frac >> 32),
			     (unsigned int)(t->frac & 0xffffffff));

	ET_HW_LOCK(state);
	if (bintime_cmp(t, &state->nextcyc, ==)) {
		ET_HW_UNLOCK(state);
		return;
	}
	state->nextcyc = *t;
	if (bintime_cmp(&state->nextcyc, &state->nextevent, >=)) {
		ET_HW_UNLOCK(state);
		return;
	}
	state->nextevent = state->nextcyc;
	if (!periodic)
		loadtimer(&now, 0);
	ET_HW_UNLOCK(state);
}
#endif

#ifdef SMP
static void
cpu_new_callout(int cpu, int ticks)
{
	struct bintime tmp;
	struct pcpu_state *state;

	CTR3(KTR_SPARE2, "new co at %d:    on %d in %d",
	    curcpu, cpu, ticks);
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);
	if (state->idle == 0 || busy) {
		ET_HW_UNLOCK(state);
		return;
	}
	/*
	 * If timer is periodic - just update next event time for target CPU.
	 * If timer is global - there is a chance it is already programmed.
	 */
	if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) {
		state->nextevent = state->nexthard;
		tmp = hardperiod;
		bintime_mul(&tmp, ticks - 1);
		bintime_add(&state->nextevent, &tmp);
		if (periodic ||
		    bintime_cmp(&state->nextevent, &nexttick, >=)) {
			ET_HW_UNLOCK(state);
			return;
		}
	}
	/*
	 * Otherwise we have to wake that CPU up, as we can't get the present
	 * bintime to reprogram the global timer from here.  If the timer is
	 * per-CPU, we by definition can't do it from here.
	 */
	ET_HW_UNLOCK(state);
	if (timer->et_flags & ET_FLAGS_PERCPU) {
		state->handle = 1;
		ipi_cpu(cpu, IPI_HARDCLOCK);
	} else {
		if (!cpu_idle_wakeup(cpu))
			ipi_cpu(cpu, IPI_AST);
	}
}
#endif

/*
 * Report or change the active event timer hardware.
 */
static int
sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
	char buf[32];
	struct eventtimer *et;
	int error;

	ET_LOCK();
	et = timer;
	snprintf(buf, sizeof(buf), "%s", et->et_name);
	ET_UNLOCK();
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	ET_LOCK();
	et = timer;
	if (error != 0 || req->newptr == NULL ||
	    strcasecmp(buf, et->et_name) == 0) {
		ET_UNLOCK();
		return (error);
	}
	et = et_find(buf, 0, 0);
	if (et == NULL) {
		ET_UNLOCK();
		return (ENOENT);
	}
	configtimer(0);
	et_free(timer);
	if (et->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep++;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep--;
	periodic = want_periodic;
	timer = et;
	et_init(timer, timercb, NULL, NULL);
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");

/*
 * Report or change the active event timer periodicity.
 */
static int
sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = periodic;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	ET_LOCK();
	configtimer(0);
	periodic = want_periodic = val;
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
948