/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/epoch.h>
#include <sys/gtaskqueue.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

static void initclocks(void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);

/* Mutex protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

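/*
 * Report the aggregated CPU time counters, summed over all CPUs by
 * read_cpu_time(), via the kern.cp_time sysctl.
 */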
static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
	int error;
	long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
	int i;
	unsigned int cp_time32[CPUSTATES];
#endif

	read_cpu_time(cp_time);
#ifdef SCTL_MASK32
	if (req->flags & SCTL_MASK32) {
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32));
		for (i = 0; i < CPUSTATES; i++)
			cp_time32[i] = (unsigned int)cp_time[i];
		error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
	} else
#endif
	{
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time));
		error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");

static long empty[CPUSTATES];

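/*
 * Report the per-CPU time counters via the kern.cp_times sysctl; slots
 * belonging to absent CPUs are reported as zeroes.
 */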
static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
	struct pcpu *pcpu;
	int error;
	int c;
	long *cp_time;
#ifdef SCTL_MASK32
	unsigned int cp_time32[CPUSTATES];
	int i;
#endif

	if (!req->oldptr) {
#ifdef SCTL_MASK32
		if (req->flags & SCTL_MASK32)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
		else
#endif
			return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
	}
	for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
		if (!CPU_ABSENT(c)) {
			pcpu = pcpu_find(c);
			cp_time = pcpu->pc_cp_time;
		} else {
			cp_time = empty;
		}
#ifdef SCTL_MASK32
		if (req->flags & SCTL_MASK32) {
			for (i = 0; i < CPUSTATES; i++)
				cp_time32[i] = (unsigned int)cp_time[i];
			error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
		} else
#endif
			error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");

#ifdef DEADLKRES
static const char *blessed[] = {
	"getblk",
	"so_snd_sx",
	"so_rcv_sx",
	NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

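/*
 * Deadlock resolver thread: periodically scan all threads and panic if
 * one has been blocked on a turnstile, or sleeping on an sx or lockmgr
 * sleepqueue, for longer than the configured thresholds.  Sleepqueue
 * waits whose wait message is on the blessed list are exempt.
 */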
static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	void *wchan;
	int blkticks, i, slpticks, slptype, tryl, tticks;

	tryl = 0;
	for (;;) {
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Avoid sleeping on the sx_lock in order to avoid a possible
		 * priority inversion problem leading to starvation.
		 * If the lock cannot be acquired after 100 tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
		panic("%s: possible deadlock detected on allproc_lock\n",
				    __func__);
			tryl++;
			pause("allproc", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			FOREACH_THREAD_IN_PROC(p, td) {

				thread_lock(td);
				if (TD_ON_LOCK(td)) {

					/*
					 * The thread should be blocked on a
					 * turnstile; simply check that the
					 * turnstile channel is in a good
					 * state.
					 */
					MPASS(td->td_blocked != NULL);

					tticks = ticks - td->td_blktick;
					thread_unlock(td);
					if (tticks > blkticks) {

						/*
						 * According to the provided
						 * thresholds, this thread
						 * has been stuck on a
						 * turnstile for too long.
						 */
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else if (TD_IS_SLEEPING(td) &&
				    TD_ON_SLEEPQ(td)) {

					/*
					 * Check whether the thread is
					 * sleeping on a lock; otherwise skip
					 * the check.
					 * Drop the thread lock in order to
					 * avoid a LOR with the sleepqueue
					 * spinlock.
					 */
					wchan = td->td_wchan;
					tticks = ticks - td->td_slptick;
					thread_unlock(td);
					slptype = sleepq_type(wchan);
					if ((slptype == SLEEPQ_SX ||
					    slptype == SLEEPQ_LK) &&
					    tticks > slpticks) {

						/*
						 * According to the provided
						 * thresholds, this thread
						 * has been stuck on a
						 * sleepqueue for too long.
						 * However, being on a
						 * sleepqueue, we still need
						 * to check the blessed list
						 * before panicking.
						 */
						tryl = 0;
						for (i = 0; blessed[i] != NULL;
						    i++) {
							if (!strcmp(blessed[i],
							    td->td_wmesg)) {
								tryl = 1;
								break;
							}
						}
						if (tryl != 0) {
							tryl = 0;
							continue;
						}
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else
					thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("-", sleepfreq * hz);
	}
}

static struct kthread_desc deadlkres_kd = {
	"deadlkres",
	deadlkres,
	(struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
    &slptime_threshold, 0,
    "Number of seconds a thread may sleep on a sleepqueue before it is "
    "considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
    &blktime_threshold, 0,
    "Number of seconds a thread may block on a turnstile before it is "
    "considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
#endif	/* DEADLKRES */

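/*
 * Sum the per-CPU time counters into the caller-supplied cp_time[]
 * array of CPUSTATES entries.
 */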
void
read_cpu_time(long *cp_time)
{
	struct pcpu *pc;
	int i, j;

	/* Sum up global cp_time[]. */
	bzero(cp_time, sizeof(long) * CPUSTATES);
	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		for (j = 0; j < CPUSTATES; j++)
			cp_time[j] += pc->pc_cp_time[j];
	}
}

#include <sys/watchdog.h>

static int watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);

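/*
 * Register the software watchdog configuration handler on the
 * watchdog(9) event handler list.
 */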
static void
watchdog_attach(void)
{
	EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
}

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;
int	profhz;
int	profprocs;
volatile int	ticks;
int	psratio;

static DPCPU_DEFINE(int, pcputicks);	/* Per-CPU version of ticks. */
#ifdef DEVICE_POLLING
static int devpoll_run = 0;
#endif

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(void *dummy)
{
	int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;

#ifdef SW_WATCHDOG
	/* Enable hardclock watchdog now, even if a hardware watchdog exists. */
	watchdog_attach();
#else
	/* Volunteer to run a software watchdog. */
	if (wdog_software_attach == NULL)
		wdog_software_attach = watchdog_attach;
#endif
}

/*
 * Each time the real-time timer fires, this function is called on all CPUs.
 * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
 * the other CPUs in the system need to call this function.
 */
void
hardclock_cpu(int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int flags;

	/*
	 * Run current process's virtual and profile time, as needed.
	 */
	pstats = p->p_stats;
	flags = 0;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
		PROC_ITIMUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			flags |= TDF_PROFPEND | TDF_ASTPENDING;
		PROC_ITIMUNLOCK(p);
	}
	thread_lock(td);
	td->td_flags |= flags;
	thread_unlock(td);

#ifdef HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
	callout_process(sbinuptime());
	if (__predict_false(DPCPU_GET(epoch_cb_count)))
		GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(int usermode, uintfptr_t pc)
{

	atomic_add_int(&ticks, 1);
	hardclock_cpu(usermode);
	tc_ticktock(1);
	cpu_tick_calibration();
	/*
	 * If no separate statistics clock is available, run it from here.
	 *
	 * XXX: this only works for UP
	 */
	if (stathz == 0) {
		profclock(usermode, pc);
		statclock(usermode);
	}
#ifdef DEVICE_POLLING
	hardclock_device_poll();	/* this is very short and quick */
#endif /* DEVICE_POLLING */
	if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
		watchdog_fire();
}

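/*
 * Process 'cnt' accumulated hardclock ticks on the current CPU at once.
 * Only the CPU that advances the global ticks counter performs the
 * global per-tick work (timecounter, polling and watchdog updates).
 */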
void
hardclock_cnt(int cnt, int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int *t = DPCPU_PTR(pcputicks);
	int flags, global, newticks;
	int i;

	/*
	 * Update per-CPU and possibly global ticks values.
	 */
	*t += cnt;
	do {
		global = ticks;
		newticks = *t - global;
		if (newticks <= 0) {
			if (newticks < -1)
				*t = global - 1;
			newticks = 0;
			break;
		}
	} while (!atomic_cmpset_int(&ticks, global, *t));
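	/*
	 * At this point newticks is the number of ticks by which this CPU
	 * advanced the global ticks counter, or zero if another CPU has
	 * already accounted for them.
	 */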

	/*
	 * Run current process's virtual and profile time, as needed.
	 */
	pstats = p->p_stats;
	flags = 0;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
		    tick * cnt) == 0)
			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
		PROC_ITIMUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
		    tick * cnt) == 0)
			flags |= TDF_PROFPEND | TDF_ASTPENDING;
		PROC_ITIMUNLOCK(p);
	}
	if (flags != 0) {
		thread_lock(td);
		td->td_flags |= flags;
		thread_unlock(td);
	}

#ifdef	HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
	/* We are in charge of handling the global work for these ticks. */
	if (newticks > 0) {
		tc_ticktock(newticks);
#ifdef DEVICE_POLLING
		/* It is dangerous and unnecessary to call this concurrently. */
		if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
			/* This is very short and quick. */
			hardclock_device_poll();
			atomic_store_rel_int(&devpoll_run, 0);
		}
#endif /* DEVICE_POLLING */
		if (watchdog_enabled > 0) {
			i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
			if (i > 0 && i <= newticks)
				watchdog_fire();
		}
	}
	if (curcpu == CPU_FIRST())
		cpu_tick_calibration();
	if (__predict_false(DPCPU_GET(epoch_cb_count)))
		GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}

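/*
 * Resynchronize the per-CPU tick counter of the given CPU with the
 * global ticks value, typically after the CPU has not been receiving
 * clock interrupts for a while.
 */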
void
hardclock_sync(int cpu)
{
	int *t;
	KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
	t = DPCPU_ID_PTR(cpu, pcputicks);

	*t = ticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		if (usec > 0) {
			sec++;
			usec -= 1000000;
		}
		printf("tvtohz: negative time difference %ld sec %ld usec\n",
		       sec, usec);
#endif
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = howmany(sec * 1000000 + (unsigned long)usec, tick) + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
			+ howmany((unsigned long)usec, tick) + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return ((int)ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_STOPPROF)
		return;
	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		mtx_lock(&time_lock);
		if (++profprocs == 1)
			cpu_startprofclock();
		mtx_unlock(&time_lock);
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_PROFIL) {
		if (p->p_profthreads != 0) {
			while (p->p_profthreads != 0) {
				p->p_flag |= P_STOPPROF;
				msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
				    "stopprof", 0);
			}
		}
		if ((p->p_flag & P_PROFIL) == 0)
			return;
		p->p_flag &= ~P_PROFIL;
		mtx_lock(&time_lock);
		if (--profprocs == 0)
			cpu_stopprofclock();
		mtx_unlock(&time_lock);
	}
}

/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int usermode)
{

	statclock_cnt(1, usermode);
}

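/*
 * As statclock(), but account for 'cnt' statistics clock ticks at once.
 */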
void
statclock_cnt(int cnt, int usermode)
{
	struct rusage *ru;
	struct vmspace *vm;
	struct thread *td;
	struct proc *p;
	long rss;
	long *cp_time;

	td = curthread;
	p = td->td_proc;

	cp_time = (long *)PCPU_PTR(cp_time);
	if (usermode) {
		/*
		 * Charge the time as appropriate.
		 */
		td->td_uticks += cnt;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE] += cnt;
		else
			cp_time[CP_USER] += cnt;
	} else {
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if ((td->td_pflags & TDP_ITHREAD) ||
		    td->td_intr_nesting_level >= 2) {
			td->td_iticks += cnt;
			cp_time[CP_INTR] += cnt;
		} else {
			td->td_pticks += cnt;
			td->td_sticks += cnt;
			if (!TD_IS_IDLETHREAD(td))
				cp_time[CP_SYS] += cnt;
			else
				cp_time[CP_IDLE] += cnt;
		}
	}

	/* Update resource usage integrals and maximums. */
	MPASS(p->p_vmspace != NULL);
	vm = p->p_vmspace;
	ru = &td->td_ru;
	ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
	ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
	ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
	rss = pgtok(vmspace_resident_count(vm));
	if (ru->ru_maxrss < rss)
		ru->ru_maxrss = rss;
	KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
	    "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
	SDT_PROBE2(sched, , , tick, td, td->td_proc);
	thread_lock_flags(td, MTX_QUIET);
	for ( ; cnt > 0; cnt--)
		sched_clock(td);
	thread_unlock(td);
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

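/*
 * Profiling clock.  Ticks taken in user mode are charged to the process
 * via addupc_intr() when it is being profiled; kernel-mode ticks feed
 * the GPROF kernel profile when that is compiled in.
 */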
void
profclock(int usermode, uintfptr_t pc)
{

	profclock_cnt(1, usermode, pc);
}

void
profclock_cnt(int cnt, int usermode, uintfptr_t pc)
{
	struct thread *td;
#ifdef GPROF
	struct gmonparam *g;
	uintfptr_t i;
#endif

	td = curthread;
	if (usermode) {
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick;
		 * if there is no related user location yet, don't
		 * bother trying to count it.
		 */
		if (td->td_proc->p_flag & P_PROFIL)
			addupc_intr(td, pc, cnt);
	}
#ifdef GPROF
	else {
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
			i = PC_TO_I(g, pc);
			if (i < g->textsize) {
				KCOUNT(g, i) += cnt;
			}
		}
	}
#endif
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	bzero(&clkinfo, sizeof(clkinfo));
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
	CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
	0, 0, sysctl_kern_clockrate, "S,clockinfo",
	"Rate and period of various kernel clocks");

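/*
 * Configure the software watchdog from a watchdog(9) command.  The
 * WD_INTERVAL bits encode the timeout as a power of two, with WD_TO_1SEC
 * corresponding to one second; timeouts of at least one second arm the
 * hardclock-driven watchdog, shorter ones (including a disable request)
 * turn it off.
 */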
static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
	u_int u;

	u = cmd & WD_INTERVAL;
	if (u >= WD_TO_1SEC) {
		watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
		watchdog_enabled = 1;
		*error = 0;
	} else {
		watchdog_enabled = 0;
	}
}

/*
 * Handle a watchdog timeout by dumping interrupt information and
 * then either dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{
	int nintr;
	uint64_t inttotal;
	u_long *curintr;
	char *curname;

	curintr = intrcnt;
	curname = intrnames;
	inttotal = 0;
	nintr = sintrcnt / sizeof(u_long);

	printf("interrupt                   total\n");
	while (--nintr >= 0) {
		if (*curintr)
			printf("%-12s %20lu\n", curname, *curintr);
		curname += strlen(curname) + 1;
		inttotal += *curintr++;
	}
	printf("Total        %20ju\n", (uintmax_t)inttotal);

#if defined(KDB) && !defined(KDB_UNATTENDED)
	kdb_backtrace();
	kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
	panic("watchdog timeout");
#endif
}
902