/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

static void initclocks(void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/* Some of these don't belong here, but it's easiest to concentrate them. */
long cp_time[CPUSTATES];
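/*
 * cp_time[] accumulates statclock ticks by CPU state (CP_USER, CP_NICE,
 * CP_SYS, CP_INTR, CP_IDLE); it is updated in statclock() below and
 * exported to userland by the kern.cp_time sysctl handler that follows.
 */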

/* Spin-lock protecting profiling statistics. */
struct mtx time_lock;

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
	int error;
#ifdef SCTL_MASK32
	int i;
	unsigned int cp_time32[CPUSTATES];

	if (req->flags & SCTL_MASK32) {
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32));
		for (i = 0; i < CPUSTATES; i++)
			cp_time32[i] = (unsigned int)cp_time[i];
		error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
	} else
#endif
	{
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time));
		error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD,
    0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");

#ifdef SW_WATCHDOG
#include <sys/watchdog.h>

static int watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);
#endif /* SW_WATCHDOG */

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
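 *
 * (Illustrative numbers only: with stathz = 128 and profhz = 1024,
 * initclocks() below computes psratio = profhz / stathz = 8, so only
 * every 8th profile tick is also a statistics tick while profiling.)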
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
int	psratio;

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
	void *dummy;
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	mtx_init(&time_lock, "time lock", NULL, MTX_SPIN);
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
#ifdef SW_WATCHDOG
	EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
#endif
}

/*
 * Each time the real-time timer fires, this function is called on all CPUs.
 * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
 * the other CPUs in the system need to call this function.
 */
void
hardclock_cpu(int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;

	/*
	 * Run current process's virtual and profile time, as needed.
	 */
	mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
	sched_tick();
#ifdef KSE
#if 0	/* for now do nothing */
	if (p->p_flag & P_SA) {
		/* XXXKSE What to do? Should do more. */
	}
#endif
#endif
	pstats = p->p_stats;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
	    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
		p->p_sflag |= PS_ALRMPEND;
		td->td_flags |= TDF_ASTPENDING;
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
	    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
		p->p_sflag |= PS_PROFPEND;
		td->td_flags |= TDF_ASTPENDING;
	}
	mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);

#ifdef HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
#endif
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(int usermode, uintfptr_t pc)
{
	int need_softclock = 0;

	hardclock_cpu(usermode);

	tc_ticktock();
	/*
	 * If no separate statistics clock is available, run it from here.
	 *
	 * XXX: this only works for UP
	 */
	if (stathz == 0) {
		profclock(usermode, pc);
		statclock(usermode);
	}

#ifdef DEVICE_POLLING
	hardclock_device_poll();	/* this is very short and quick */
#endif /* DEVICE_POLLING */

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	mtx_lock_spin_flags(&callout_lock, MTX_QUIET);
	ticks++;
	if (!TAILQ_EMPTY(&callwheel[ticks & callwheelmask])) {
		need_softclock = 1;
	} else if (softticks + 1 == ticks)
		++softticks;
	mtx_unlock_spin_flags(&callout_lock, MTX_QUIET);

	/*
	 * swi_sched acquires sched_lock, so we don't want to call it with
	 * callout_lock held; incorrect locking order.
	 */
277fa2fbc3dSJake Burkholder */ 278fa2fbc3dSJake Burkholder if (need_softclock) 279c86b6ff5SJohn Baldwin swi_sched(softclock_ih, 0); 280370c3cb5SSean Kelly 2814103b765SPoul-Henning Kamp #ifdef SW_WATCHDOG 2824103b765SPoul-Henning Kamp if (watchdog_enabled > 0 && --watchdog_ticks <= 0) 283370c3cb5SSean Kelly watchdog_fire(); 2844103b765SPoul-Henning Kamp #endif /* SW_WATCHDOG */ 285ab36c067SJustin T. Gibbs } 286ab36c067SJustin T. Gibbs 287df8bae1dSRodney W. Grimes /* 288227ee8a1SPoul-Henning Kamp * Compute number of ticks in the specified amount of time. 289df8bae1dSRodney W. Grimes */ 290df8bae1dSRodney W. Grimes int 291227ee8a1SPoul-Henning Kamp tvtohz(tv) 292df8bae1dSRodney W. Grimes struct timeval *tv; 293df8bae1dSRodney W. Grimes { 2946976af69SBruce Evans register unsigned long ticks; 2956976af69SBruce Evans register long sec, usec; 296df8bae1dSRodney W. Grimes 297df8bae1dSRodney W. Grimes /* 2986976af69SBruce Evans * If the number of usecs in the whole seconds part of the time 2996976af69SBruce Evans * difference fits in a long, then the total number of usecs will 3006976af69SBruce Evans * fit in an unsigned long. Compute the total and convert it to 3016976af69SBruce Evans * ticks, rounding up and adding 1 to allow for the current tick 3026976af69SBruce Evans * to expire. Rounding also depends on unsigned long arithmetic 3036976af69SBruce Evans * to avoid overflow. 304df8bae1dSRodney W. Grimes * 3056976af69SBruce Evans * Otherwise, if the number of ticks in the whole seconds part of 3066976af69SBruce Evans * the time difference fits in a long, then convert the parts to 3076976af69SBruce Evans * ticks separately and add, using similar rounding methods and 3086976af69SBruce Evans * overflow avoidance. This method would work in the previous 3096976af69SBruce Evans * case but it is slightly slower and assumes that hz is integral. 3106976af69SBruce Evans * 3116976af69SBruce Evans * Otherwise, round the time difference down to the maximum 3126976af69SBruce Evans * representable value. 3136976af69SBruce Evans * 3146976af69SBruce Evans * If ints have 32 bits, then the maximum value for any timeout in 3156976af69SBruce Evans * 10ms ticks is 248 days. 316df8bae1dSRodney W. Grimes */ 317227ee8a1SPoul-Henning Kamp sec = tv->tv_sec; 318227ee8a1SPoul-Henning Kamp usec = tv->tv_usec; 3196976af69SBruce Evans if (usec < 0) { 3206976af69SBruce Evans sec--; 3216976af69SBruce Evans usec += 1000000; 3226976af69SBruce Evans } 3236976af69SBruce Evans if (sec < 0) { 3246976af69SBruce Evans #ifdef DIAGNOSTIC 325b05dcf3cSPoul-Henning Kamp if (usec > 0) { 3267ec73f64SPoul-Henning Kamp sec++; 3277ec73f64SPoul-Henning Kamp usec -= 1000000; 3287ec73f64SPoul-Henning Kamp } 329227ee8a1SPoul-Henning Kamp printf("tvotohz: negative time difference %ld sec %ld usec\n", 3306976af69SBruce Evans sec, usec); 3316976af69SBruce Evans #endif 3326976af69SBruce Evans ticks = 1; 3336976af69SBruce Evans } else if (sec <= LONG_MAX / 1000000) 3346976af69SBruce Evans ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 3356976af69SBruce Evans / tick + 1; 3366976af69SBruce Evans else if (sec <= LONG_MAX / hz) 3376976af69SBruce Evans ticks = sec * hz 3386976af69SBruce Evans + ((unsigned long)usec + (tick - 1)) / tick + 1; 3396976af69SBruce Evans else 3406976af69SBruce Evans ticks = LONG_MAX; 3416976af69SBruce Evans if (ticks > INT_MAX) 3426976af69SBruce Evans ticks = INT_MAX; 343d6116663SAlexander Langer return ((int)ticks); 344df8bae1dSRodney W. Grimes } 345df8bae1dSRodney W. Grimes 346df8bae1dSRodney W. 
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_STOPPROF)
		return;
	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		mtx_lock_spin(&time_lock);
		if (++profprocs == 1)
			cpu_startprofclock();
		mtx_unlock_spin(&time_lock);
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_PROFIL) {
		if (p->p_profthreads != 0) {
			p->p_flag |= P_STOPPROF;
			while (p->p_profthreads != 0)
				msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
				    "stopprof", 0);
			p->p_flag &= ~P_STOPPROF;
		}
		if ((p->p_flag & P_PROFIL) == 0)
			return;
		p->p_flag &= ~P_PROFIL;
		mtx_lock_spin(&time_lock);
		if (--profprocs == 0)
			cpu_stopprofclock();
		mtx_unlock_spin(&time_lock);
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.  Most of the statistics are only
 * used by user-level statistics programs.  The main exceptions are
 * ke->ke_uticks, p->p_rux.rux_sticks, p->p_rux.rux_iticks, and p->p_estcpu.
 * This should be called by all active processors.
 */
void
statclock(int usermode)
{
	struct rusage *ru;
	struct vmspace *vm;
	struct thread *td;
	struct proc *p;
	long rss;

	td = curthread;
	p = td->td_proc;

	if (usermode) {
		/*
		 * Charge the time as appropriate.
		 */
#ifdef KSE
		if (p->p_flag & P_SA)
			thread_statclock(1);
#endif
		td->td_uticks++;
		mtx_lock_spin_flags(&time_lock, MTX_QUIET);
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if ((td->td_pflags & TDP_ITHREAD) ||
		    td->td_intr_nesting_level >= 2) {
			td->td_iticks++;
			mtx_lock_spin_flags(&time_lock, MTX_QUIET);
			cp_time[CP_INTR]++;
		} else {
#ifdef KSE
			if (p->p_flag & P_SA)
				thread_statclock(0);
#endif
			td->td_pticks++;
			td->td_sticks++;
			mtx_lock_spin_flags(&time_lock, MTX_QUIET);
			if (!TD_IS_IDLETHREAD(td))
				cp_time[CP_SYS]++;
			else
				cp_time[CP_IDLE]++;
		}
	}
	mtx_unlock_spin_flags(&time_lock, MTX_QUIET);
	CTR4(KTR_SCHED, "statclock: %p(%s) prio %d stathz %d",
	    td, td->td_proc->p_comm, td->td_priority, (stathz)?stathz:hz);

	mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
	sched_clock(td);

	/* Update resource usage integrals and maximums. */
	MPASS(p->p_stats != NULL);
	MPASS(p->p_vmspace != NULL);
	vm = p->p_vmspace;
	ru = &p->p_stats->p_ru;
	ru->ru_ixrss += pgtok(vm->vm_tsize);
	ru->ru_idrss += pgtok(vm->vm_dsize);
	ru->ru_isrss += pgtok(vm->vm_ssize);
	rss = pgtok(vmspace_resident_count(vm));
	if (ru->ru_maxrss < rss)
		ru->ru_maxrss = rss;
	mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
}

void
profclock(int usermode, uintfptr_t pc)
{
	struct thread *td;
#ifdef GPROF
	struct gmonparam *g;
	uintfptr_t i;
#endif

	td = curthread;
	if (usermode) {
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 * If there is no related user location yet, don't
		 * bother trying to count it.
		 */
		if (td->td_proc->p_flag & P_PROFIL)
			addupc_intr(td, pc, 1);
	}
#ifdef GPROF
	else {
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
			i = PC_TO_I(g, pc);
			if (i < g->textsize) {
				KCOUNT(g, i)++;
			}
		}
	}
#endif
}

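/*
 * The kern.clockrate sysctl defined below exports hz, tick, stathz and
 * profhz to userland as a struct clockinfo.  A minimal, illustrative
 * userland reader (not part of this file) might look like:
 *
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *
 *	if (sysctlbyname("kern.clockrate", &ci, &len, NULL, 0) == 0)
 *		printf("hz=%d stathz=%d profhz=%d\n",
 *		    ci.hz, ci.stathz, ci.profhz);
 */
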
/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	bzero(&clkinfo, sizeof(clkinfo));
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

#ifdef SW_WATCHDOG

static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
	u_int u;

	u = cmd & WD_INTERVAL;
	if (u >= WD_TO_1SEC) {
		/* Convert the 2^(u - WD_TO_1SEC) second timeout to ticks. */
		watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
		watchdog_enabled = 1;
		*error = 0;
	} else {
		watchdog_enabled = 0;
	}
}

/*
 * Handle a watchdog timeout by dumping interrupt information and
 * then either dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{
	int nintr;
	u_int64_t inttotal;
	u_long *curintr;
	char *curname;

	curintr = intrcnt;
	curname = intrnames;
	inttotal = 0;
	nintr = eintrcnt - intrcnt;

	printf("interrupt total\n");
	while (--nintr >= 0) {
		if (*curintr)
			printf("%-12s %20lu\n", curname, *curintr);
		curname += strlen(curname) + 1;
		inttotal += *curintr++;
	}
	printf("Total %20ju\n", (uintmax_t)inttotal);

#if defined(KDB) && !defined(KDB_UNATTENDED)
	kdb_backtrace();
	kdb_enter("watchdog timeout");
#else
	panic("watchdog timeout");
#endif
}

#endif /* SW_WATCHDOG */