1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ntp.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/dkstat.h> 47 #include <sys/callout.h> 48 #include <sys/kernel.h> 49 #include <sys/proc.h> 50 #include <sys/resourcevar.h> 51 #include <sys/signalvar.h> 52 #include <sys/timetc.h> 53 #include <sys/timepps.h> 54 #include <vm/vm.h> 55 #include <sys/lock.h> 56 #include <vm/pmap.h> 57 #include <vm/vm_map.h> 58 #include <sys/sysctl.h> 59 60 #include <machine/cpu.h> 61 #include <machine/limits.h> 62 #include <machine/smp.h> 63 64 #ifdef GPROF 65 #include <sys/gmon.h> 66 #endif 67 68 69 static void initclocks __P((void *dummy)); 70 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) 71 72 /* Some of these don't belong here, but it's easiest to concentrate them. */ 73 #if defined(SMP) && defined(BETTER_CLOCK) 74 long cp_time[CPUSTATES]; 75 #else 76 static long cp_time[CPUSTATES]; 77 #endif 78 79 long tk_cancc; 80 long tk_nin; 81 long tk_nout; 82 long tk_rawcc; 83 84 /* 85 * Clock handling routines. 86 * 87 * This code is written to operate with two timers that run independently of 88 * each other. 89 * 90 * The main timer, running hz times per second, is used to trigger interval 91 * timers, timeouts and rescheduling as needed. 92 * 93 * The second timer handles kernel and user profiling, 94 * and does resource use estimation. 
If the second timer is programmable, 95 * it is randomized to avoid aliasing between the two clocks. For example, 96 * the randomization prevents an adversary from always giving up the cpu 97 * just before its quantum expires. Otherwise, it would never accumulate 98 * cpu ticks. The mean frequency of the second timer is stathz. 99 * 100 * If no second timer exists, stathz will be zero; in this case we drive 101 * profiling and statistics off the main clock. This WILL NOT be accurate; 102 * do not do it unless absolutely necessary. 103 * 104 * The statistics clock may (or may not) be run at a higher rate while 105 * profiling. This profile clock runs at profhz. We require that profhz 106 * be an integral multiple of stathz. 107 * 108 * If the statistics clock is running fast, it must be divided by the ratio 109 * profhz/stathz for statistics. (For profiling, every tick counts.) 110 * 111 * Time-of-day is maintained using a "timecounter", which may or may 112 * not be related to the hardware generating the above mentioned 113 * interrupts. 114 */ 115 116 int stathz; 117 int profhz; 118 static int profprocs; 119 int ticks; 120 static int psdiv, pscnt; /* prof => stat divider */ 121 int psratio; /* ratio: prof / stat */ 122 123 /* 124 * Initialize clock frequencies and start both clocks running. 125 */ 126 /* ARGSUSED*/ 127 static void 128 initclocks(dummy) 129 void *dummy; 130 { 131 register int i; 132 133 /* 134 * Set divisors to 1 (normal case) and let the machine-specific 135 * code do its bit. 136 */ 137 psdiv = pscnt = 1; 138 cpu_initclocks(); 139 140 /* 141 * Compute profhz/stathz, and fix profhz if needed. 142 */ 143 i = stathz ? stathz : hz; 144 if (profhz == 0) 145 profhz = i; 146 psratio = profhz / i; 147 } 148 149 /* 150 * The real-time timer, interrupting hz times per second. 
151 */ 152 void 153 hardclock(frame) 154 register struct clockframe *frame; 155 { 156 register struct proc *p; 157 158 p = curproc; 159 if (p) { 160 register struct pstats *pstats; 161 162 /* 163 * Run current process's virtual and profile time, as needed. 164 */ 165 pstats = p->p_stats; 166 if (CLKF_USERMODE(frame) && 167 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 168 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 169 psignal(p, SIGVTALRM); 170 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && 171 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 172 psignal(p, SIGPROF); 173 } 174 175 #if defined(SMP) && defined(BETTER_CLOCK) 176 forward_hardclock(pscnt); 177 #endif 178 179 /* 180 * If no separate statistics clock is available, run it from here. 181 */ 182 if (stathz == 0) 183 statclock(frame); 184 185 tc_windup(); 186 ticks++; 187 188 /* 189 * Process callouts at a very low cpu priority, so we don't keep the 190 * relatively high clock interrupt priority any longer than necessary. 191 */ 192 if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 193 if (CLKF_BASEPRI(frame)) { 194 /* 195 * Save the overhead of a software interrupt; 196 * it will happen as soon as we return, so do it now. 197 */ 198 (void)splsoftclock(); 199 softclock(); 200 } else 201 setsoftclock(); 202 } else if (softticks + 1 == ticks) 203 ++softticks; 204 } 205 206 /* 207 * Compute number of ticks in the specified amount of time. 208 */ 209 int 210 tvtohz(tv) 211 struct timeval *tv; 212 { 213 register unsigned long ticks; 214 register long sec, usec; 215 216 /* 217 * If the number of usecs in the whole seconds part of the time 218 * difference fits in a long, then the total number of usecs will 219 * fit in an unsigned long. Compute the total and convert it to 220 * ticks, rounding up and adding 1 to allow for the current tick 221 * to expire. Rounding also depends on unsigned long arithmetic 222 * to avoid overflow. 
223 * 224 * Otherwise, if the number of ticks in the whole seconds part of 225 * the time difference fits in a long, then convert the parts to 226 * ticks separately and add, using similar rounding methods and 227 * overflow avoidance. This method would work in the previous 228 * case but it is slightly slower and assumes that hz is integral. 229 * 230 * Otherwise, round the time difference down to the maximum 231 * representable value. 232 * 233 * If ints have 32 bits, then the maximum value for any timeout in 234 * 10ms ticks is 248 days. 235 */ 236 sec = tv->tv_sec; 237 usec = tv->tv_usec; 238 if (usec < 0) { 239 sec--; 240 usec += 1000000; 241 } 242 if (sec < 0) { 243 #ifdef DIAGNOSTIC 244 if (usec > 0) { 245 sec++; 246 usec -= 1000000; 247 } 248 printf("tvotohz: negative time difference %ld sec %ld usec\n", 249 sec, usec); 250 #endif 251 ticks = 1; 252 } else if (sec <= LONG_MAX / 1000000) 253 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 254 / tick + 1; 255 else if (sec <= LONG_MAX / hz) 256 ticks = sec * hz 257 + ((unsigned long)usec + (tick - 1)) / tick + 1; 258 else 259 ticks = LONG_MAX; 260 if (ticks > INT_MAX) 261 ticks = INT_MAX; 262 return ((int)ticks); 263 } 264 265 /* 266 * Start profiling on a process. 267 * 268 * Kernel profiling passes proc0 which never exits and hence 269 * keeps the profile clock running constantly. 270 */ 271 void 272 startprofclock(p) 273 register struct proc *p; 274 { 275 int s; 276 277 if ((p->p_flag & P_PROFIL) == 0) { 278 p->p_flag |= P_PROFIL; 279 if (++profprocs == 1 && stathz != 0) { 280 s = splstatclock(); 281 psdiv = pscnt = psratio; 282 setstatclockrate(profhz); 283 splx(s); 284 } 285 } 286 } 287 288 /* 289 * Stop profiling on a process. 
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		/*
		 * Last profiled process gone: drop the statistics clock
		 * back to stathz and reset the prof/stat divider to 1.
		 */
		if (--profprocs == 0 && stathz != 0) {
			/* Block statclock() while retuning the divider/rate. */
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock. Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics. Most of the statistics are only
 * used by user-level statistics programs. The main exceptions are
 * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
 *
 * While profiling, this runs at profhz; only every psdiv'th tick
 * (counted down in pscnt) falls through to the statistics code below,
 * so statistics are still accumulated at the stathz rate.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
	int i;
#endif
	register struct proc *p;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (curproc != NULL && CLKF_USERMODE(frame)) {
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* Not a statistics tick yet; only the profile sample above. */
		if (--pscnt > 0)
			return;
		/*
		 * Charge the time as appropriate.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier:
		 * bump the histogram bucket for the interrupted kernel PC.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* Not a statistics tick yet; only the profile sample above. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Rearm the prof/stat divider for the next statistics interval. */
	pscnt = psdiv;

	if (p != NULL) {
		schedclock(p);

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += pgtok(vm->vm_tsize);
			ru->ru_idrss += pgtok(vm->vm_dsize);
			ru->ru_isrss += pgtok(vm->vm_ssize);
			rss = pgtok(vmspace_resident_count(vm));
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 * Sysctl handler for KERN_CLOCKRATE (kern.clockrate); copies a
 * struct clockinfo snapshot out as an opaque object.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	/* With no separate statistics clock, stats run off hz. */
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");