/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.
 * We require that profhz be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *      allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
        register volatile struct timeval *tp = (t); \
        register long us; \
 \
        tp->tv_usec = us = tp->tv_usec + (usec); \
        if (us >= 1000000) { \
                tp->tv_usec = us - 1000000; \
                tp->tv_sec++; \
        } \
}

int     stathz;
int     profhz;
int     profprocs;
int     ticks;
static int psdiv, pscnt;                /* prof => stat divider */
int     psratio;                        /* ratio: prof / stat */

volatile struct timeval time;
volatile struct timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
        register int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        psdiv = pscnt = 1;
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
        register struct clockframe *frame;
{
        register struct callout *p1;
        register struct proc *p;
        register int delta, needsoft;
        extern int tickdelta;
        extern long timedelta;

        /*
         * Update real-time timeout queue.
         * At front of queue are some number of events which are ``due''.
         * The time to these is <= 0 and if negative represents the
         * number of ticks which have passed since it was supposed to happen.
         * The rest of the q elements (times > 0) are events yet to happen,
         * where the time for each is given as a delta from the previous.
         * Decrementing just the first of these serves to decrement the time
         * to all events.
         */
        needsoft = 0;
        for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
                if (--p1->c_time > 0)
                        break;
                needsoft = 1;
                if (p1->c_time == 0)
                        break;
        }

        p = curproc;
        if (p) {
                register struct pstats *pstats;

                /*
                 * Run current process's virtual and profile time, as needed.
                 */
                pstats = p->p_stats;
                if (CLKF_USERMODE(frame) &&
                    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        psignal(p, SIGVTALRM);
                if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        psignal(p, SIGPROF);
        }

        /*
         * If no separate statistics clock is available, run it from here.
         */
        if (stathz == 0)
                statclock(frame);

        /*
         * Increment the time-of-day.  The increment is just ``tick'' unless
         * we are still adjusting the clock; see adjtime().
         */
        ticks++;
        if (timedelta == 0)
                delta = tick;
        else {
                delta = tick + tickdelta;
                timedelta -= tickdelta;
        }
        BUMPTIME(&time, delta);
        BUMPTIME(&mono_time, delta);

        /*
         * Process callouts at a very low cpu priority, so we don't keep the
         * relatively high clock interrupt priority any longer than necessary.
         */
        if (needsoft) {
                if (CLKF_BASEPRI(frame)) {
                        /*
                         * Save the overhead of a software interrupt;
                         * it will happen as soon as we return, so do it now.
                         */
                        (void)splsoftclock();
                        softclock();
                } else
                        setsoftclock();
        }
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
        register struct callout *c;
        register void *arg;
        register void (*func) __P((void *));
        register int s;

        s = splhigh();
        while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
                func = c->c_func;
                arg = c->c_arg;
                calltodo.c_next = c->c_next;
                c->c_next = callfree;
                callfree = c;
                splx(s);
                (*func)(arg);
                (void) splhigh();
        }
        splx(s);
}

/*
 * timeout --
 *      Execute a function after a specified length of time.
 *
 * untimeout --
 *      Cancel previous timeout function call.
 *
 *      See AT&T BCI Driver Reference Manual for specification.  This
 *      implementation differs from that one in that no identification
 *      value is returned from timeout, rather, the original arguments
 *      to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
        void (*ftn) __P((void *));
        void *arg;
        register int ticks;
{
        register struct callout *new, *p, *t;
        register int s;

        if (ticks <= 0)
                ticks = 1;

        /* Lock out the clock. */
        s = splhigh();

        /* Fill in the next free callout structure. */
        if (callfree == NULL)
                panic("timeout table full");
        new = callfree;
        callfree = new->c_next;
        new->c_arg = arg;
        new->c_func = ftn;

        /*
         * The time for each event is stored as a difference from the time
         * of the previous event on the queue.  Walk the queue, correcting
         * the ticks argument for queue entries passed.  Correct the ticks
         * value for the queue entry immediately after the insertion point
         * as well.  Watch out for negative c_time values; these represent
         * overdue events.
         */
        for (p = &calltodo;
            (t = p->c_next) != NULL && ticks > t->c_time; p = t)
                if (t->c_time > 0)
                        ticks -= t->c_time;
        new->c_time = ticks;
        if (t != NULL)
                t->c_time -= ticks;

        /* Insert the new entry into the queue. */
        p->c_next = new;
        new->c_next = t;
        splx(s);
}

void
untimeout(ftn, arg)
        void (*ftn) __P((void *));
        void *arg;
{
        register struct callout *p, *t;
        register int s;

        s = splhigh();
        for (p = &calltodo; (t = p->c_next) != NULL; p = t)
                if (t->c_func == ftn && t->c_arg == arg) {
                        /* Increment next entry's tick count. */
                        if (t->c_next && t->c_time > 0)
                                t->c_next->c_time += t->c_time;

                        /* Move entry from callout queue to callfree queue. */
                        p->c_next = t->c_next;
                        t->c_next = callfree;
                        callfree = t;
                        break;
                }
        splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
        struct timeval *tv;
{
        register long ticks, sec;
        int s;

        /*
         * If number of milliseconds will fit in 32 bit arithmetic,
         * then compute number of milliseconds to time and scale to
         * ticks.  Otherwise just compute number of hz in time, rounding
         * times greater than representable to maximum value.
         *
         * Delta times less than 25 days can be computed ``exactly''.
         * Maximum value for any timeout in 10ms ticks is 250 days.
         */
        s = splhigh();
        sec = tv->tv_sec - time.tv_sec;
        if (sec <= 0x7fffffff / 1000 - 1000)
                ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
                        (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
        else if (sec <= 0x7fffffff / hz)
                ticks = sec * hz;
        else
                ticks = 0x7fffffff;
        splx(s);
        return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
        register struct proc *p;
{
        int s;

        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                if (++profprocs == 1 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = psratio;
                        setstatclockrate(profhz);
                        splx(s);
                }
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
        register struct proc *p;
{
        int s;

        if (p->p_flag & P_PROFIL) {
                p->p_flag &= ~P_PROFIL;
                if (--profprocs == 0 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = 1;
                        setstatclockrate(stathz);
                        splx(s);
                }
        }
}

int     dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
        register struct clockframe *frame;
{
#ifdef GPROF
        register struct gmonparam *g;
#endif
        register struct proc *p;
        register int i;

        if (CLKF_USERMODE(frame)) {
                p = curproc;
                if (p->p_flag & P_PROFIL)
                        addupc_intr(p, CLKF_PC(frame), 1);
                if (--pscnt > 0)
                        return;
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled record the tick.
                 */
                p->p_uticks++;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE]++;
                else
                        cp_time[CP_USER]++;
        } else {
#ifdef GPROF
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = &_gmonparam;
                if (g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
                                g->kcount[i]++;
                        }
                }
#endif
                if (--pscnt > 0)
                        return;
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                p = curproc;
                if (CLKF_INTR(frame)) {
                        if (p != NULL)
                                p->p_iticks++;
                        cp_time[CP_INTR]++;
                } else if (p != NULL) {
                        p->p_sticks++;
                        cp_time[CP_SYS]++;
                } else
                        cp_time[CP_IDLE]++;
        }
        pscnt = psdiv;

        /*
         * We maintain statistics shown by user-level statistics
         * programs:  the amount of time in each cpu state, and
         * the amount of time each of DK_NDRIVE ``drives'' is busy.
         *
         * XXX  should either run linked list of drives, or (better)
         *      grab timestamps in the start & done code.
         */
        for (i = 0; i < DK_NDRIVE; i++)
                if (dk_busy & (1 << i))
                        dk_time[i]++;

        /*
         * We adjust the priority of the current process.
         * The priority of a process gets worse as it accumulates CPU time.
         * The cpu usage estimator (p_estcpu) is increased here.  The formula
         * for computing priorities (in kern_synch.c) will compute a different
         * value each time p_estcpu increases by 4.  The cpu usage estimator
         * ramps up quite quickly when the process is running (linearly), and
         * decays away exponentially, at a rate which is proportionally slower
         * when the system is busy.  The basic principle is that the system
         * will 90% forget that the process used a lot of CPU time in
         * 5 * loadav seconds.  This causes the system to favor processes
         * which haven't run much recently, and to round-robin among other
         * processes.
         */
        if (p != NULL) {
                p->p_cpticks++;
                if (++p->p_estcpu == 0)
                        p->p_estcpu--;
                if ((p->p_estcpu & 3) == 0) {
                        resetpriority(p);
                        if (p->p_priority >= PUSER)
                                p->p_priority = p->p_usrpri;
                }
        }
}

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
        register char *where;
        size_t *sizep;
{
        struct clockinfo clkinfo;

        /*
         * Construct clockinfo structure.
         */
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
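
/*
 * Illustrative sketch only (not compiled, guarded by ``notdef''): how a
 * caller might use the timeout()/untimeout() interface described above.
 * The names foo_softc, foo_watchdog, foo_attach, and foo_detach are
 * hypothetical.  Because timeout() returns no identifier, the same
 * (function, argument) pair originally handed to timeout() is what
 * untimeout() uses to locate and cancel a pending entry.
 */
#ifdef notdef
static void
foo_watchdog(arg)
        void *arg;
{
        struct foo_softc *sc = arg;

        /* Handle the expired timer, then rearm it one second (hz ticks) out. */
        timeout(foo_watchdog, sc, hz);
}

static void
foo_attach(sc)
        struct foo_softc *sc;
{
        /* Schedule the first expiry one second from now. */
        timeout(foo_watchdog, sc, hz);
}

static void
foo_detach(sc)
        struct foo_softc *sc;
{
        /* Cancel a pending timeout by its original (function, arg) pair. */
        untimeout(foo_watchdog, sc);
}
#endif /* notdef */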