1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 39 * $Id: kern_clock.c,v 1.3 1994/08/02 07:41:54 davidg Exp $ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/dkstat.h> 45 #include <sys/callout.h> 46 #include <sys/kernel.h> 47 #include <sys/proc.h> 48 #include <sys/resourcevar.h> 49 50 #include <machine/cpu.h> 51 52 #ifdef GPROF 53 #include <sys/gmon.h> 54 #endif 55 56 /* Does anybody else really care about these? */ 57 struct callout *callfree, *callout, calltodo; 58 int ncallout; 59 60 /* Some of these don't belong here, but it's easiest to concentrate them. */ 61 long cp_time[CPUSTATES]; 62 long dk_seek[DK_NDRIVE]; 63 long dk_time[DK_NDRIVE]; 64 long dk_wds[DK_NDRIVE]; 65 long dk_wpms[DK_NDRIVE]; 66 long dk_xfer[DK_NDRIVE]; 67 68 int dk_busy; 69 int dk_ndrive = DK_NDRIVE; 70 71 long tk_cancc; 72 long tk_nin; 73 long tk_nout; 74 long tk_rawcc; 75 76 /* 77 * Clock handling routines. 78 * 79 * This code is written to operate with two timers that run independently of 80 * each other. The main clock, running hz times per second, is used to keep 81 * track of real time. The second timer handles kernel and user profiling, 82 * and does resource use estimation. If the second timer is programmable, 83 * it is randomized to avoid aliasing between the two clocks. For example, 84 * the randomization prevents an adversary from always giving up the cpu 85 * just before its quantum expires. Otherwise, it would never accumulate 86 * cpu ticks. The mean frequency of the second timer is stathz. 87 * 88 * If no second timer exists, stathz will be zero; in this case we drive 89 * profiling and statistics off the main clock. This WILL NOT be accurate; 90 * do not do it unless absolutely necessary. 91 * 92 * The statistics clock may (or may not) be run at a higher rate while 93 * profiling. This profile clock runs at profhz. We require that profhz 94 * be an integral multiple of stathz. 95 * 96 * If the statistics clock is running fast, it must be divided by the ratio 97 * profhz/stathz for statistics. (For profiling, every tick counts.) 98 */ 99 100 /* 101 * TODO: 102 * allocate more timeout table slots when table overflows. 103 */ 104 105 /* 106 * Bump a timeval by a small number of usec's. 107 */ 108 #define BUMPTIME(t, usec) { \ 109 register volatile struct timeval *tp = (t); \ 110 register long us; \ 111 \ 112 tp->tv_usec = us = tp->tv_usec + (usec); \ 113 if (us >= 1000000) { \ 114 tp->tv_usec = us - 1000000; \ 115 tp->tv_sec++; \ 116 } \ 117 } 118 119 int stathz; 120 int profhz; 121 int profprocs; 122 int ticks; 123 static int psdiv, pscnt; /* prof => stat divider */ 124 int psratio; /* ratio: prof / stat */ 125 126 volatile struct timeval time; 127 volatile struct timeval mono_time; 128 129 /* 130 * Initialize clock frequencies and start both clocks running. 131 */ 132 void 133 initclocks() 134 { 135 register int i; 136 137 /* 138 * Set divisors to 1 (normal case) and let the machine-specific 139 * code do its bit. 140 */ 141 psdiv = pscnt = 1; 142 cpu_initclocks(); 143 144 /* 145 * Compute profhz/stathz, and fix profhz if needed. 146 */ 147 i = stathz ? stathz : hz; 148 if (profhz == 0) 149 profhz = i; 150 psratio = profhz / i; 151 } 152 153 /* 154 * The real-time timer, interrupting hz times per second. 155 */ 156 void 157 hardclock(frame) 158 register struct clockframe *frame; 159 { 160 register struct callout *p1; 161 register struct proc *p; 162 register int delta, needsoft; 163 extern int tickdelta; 164 extern long timedelta; 165 166 /* 167 * Update real-time timeout queue. 168 * At front of queue are some number of events which are ``due''. 169 * The time to these is <= 0 and if negative represents the 170 * number of ticks which have passed since it was supposed to happen. 171 * The rest of the q elements (times > 0) are events yet to happen, 172 * where the time for each is given as a delta from the previous. 173 * Decrementing just the first of these serves to decrement the time 174 * to all events. 175 */ 176 needsoft = 0; 177 for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { 178 if (--p1->c_time > 0) 179 break; 180 needsoft = 1; 181 if (p1->c_time == 0) 182 break; 183 } 184 185 p = curproc; 186 if (p) { 187 register struct pstats *pstats; 188 189 /* 190 * Run current process's virtual and profile time, as needed. 191 */ 192 pstats = p->p_stats; 193 if (CLKF_USERMODE(frame) && 194 timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 195 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 196 psignal(p, SIGVTALRM); 197 if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 198 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 199 psignal(p, SIGPROF); 200 } 201 202 /* 203 * If no separate statistics clock is available, run it from here. 204 */ 205 if (stathz == 0) 206 statclock(frame); 207 208 /* 209 * Increment the time-of-day. The increment is just ``tick'' unless 210 * we are still adjusting the clock; see adjtime(). 211 */ 212 ticks++; 213 if (timedelta == 0) 214 delta = tick; 215 else { 216 delta = tick + tickdelta; 217 timedelta -= tickdelta; 218 } 219 BUMPTIME(&time, delta); 220 BUMPTIME(&mono_time, delta); 221 222 /* 223 * Process callouts at a very low cpu priority, so we don't keep the 224 * relatively high clock interrupt priority any longer than necessary. 225 */ 226 if (needsoft) { 227 if (CLKF_BASEPRI(frame)) { 228 /* 229 * Save the overhead of a software interrupt; 230 * it will happen as soon as we return, so do it now. 231 */ 232 (void)splsoftclock(); 233 softclock(); 234 } else 235 setsoftclock(); 236 } 237 } 238 239 /* 240 * Software (low priority) clock interrupt. 241 * Run periodic events from timeout queue. 242 */ 243 /*ARGSUSED*/ 244 void 245 softclock() 246 { 247 register struct callout *c; 248 register void *arg; 249 register void (*func) __P((void *)); 250 register int s; 251 252 s = splhigh(); 253 while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { 254 func = c->c_func; 255 arg = c->c_arg; 256 calltodo.c_next = c->c_next; 257 c->c_next = callfree; 258 callfree = c; 259 splx(s); 260 (*func)(arg); 261 (void) splhigh(); 262 } 263 splx(s); 264 } 265 266 /* 267 * timeout -- 268 * Execute a function after a specified length of time. 269 * 270 * untimeout -- 271 * Cancel previous timeout function call. 272 * 273 * See AT&T BCI Driver Reference Manual for specification. This 274 * implementation differs from that one in that no identification 275 * value is returned from timeout, rather, the original arguments 276 * to timeout are used to identify entries for untimeout. 277 */ 278 void 279 timeout(ftn, arg, ticks) 280 timeout_t ftn; 281 void *arg; 282 register int ticks; 283 { 284 register struct callout *new, *p, *t; 285 register int s; 286 287 if (ticks <= 0) 288 ticks = 1; 289 290 /* Lock out the clock. */ 291 s = splhigh(); 292 293 /* Fill in the next free callout structure. */ 294 if (callfree == NULL) 295 panic("timeout table full"); 296 new = callfree; 297 callfree = new->c_next; 298 new->c_arg = arg; 299 new->c_func = ftn; 300 301 /* 302 * The time for each event is stored as a difference from the time 303 * of the previous event on the queue. Walk the queue, correcting 304 * the ticks argument for queue entries passed. Correct the ticks 305 * value for the queue entry immediately after the insertion point 306 * as well. Watch out for negative c_time values; these represent 307 * overdue events. 308 */ 309 for (p = &calltodo; 310 (t = p->c_next) != NULL && ticks > t->c_time; p = t) 311 if (t->c_time > 0) 312 ticks -= t->c_time; 313 new->c_time = ticks; 314 if (t != NULL) 315 t->c_time -= ticks; 316 317 /* Insert the new entry into the queue. */ 318 p->c_next = new; 319 new->c_next = t; 320 splx(s); 321 } 322 323 void 324 untimeout(ftn, arg) 325 timeout_t ftn; 326 void *arg; 327 { 328 register struct callout *p, *t; 329 register int s; 330 331 s = splhigh(); 332 for (p = &calltodo; (t = p->c_next) != NULL; p = t) 333 if (t->c_func == ftn && t->c_arg == arg) { 334 /* Increment next entry's tick count. */ 335 if (t->c_next && t->c_time > 0) 336 t->c_next->c_time += t->c_time; 337 338 /* Move entry from callout queue to callfree queue. */ 339 p->c_next = t->c_next; 340 t->c_next = callfree; 341 callfree = t; 342 break; 343 } 344 splx(s); 345 } 346 347 /* 348 * Compute number of hz until specified time. Used to 349 * compute third argument to timeout() from an absolute time. 350 */ 351 int 352 hzto(tv) 353 struct timeval *tv; 354 { 355 register long ticks, sec; 356 int s; 357 358 /* 359 * If number of milliseconds will fit in 32 bit arithmetic, 360 * then compute number of milliseconds to time and scale to 361 * ticks. Otherwise just compute number of hz in time, rounding 362 * times greater than representible to maximum value. 363 * 364 * Delta times less than 25 days can be computed ``exactly''. 365 * Maximum value for any timeout in 10ms ticks is 250 days. 366 */ 367 s = splhigh(); 368 sec = tv->tv_sec - time.tv_sec; 369 if (sec <= 0x7fffffff / 1000 - 1000) 370 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 371 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 372 else if (sec <= 0x7fffffff / hz) 373 ticks = sec * hz; 374 else 375 ticks = 0x7fffffff; 376 splx(s); 377 return (ticks); 378 } 379 380 /* 381 * Start profiling on a process. 382 * 383 * Kernel profiling passes proc0 which never exits and hence 384 * keeps the profile clock running constantly. 385 */ 386 void 387 startprofclock(p) 388 register struct proc *p; 389 { 390 int s; 391 392 if ((p->p_flag & P_PROFIL) == 0) { 393 p->p_flag |= P_PROFIL; 394 if (++profprocs == 1 && stathz != 0) { 395 s = splstatclock(); 396 psdiv = pscnt = psratio; 397 setstatclockrate(profhz); 398 splx(s); 399 } 400 } 401 } 402 403 /* 404 * Stop profiling on a process. 405 */ 406 void 407 stopprofclock(p) 408 register struct proc *p; 409 { 410 int s; 411 412 if (p->p_flag & P_PROFIL) { 413 p->p_flag &= ~P_PROFIL; 414 if (--profprocs == 0 && stathz != 0) { 415 s = splstatclock(); 416 psdiv = pscnt = 1; 417 setstatclockrate(stathz); 418 splx(s); 419 } 420 } 421 } 422 423 /* 424 * Statistics clock. Grab profile sample, and if divider reaches 0, 425 * do process and kernel statistics. 426 */ 427 void 428 statclock(frame) 429 register struct clockframe *frame; 430 { 431 #ifdef GPROF 432 register struct gmonparam *g; 433 #endif 434 register struct proc *p; 435 register int i; 436 437 if (CLKF_USERMODE(frame)) { 438 p = curproc; 439 if (p->p_flag & P_PROFIL) 440 addupc_intr(p, CLKF_PC(frame), 1); 441 if (--pscnt > 0) 442 return; 443 /* 444 * Came from user mode; CPU was in user state. 445 * If this process is being profiled record the tick. 446 */ 447 p->p_uticks++; 448 if (p->p_nice > NZERO) 449 cp_time[CP_NICE]++; 450 else 451 cp_time[CP_USER]++; 452 } else { 453 #ifdef GPROF 454 /* 455 * Kernel statistics are just like addupc_intr, only easier. 456 */ 457 g = &_gmonparam; 458 if (g->state == GMON_PROF_ON) { 459 i = CLKF_PC(frame) - g->lowpc; 460 if (i < g->textsize) { 461 i /= HISTFRACTION * sizeof(*g->kcount); 462 g->kcount[i]++; 463 } 464 } 465 #endif 466 if (--pscnt > 0) 467 return; 468 /* 469 * Came from kernel mode, so we were: 470 * - handling an interrupt, 471 * - doing syscall or trap work on behalf of the current 472 * user process, or 473 * - spinning in the idle loop. 474 * Whichever it is, charge the time as appropriate. 475 * Note that we charge interrupts to the current process, 476 * regardless of whether they are ``for'' that process, 477 * so that we know how much of its real time was spent 478 * in ``non-process'' (i.e., interrupt) work. 479 */ 480 p = curproc; 481 if (CLKF_INTR(frame)) { 482 if (p != NULL) 483 p->p_iticks++; 484 cp_time[CP_INTR]++; 485 } else if (p != NULL) { 486 p->p_sticks++; 487 cp_time[CP_SYS]++; 488 } else 489 cp_time[CP_IDLE]++; 490 } 491 pscnt = psdiv; 492 493 /* 494 * We maintain statistics shown by user-level statistics 495 * programs: the amount of time in each cpu state, and 496 * the amount of time each of DK_NDRIVE ``drives'' is busy. 497 * 498 * XXX should either run linked list of drives, or (better) 499 * grab timestamps in the start & done code. 500 */ 501 for (i = 0; i < DK_NDRIVE; i++) 502 if (dk_busy & (1 << i)) 503 dk_time[i]++; 504 505 /* 506 * We adjust the priority of the current process. The priority of 507 * a process gets worse as it accumulates CPU time. The cpu usage 508 * estimator (p_estcpu) is increased here. The formula for computing 509 * priorities (in kern_synch.c) will compute a different value each 510 * time p_estcpu increases by 4. The cpu usage estimator ramps up 511 * quite quickly when the process is running (linearly), and decays 512 * away exponentially, at a rate which is proportionally slower when 513 * the system is busy. The basic principal is that the system will 514 * 90% forget that the process used a lot of CPU time in 5 * loadav 515 * seconds. This causes the system to favor processes which haven't 516 * run much recently, and to round-robin among other processes. 517 */ 518 if (p != NULL) { 519 p->p_cpticks++; 520 if (++p->p_estcpu == 0) 521 p->p_estcpu--; 522 if ((p->p_estcpu & 3) == 0) { 523 resetpriority(p); 524 if (p->p_priority >= PUSER) 525 p->p_priority = p->p_usrpri; 526 } 527 } 528 } 529 530 /* 531 * Return information about system clocks. 532 */ 533 int 534 sysctl_clockrate(where, sizep) 535 register char *where; 536 size_t *sizep; 537 { 538 struct clockinfo clkinfo; 539 540 /* 541 * Construct clockinfo structure. 542 */ 543 clkinfo.hz = hz; 544 clkinfo.tick = tick; 545 clkinfo.profhz = profhz; 546 clkinfo.stathz = stathz ? stathz : hz; 547 return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); 548 } 549