/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

static void initclocks(void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);

/* Sleep mutex protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, tick, "struct thread *", "struct proc *");

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
        int error;
        long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
        int i;
        unsigned int cp_time32[CPUSTATES];
#endif

        read_cpu_time(cp_time);
#ifdef SCTL_MASK32
        if (req->flags & SCTL_MASK32) {
                if (!req->oldptr)
                        return (SYSCTL_OUT(req, 0, sizeof(cp_time32)));
                for (i = 0; i < CPUSTATES; i++)
                        cp_time32[i] = (unsigned int)cp_time[i];
                error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
        } else
#endif
        {
                if (!req->oldptr)
                        return (SYSCTL_OUT(req, 0, sizeof(cp_time)));
                error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
        }
        return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_time, "LU", "CPU time statistics");

static long empty[CPUSTATES];

static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
        struct pcpu *pcpu;
        int error;
        int c;
        long *cp_time;
#ifdef SCTL_MASK32
        unsigned int cp_time32[CPUSTATES];
        int i;
#endif

        if (!req->oldptr) {
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32)
                        return (SYSCTL_OUT(req, 0,
                            sizeof(cp_time32) * (mp_maxid + 1)));
                else
#endif
                        return (SYSCTL_OUT(req, 0,
                            sizeof(long) * CPUSTATES * (mp_maxid + 1)));
        }
        for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
                if (!CPU_ABSENT(c)) {
                        pcpu = pcpu_find(c);
                        cp_time = pcpu->pc_cp_time;
                } else {
                        cp_time = empty;
                }
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32) {
                        for (i = 0; i < CPUSTATES; i++)
                                cp_time32[i] = (unsigned int)cp_time[i];
                        error = SYSCTL_OUT(req, cp_time32,
                            sizeof(cp_time32));
                } else
#endif
                        error = SYSCTL_OUT(req, cp_time,
                            sizeof(long) * CPUSTATES);
        }
        return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
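/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * the counters exported above can be read with sysctlbyname(3).  Error
 * handling is omitted for brevity:
 *
 *	long cp_time[CPUSTATES];
 *	size_t len = sizeof(cp_time);
 *
 *	if (sysctlbyname("kern.cp_time", cp_time, &len, NULL, 0) == 0)
 *		printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
 *		    cp_time[CP_USER], cp_time[CP_NICE], cp_time[CP_SYS],
 *		    cp_time[CP_INTR], cp_time[CP_IDLE]);
 *
 * kern.cp_times returns the same CPUSTATES-long array once per possible
 * CPU, so its buffer must be sized as (mp_maxid + 1) * CPUSTATES longs.
 */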
#ifdef DEADLKRES
static const char *blessed[] = {
        "getblk",
        "so_snd_sx",
        "so_rcv_sx",
        NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

static void
deadlkres(void)
{
        struct proc *p;
        struct thread *td;
        void *wchan;
        int blkticks, i, slpticks, slptype, tryl, tticks;

        tryl = 0;
        for (;;) {
                blkticks = blktime_threshold * hz;
                slpticks = slptime_threshold * hz;

                /*
                 * Avoid sleeping on the sx lock in order to avoid a
                 * possible priority inversion leading to starvation.
                 * If the lock cannot be acquired after 100 tries, panic.
                 */
                if (!sx_try_slock(&allproc_lock)) {
                        if (tryl > 100)
                                panic("%s: possible deadlock detected on allproc_lock\n",
                                    __func__);
                        tryl++;
                        pause("allproc", sleepfreq * hz);
                        continue;
                }
                tryl = 0;
                FOREACH_PROC_IN_SYSTEM(p) {
                        PROC_LOCK(p);
                        if (p->p_state == PRS_NEW) {
                                PROC_UNLOCK(p);
                                continue;
                        }
                        FOREACH_THREAD_IN_PROC(p, td) {

                                /*
                                 * Once a thread is found in an
                                 * "interesting" state, check for a
                                 * possible wrap of the ticks counter.
                                 */
                                thread_lock(td);
                                if (TD_ON_LOCK(td) && ticks < td->td_blktick) {

                                        /*
                                         * The thread should be blocked on a
                                         * turnstile; simply check whether
                                         * the turnstile channel is in good
                                         * state.
                                         */
                                        MPASS(td->td_blocked != NULL);

                                        tticks = ticks - td->td_blktick;
                                        thread_unlock(td);
                                        if (tticks > blkticks) {

                                                /*
                                                 * According to the
                                                 * configured thresholds,
                                                 * this thread has been
                                                 * stuck on a turnstile
                                                 * for too long.
                                                 */
                                                PROC_UNLOCK(p);
                                                sx_sunlock(&allproc_lock);
                                                panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
                                                    __func__, td, tticks);
                                        }
                                } else if (TD_IS_SLEEPING(td) &&
                                    TD_ON_SLEEPQ(td) &&
                                    ticks < td->td_blktick) {

                                        /*
                                         * Check whether the thread is
                                         * sleeping on a lock; otherwise
                                         * skip the check.  Drop the thread
                                         * lock first in order to avoid a
                                         * LOR with the sleepqueue
                                         * spinlock.
                                         */
                                        wchan = td->td_wchan;
                                        tticks = ticks - td->td_slptick;
                                        thread_unlock(td);
                                        slptype = sleepq_type(wchan);
                                        if ((slptype == SLEEPQ_SX ||
                                            slptype == SLEEPQ_LK) &&
                                            tticks > slpticks) {

                                                /*
                                                 * According to the
                                                 * configured thresholds,
                                                 * this thread has been
                                                 * stuck on a sleepqueue
                                                 * for too long.  However,
                                                 * since it is on a
                                                 * sleepqueue, the blessed
                                                 * list must still be
                                                 * checked.
                                                 */
                                                tryl = 0;
                                                for (i = 0; blessed[i] != NULL;
                                                    i++) {
                                                        if (!strcmp(blessed[i],
                                                            td->td_wmesg)) {
                                                                tryl = 1;
                                                                break;
                                                        }
                                                }
                                                if (tryl != 0) {
                                                        tryl = 0;
                                                        continue;
                                                }
                                                PROC_UNLOCK(p);
                                                sx_sunlock(&allproc_lock);
                                                panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
                                                    __func__, td, tticks);
                                        }
                                } else
                                        thread_unlock(td);
                        }
                        PROC_UNLOCK(p);
                }
                sx_sunlock(&allproc_lock);

                /* Sleep for sleepfreq seconds. */
                pause("-", sleepfreq * hz);
        }
}

static struct kthread_desc deadlkres_kd = {
        "deadlkres",
        deadlkres,
        (struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
    &slptime_threshold, 0,
    "Number of seconds a thread may sleep on a sleepqueue before it is considered stuck");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
    &blktime_threshold, 0,
    "Number of seconds a thread may block on a turnstile before it is considered stuck");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
#endif /* DEADLKRES */
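/*
 * Usage sketch (illustrative): the deadlock resolver is only compiled in
 * when the kernel configuration contains "options DEADLKRES".  Its
 * thresholds can then be tuned at runtime through the sysctl knobs
 * defined above, e.g.:
 *
 *	sysctl debug.deadlkres.blktime_threshold=1800
 *	sysctl debug.deadlkres.sleepfreq=10
 */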
void
read_cpu_time(long *cp_time)
{
        struct pcpu *pc;
        int i, j;

        /* Sum up global cp_time[]. */
        bzero(cp_time, sizeof(long) * CPUSTATES);
        CPU_FOREACH(i) {
                pc = pcpu_find(i);
                for (j = 0; j < CPUSTATES; j++)
                        cp_time[j] += pc->pc_cp_time[j];
        }
}

#ifdef SW_WATCHDOG
#include <sys/watchdog.h>

static int watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);
#endif /* SW_WATCHDOG */

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
int	psratio;

static DPCPU_DEFINE(int, pcputicks);	/* Per-CPU version of ticks. */
static int global_hardclock_run = 0;
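/*
 * Worked example (illustrative values): with stathz = 128 and a profiling
 * clock of profhz = 1024, initclocks() below computes
 * psratio = 1024 / 128 = 8, so while the fast clock is running only every
 * 8th profile tick counts toward statistics.  The actual rates are
 * machine-dependent.
 */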
/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(void *dummy)
{
        int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;
#ifdef SW_WATCHDOG
        EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
#endif
}

/*
 * Each time the real-time timer fires, this function is called on all CPUs.
 * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
 * the other CPUs in the system need to call this function.
 */
void
hardclock_cpu(int usermode)
{
        struct pstats *pstats;
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        int flags;

        /*
         * Run current process's virtual and profile time, as needed.
         */
        pstats = p->p_stats;
        flags = 0;
        if (usermode &&
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
                PROC_SLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        flags |= TDF_ALRMPEND | TDF_ASTPENDING;
                PROC_SUNLOCK(p);
        }
        if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
                PROC_SLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        flags |= TDF_PROFPEND | TDF_ASTPENDING;
                PROC_SUNLOCK(p);
        }
        thread_lock(td);
        sched_tick(1);
        td->td_flags |= flags;
        thread_unlock(td);

#ifdef HWPMC_HOOKS
        if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
                PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
        callout_tick();
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(int usermode, uintfptr_t pc)
{

        atomic_add_int((volatile int *)&ticks, 1);
        hardclock_cpu(usermode);
        tc_ticktock(1);
        cpu_tick_calibration();
        /*
         * If no separate statistics clock is available, run it from here.
         *
         * XXX: this only works for UP
         */
        if (stathz == 0) {
                profclock(usermode, pc);
                statclock(usermode);
        }
#ifdef DEVICE_POLLING
        hardclock_device_poll();	/* this is very short and quick */
#endif /* DEVICE_POLLING */
#ifdef SW_WATCHDOG
        if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
                watchdog_fire();
#endif /* SW_WATCHDOG */
}

void
hardclock_cnt(int cnt, int usermode)
{
        struct pstats *pstats;
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        int *t = DPCPU_PTR(pcputicks);
        int flags, global, newticks;
#ifdef SW_WATCHDOG
        int i;
#endif /* SW_WATCHDOG */

        /*
         * Update per-CPU and possibly global ticks values.
         */
        *t += cnt;
        do {
                global = ticks;
                newticks = *t - global;
                if (newticks <= 0) {
                        if (newticks < -1)
                                *t = global - 1;
                        newticks = 0;
                        break;
                }
        } while (!atomic_cmpset_int(&ticks, global, *t));

        /*
         * Run current process's virtual and profile time, as needed.
         */
        pstats = p->p_stats;
        flags = 0;
        if (usermode &&
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
                PROC_SLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
                    tick * cnt) == 0)
                        flags |= TDF_ALRMPEND | TDF_ASTPENDING;
                PROC_SUNLOCK(p);
        }
        if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
                PROC_SLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
                    tick * cnt) == 0)
                        flags |= TDF_PROFPEND | TDF_ASTPENDING;
                PROC_SUNLOCK(p);
        }
        thread_lock(td);
        sched_tick(cnt);
        td->td_flags |= flags;
        thread_unlock(td);

#ifdef HWPMC_HOOKS
        if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
                PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
        callout_tick();
        /* We are in charge of handling this tick's global duties. */
        if (newticks > 0) {
                /* It is dangerous, and unnecessary, to run these concurrently. */
                if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) {
                        tc_ticktock(newticks);
#ifdef DEVICE_POLLING
                        /* This is very short and quick. */
                        hardclock_device_poll();
#endif /* DEVICE_POLLING */
                        atomic_store_rel_int(&global_hardclock_run, 0);
                }
#ifdef SW_WATCHDOG
                if (watchdog_enabled > 0) {
                        i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
                        if (i > 0 && i <= newticks)
                                watchdog_fire();
                }
#endif /* SW_WATCHDOG */
        }
        if (curcpu == CPU_FIRST())
                cpu_tick_calibration();
}
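/*
 * Worked example of the lock-free ticks merge in hardclock_cnt()
 * (illustrative): assume the global ticks is 100 and both CPUs' pcputicks
 * are 100.  CPU0 receives cnt = 3, computes newticks = 103 - 100 = 3, and
 * advances the global counter to 103 via the cmpset.  CPU1 then receives
 * cnt = 2; its local count (102) is now behind the global one (103), so
 * newticks <= 0 and it leaves the global counter untouched, snapping its
 * local count back to ticks - 1 only if it has lagged by two or more.
 * Thus only the CPU that advanced the global counter performs the global
 * tick duties guarded by newticks > 0.
 */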
void
hardclock_sync(int cpu)
{
        int *t = DPCPU_ID_PTR(cpu, pcputicks);

        *t = ticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(struct timeval *tv)
{
        unsigned long ticks;
        long sec, usec;

        /*
         * If the number of usecs in the whole seconds part of the time
         * difference fits in a long, then the total number of usecs will
         * fit in an unsigned long.  Compute the total and convert it to
         * ticks, rounding up and adding 1 to allow for the current tick
         * to expire.  Rounding also depends on unsigned long arithmetic
         * to avoid overflow.
         *
         * Otherwise, if the number of ticks in the whole seconds part of
         * the time difference fits in a long, then convert the parts to
         * ticks separately and add, using similar rounding methods and
         * overflow avoidance.  This method would work in the previous
         * case but it is slightly slower and assumes that hz is integral.
         *
         * Otherwise, round the time difference down to the maximum
         * representable value.
         *
         * If ints have 32 bits, then the maximum value for any timeout in
         * 10ms ticks is 248 days.
         */
        sec = tv->tv_sec;
        usec = tv->tv_usec;
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0) {
#ifdef DIAGNOSTIC
                if (usec > 0) {
                        sec++;
                        usec -= 1000000;
                }
                printf("tvtohz: negative time difference %ld sec %ld usec\n",
                    sec, usec);
#endif
                ticks = 1;
        } else if (sec <= LONG_MAX / 1000000)
                ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
                    / tick + 1;
        else if (sec <= LONG_MAX / hz)
                ticks = sec * hz
                    + ((unsigned long)usec + (tick - 1)) / tick + 1;
        else
                ticks = LONG_MAX;
        if (ticks > INT_MAX)
                ticks = INT_MAX;
        return ((int)ticks);
}
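/*
 * Worked example (illustrative): with hz = 1000, tick is 1000 usec, and a
 * 2500 usec timeout takes the first branch above:
 *
 *	ticks = (0 * 1000000 + 2500 + 999) / 1000 + 1 = 4
 *
 * The (tick - 1) term rounds the division up and the trailing + 1 accounts
 * for the partially elapsed current tick, so a timeout never fires early.
 */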
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_STOPPROF)
                return;
        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                mtx_lock(&time_lock);
                if (++profprocs == 1)
                        cpu_startprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_PROFIL) {
                if (p->p_profthreads != 0) {
                        p->p_flag |= P_STOPPROF;
                        while (p->p_profthreads != 0)
                                msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
                                    "stopprof", 0);
                        p->p_flag &= ~P_STOPPROF;
                }
                if ((p->p_flag & P_PROFIL) == 0)
                        return;
                p->p_flag &= ~P_PROFIL;
                mtx_lock(&time_lock);
                if (--profprocs == 0)
                        cpu_stopprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int usermode)
{

        statclock_cnt(1, usermode);
}

void
statclock_cnt(int cnt, int usermode)
{
        struct rusage *ru;
        struct vmspace *vm;
        struct thread *td;
        struct proc *p;
        long rss;
        long *cp_time;

        td = curthread;
        p = td->td_proc;

        cp_time = (long *)PCPU_PTR(cp_time);
        if (usermode) {
                /*
                 * Charge the time as appropriate.
                 */
                td->td_uticks += cnt;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE] += cnt;
                else
                        cp_time[CP_USER] += cnt;
        } else {
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                if ((td->td_pflags & TDP_ITHREAD) ||
                    td->td_intr_nesting_level >= 2) {
                        td->td_iticks += cnt;
                        cp_time[CP_INTR] += cnt;
                } else {
                        td->td_pticks += cnt;
                        td->td_sticks += cnt;
                        if (!TD_IS_IDLETHREAD(td))
                                cp_time[CP_SYS] += cnt;
                        else
                                cp_time[CP_IDLE] += cnt;
                }
        }

        /* Update resource usage integrals and maximums. */
        MPASS(p->p_vmspace != NULL);
        vm = p->p_vmspace;
        ru = &td->td_ru;
        ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
        ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
        ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
        rss = pgtok(vmspace_resident_count(vm));
        if (ru->ru_maxrss < rss)
                ru->ru_maxrss = rss;
        KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
            "prio:%d", td->td_priority, "stathz:%d", stathz ? stathz : hz);
        SDT_PROBE2(sched, , , tick, td, td->td_proc);
        thread_lock_flags(td, MTX_QUIET);
        for ( ; cnt > 0; cnt--)
                sched_clock(td);
        thread_unlock(td);
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int usermode, uintfptr_t pc)
{

        profclock_cnt(1, usermode, pc);
}

void
profclock_cnt(int cnt, int usermode, uintfptr_t pc)
{
        struct thread *td;
#ifdef GPROF
        struct gmonparam *g;
        uintfptr_t i;
#endif

        td = curthread;
        if (usermode) {
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled, record the tick.
                 * If there is no related user location yet, don't
                 * bother trying to count it.
                 */
                if (td->td_proc->p_flag & P_PROFIL)
                        addupc_intr(td, pc, cnt);
        }
#ifdef GPROF
        else {
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = &_gmonparam;
                if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
                        i = PC_TO_I(g, pc);
                        if (i < g->textsize) {
                                KCOUNT(g, i) += cnt;
                        }
                }
        }
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
        struct clockinfo clkinfo;

        /*
         * Construct clockinfo structure.
         */
        bzero(&clkinfo, sizeof(clkinfo));
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");
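/*
 * Example (illustrative): the structure exported above can be inspected
 * from userland with sysctl(8), e.g.:
 *
 *	$ sysctl kern.clockrate
 *	kern.clockrate: { hz = 1000, tick = 1000, profhz = 8128, stathz = 127 }
 *
 * The values shown are representative only; they depend on the kernel
 * configuration and the available event timers.
 */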
#ifdef SW_WATCHDOG

static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
        u_int u;

        u = cmd & WD_INTERVAL;
        if (u >= WD_TO_1SEC) {
                watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
                watchdog_enabled = 1;
                *error = 0;
        } else {
                watchdog_enabled = 0;
        }
}
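/*
 * Worked example of the interval decoding above (illustrative): the
 * WD_INTERVAL bits encode the timeout as a power of two in nanoseconds,
 * with WD_TO_1SEC corresponding to roughly one second.  For
 * cmd = WD_TO_1SEC + 3 (about eight seconds) the reload value becomes
 * (1 << 3) * hz = 8 * hz hardclock ticks.
 */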
/*
 * Handle a watchdog timeout by dumping interrupt information and
 * then either dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{
        int nintr;
        uint64_t inttotal;
        u_long *curintr;
        char *curname;

        curintr = intrcnt;
        curname = intrnames;
        inttotal = 0;
        nintr = sintrcnt / sizeof(u_long);

        printf("interrupt                   total\n");
        while (--nintr >= 0) {
                if (*curintr)
                        printf("%-12s %20lu\n", curname, *curintr);
                curname += strlen(curname) + 1;
                inttotal += *curintr++;
        }
        printf("Total %20ju\n", (uintmax_t)inttotal);

#if defined(KDB) && !defined(KDB_UNATTENDED)
        kdb_backtrace();
        kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
        panic("watchdog timeout");
#endif
}

#endif /* SW_WATCHDOG */