/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/gtaskqueue.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

/* Mutex protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
	int error;
	long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
	int i;
	unsigned int cp_time32[CPUSTATES];
#endif

	read_cpu_time(cp_time);
#ifdef SCTL_MASK32
	if (req->flags & SCTL_MASK32) {
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32));
		for (i = 0; i < CPUSTATES; i++)
			cp_time32[i] = (unsigned int)cp_time[i];
		error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
	} else
#endif
	{
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time));
		error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_time, "LU", "CPU time statistics");

static long empty[CPUSTATES];

static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
	struct pcpu *pcpu;
	int error;
	int c;
	long *cp_time;
#ifdef SCTL_MASK32
	unsigned int cp_time32[CPUSTATES];
	int i;
#endif

	if (!req->oldptr) {
#ifdef SCTL_MASK32
		if (req->flags & SCTL_MASK32)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
		else
#endif
			return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
	}
	for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
		if (!CPU_ABSENT(c)) {
			pcpu = pcpu_find(c);
			cp_time = pcpu->pc_cp_time;
		} else {
			cp_time = empty;
		}
#ifdef SCTL_MASK32
		if (req->flags & SCTL_MASK32) {
			for (i = 0; i < CPUSTATES; i++)
				cp_time32[i] = (unsigned int)cp_time[i];
			error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
		} else
#endif
			error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
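
/*
 * The two handlers above export the aggregate and per-CPU time counters to
 * userland.  A minimal sketch of a consumer (ordinary userland C, shown here
 * purely for illustration; it relies on sysctlbyname(3) and the CPUSTATES /
 * CP_* definitions from <sys/resource.h>) might look roughly like:
 *
 *	#include <sys/types.h>
 *	#include <sys/resource.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		long cp_time[CPUSTATES];
 *		size_t len = sizeof(cp_time);
 *
 *		if (sysctlbyname("kern.cp_time", cp_time, &len, NULL, 0) == -1)
 *			return (1);
 *		printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
 *		    cp_time[CP_USER], cp_time[CP_NICE], cp_time[CP_SYS],
 *		    cp_time[CP_INTR], cp_time[CP_IDLE]);
 *		return (0);
 *	}
 */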

#ifdef DEADLKRES
static const char *blessed[] = {
	"getblk",
	"so_snd_sx",
	"so_rcv_sx",
	NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

static void
deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
{
	int tticks;

	sx_assert(&allproc_lock, SX_LOCKED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	/*
	 * The thread should be blocked on a turnstile, simply check
	 * if the turnstile channel is in good state.
	 */
	MPASS(td->td_blocked != NULL);

	tticks = ticks - td->td_blktick;
	if (tticks > blkticks)
		/*
		 * According to the provided thresholds, this thread has
		 * been stuck on a turnstile for too long.
		 */
		panic("%s: possible deadlock detected for %p (%s), "
		    "blocked for %d ticks\n", __func__,
		    td, sched_tdname(td), tticks);
}

static void
deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
{
	const void *wchan;
	int i, slptype, tticks;

	sx_assert(&allproc_lock, SX_LOCKED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	/*
	 * Check if the thread is sleeping on a lock, otherwise skip the check.
	 * Drop the thread lock in order to avoid a LOR with the sleepqueue
	 * spinlock.
	 */
	wchan = td->td_wchan;
	tticks = ticks - td->td_slptick;
	slptype = sleepq_type(wchan);
	if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
	    tticks > slpticks) {
		/*
		 * According to the provided thresholds, this thread has
		 * been stuck on a sleepqueue for too long.
		 * However, being on a sleepqueue, we still need to check
		 * the blessed list before panicking.
		 */
		for (i = 0; blessed[i] != NULL; i++)
			if (!strcmp(blessed[i], td->td_wmesg))
				return;

		panic("%s: possible deadlock detected for %p (%s), "
		    "blocked for %d ticks\n", __func__,
		    td, sched_tdname(td), tticks);
	}
}

static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	int blkticks, slpticks, tryl;

	tryl = 0;
	for (;;) {
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Avoid sleeping on the sx lock in order to avoid a
		 * possible priority inversion problem leading to
		 * starvation.
		 * If the lock can't be acquired after 100 tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
				panic("%s: possible deadlock detected "
				    "on allproc_lock\n", __func__);
			tryl++;
			pause("allproc", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			FOREACH_THREAD_IN_PROC(p, td) {
				thread_lock(td);
				if (TD_ON_LOCK(td))
					deadlres_td_on_lock(p, td,
					    blkticks);
				else if (TD_IS_SLEEPING(td))
					deadlres_td_sleep_q(p, td,
					    slpticks);
				thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("-", sleepfreq * hz);
	}
}

static struct kthread_desc deadlkres_kd = {
	"deadlkres",
	deadlkres,
	(struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
    &slptime_threshold, 0,
    "Number of seconds a thread may sleep on a sleepqueue before it is considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
    &blktime_threshold, 0,
    "Number of seconds a thread may block on a turnstile before it is considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
#endif	/* DEADLKRES */
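
/*
 * The thresholds above are runtime-tunable whenever the kernel is built
 * with the DEADLKRES option.  As an illustrative example only, an
 * administrator could relax the sleepqueue threshold to one hour with:
 *
 *	sysctl debug.deadlkres.slptime_threshold=3600
 */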

void
read_cpu_time(long *cp_time)
{
	struct pcpu *pc;
	int i, j;

	/* Sum up global cp_time[]. */
	bzero(cp_time, sizeof(long) * CPUSTATES);
	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		for (j = 0; j < CPUSTATES; j++)
			cp_time[j] += pc->pc_cp_time[j];
	}
}

#include <sys/watchdog.h>

static int watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);

static void
watchdog_attach(void)
{
	EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
}

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;
int	profhz;
int	profprocs;
volatile int	ticks;
int	psratio;

DPCPU_DEFINE_STATIC(int, pcputicks);	/* Per-CPU version of ticks. */
#ifdef DEVICE_POLLING
static int devpoll_run = 0;
#endif

static void
ast_oweupc(struct thread *td, int tda __unused)
{
	if ((td->td_proc->p_flag & P_PROFIL) == 0)
		return;
	addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
	td->td_profil_ticks = 0;
	td->td_pflags &= ~TDP_OWEUPC;
}

static void
ast_alrm(struct thread *td, int tda __unused)
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	kern_psignal(p, SIGVTALRM);
	PROC_UNLOCK(p);
}

static void
ast_prof(struct thread *td, int tda __unused)
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	kern_psignal(p, SIGPROF);
	PROC_UNLOCK(p);
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
static void
initclocks(void *dummy __unused)
{
	int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;

	ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
	ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
	ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);

#ifdef SW_WATCHDOG
	/* Enable hardclock watchdog now, even if a hardware watchdog exists. */
	watchdog_attach();
#else
	/* Volunteer to run a software watchdog. */
	if (wdog_software_attach == NULL)
		wdog_software_attach = watchdog_attach;
#endif
}
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);
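
/*
 * A rough illustration of the psratio computation in initclocks() above
 * (hypothetical numbers, not taken from any particular platform): with
 * stathz = 128 and a profiling clock running at profhz = 1024, psratio is 8,
 * so only every eighth profhz tick is counted towards the statistics clock.
 * When no separate statistics timer exists (stathz == 0) and profhz is left
 * at 0, both default to hz and psratio is 1.
 */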

static __noinline void
hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode)
{
	struct proc *p;
	int ast;

	ast = 0;
	p = td->td_proc;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
		    tick * cnt) == 0)
			ast |= TDAI(TDA_ALRM);
		PROC_ITIMUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
		    tick * cnt) == 0)
			ast |= TDAI(TDA_PROF);
		PROC_ITIMUNLOCK(p);
	}
	if (ast != 0)
		ast_sched_mask(td, ast);
}

void
hardclock(int cnt, int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int *t = DPCPU_PTR(pcputicks);
	int global, i, newticks;

	/*
	 * Update per-CPU and possibly global ticks values.
	 */
	*t += cnt;
	global = ticks;
	do {
		newticks = *t - global;
		if (newticks <= 0) {
			if (newticks < -1)
				*t = global - 1;
			newticks = 0;
			break;
		}
	} while (!atomic_fcmpset_int(&ticks, &global, *t));

	/*
	 * Run the current process's virtual and profile time, as needed.
	 */
	pstats = p->p_stats;
	if (__predict_false(
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) ||
	    timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)))
		hardclock_itimer(td, pstats, cnt, usermode);

#ifdef HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
	/* If we advanced the global ticks value, handle the per-tick duties. */
	if (newticks > 0) {
		tc_ticktock(newticks);
#ifdef DEVICE_POLLING
		/* It is unsafe, and unnecessary, to run this concurrently. */
		if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
			/* This is very short and quick. */
			hardclock_device_poll();
			atomic_store_rel_int(&devpoll_run, 0);
		}
#endif /* DEVICE_POLLING */
		if (watchdog_enabled > 0) {
			i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
			if (i > 0 && i <= newticks)
				watchdog_fire();
		}
		intr_event_handle(clk_intr_event, NULL);
	}
	if (curcpu == CPU_FIRST())
		cpu_tick_calibration();
	if (__predict_false(DPCPU_GET(epoch_cb_count)))
		GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}

void
hardclock_sync(int cpu)
{
	int *t;
	KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
	t = DPCPU_ID_PTR(cpu, pcputicks);

	*t = ticks;
}

/*
 * Regular integer scaling formula without losing precision:
 */
#define	TIME_INT_SCALE(value, mul, div) \
	(((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div))

/*
 * Macro for converting seconds and microseconds into actual ticks,
 * based on the given hz value:
 */
#define	TIME_TO_TICKS(sec, usec, hz) \
	((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6))

#define	TIME_ASSERT_VALID_HZ(hz) \
	_Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \
	    TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \
	    "tvtohz() can overflow the regular integer type")

/*
 * Compile time assert the maximum and minimum values to fit into a
 * regular integer when computing TIME_TO_TICKS():
 */
TIME_ASSERT_VALID_HZ(HZ_MAXIMUM);
TIME_ASSERT_VALID_HZ(HZ_MINIMUM);

/*
 * The formula is mostly linear, but test some more common values just
 * in case:
 */
TIME_ASSERT_VALID_HZ(1024);
TIME_ASSERT_VALID_HZ(1000);
TIME_ASSERT_VALID_HZ(128);
TIME_ASSERT_VALID_HZ(100);

/*
 * Compute number of ticks representing the specified amount of time.
 * If the specified time is negative, a value of 1 is returned.  This
 * function returns a value from 1 up to and including INT_MAX.
 */
int
tvtohz(struct timeval *tv)
{
	int retval;

	/*
	 * The values passed here may come from user-space and these
	 * checks ensure "tv_usec" is within its allowed range:
	 */

	/* check for tv_usec underflow */
	if (__predict_false(tv->tv_usec < 0)) {
		tv->tv_sec += tv->tv_usec / 1000000;
		tv->tv_usec = tv->tv_usec % 1000000;
		/* convert tv_usec to a positive value */
		if (__predict_true(tv->tv_usec < 0)) {
			tv->tv_usec += 1000000;
			tv->tv_sec -= 1;
		}
	/* check for tv_usec overflow */
	} else if (__predict_false(tv->tv_usec >= 1000000)) {
		tv->tv_sec += tv->tv_usec / 1000000;
		tv->tv_usec = tv->tv_usec % 1000000;
	}

	/* check for tv_sec underflow */
	if (__predict_false(tv->tv_sec < 0))
		return (1);
	/* check for tv_sec overflow (including room for the tv_usec part) */
	else if (__predict_false(tv->tv_sec >= tick_seconds_max))
		return (INT_MAX);

	/* cast to "int" to avoid platform differences */
	retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz);

	/* add one additional tick */
	return (retval + 1);
}
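
/*
 * A worked example of the conversion above (purely illustrative): with
 * hz = 1000 and tv = { .tv_sec = 1, .tv_usec = 500000 },
 * TIME_INT_SCALE(500000, 1000, 64) is (7812 * 1000) + (32 * 1000) / 64
 * = 7812500, which divided by (1000000 >> 6) = 15625 gives 500 ticks for
 * the microsecond part.  TIME_TO_TICKS() therefore yields 1 * 1000 + 500
 * = 1500, and tvtohz() returns 1501 after adding the extra tick.
 */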

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_STOPPROF)
		return;
	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		mtx_lock(&time_lock);
		if (++profprocs == 1)
			cpu_startprofclock();
		mtx_unlock(&time_lock);
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_PROFIL) {
		if (p->p_profthreads != 0) {
			while (p->p_profthreads != 0) {
				p->p_flag |= P_STOPPROF;
				msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
				    "stopprof", 0);
			}
		}
		if ((p->p_flag & P_PROFIL) == 0)
			return;
		p->p_flag &= ~P_PROFIL;
		mtx_lock(&time_lock);
		if (--profprocs == 0)
			cpu_stopprofclock();
		mtx_unlock(&time_lock);
	}
}

/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int cnt, int usermode)
{
	struct rusage *ru;
	struct vmspace *vm;
	struct thread *td;
	struct proc *p;
	long rss;
	long *cp_time;
	uint64_t runtime, new_switchtime;

	td = curthread;
	p = td->td_proc;

	cp_time = (long *)PCPU_PTR(cp_time);
	if (usermode) {
		/*
		 * Charge the time as appropriate.
		 */
		td->td_uticks += cnt;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE] += cnt;
		else
			cp_time[CP_USER] += cnt;
	} else {
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if ((td->td_pflags & TDP_ITHREAD) ||
		    td->td_intr_nesting_level >= 2) {
			td->td_iticks += cnt;
			cp_time[CP_INTR] += cnt;
		} else {
			td->td_pticks += cnt;
			td->td_sticks += cnt;
			if (!TD_IS_IDLETHREAD(td))
				cp_time[CP_SYS] += cnt;
			else
				cp_time[CP_IDLE] += cnt;
		}
	}

	/* Update resource usage integrals and maximums. */
	MPASS(p->p_vmspace != NULL);
	vm = p->p_vmspace;
	ru = &td->td_ru;
	ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
	ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
	ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
	rss = pgtok(vmspace_resident_count(vm));
	if (ru->ru_maxrss < rss)
		ru->ru_maxrss = rss;
	KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
	    "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
	SDT_PROBE2(sched, , , tick, td, td->td_proc);
	thread_lock_flags(td, MTX_QUIET);

	/*
	 * Compute the amount of time during which the current
	 * thread was running, and add that to its total so far.
	 */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);

	sched_clock(td, cnt);
	thread_unlock(td);
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int cnt, int usermode, uintfptr_t pc)
{
	struct thread *td;

	td = curthread;
	if (usermode) {
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 * If there is no related user location yet, don't
		 * bother trying to count it.
		 */
		if (td->td_proc->p_flag & P_PROFIL)
			addupc_intr(td, pc, cnt);
	}
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	bzero(&clkinfo, sizeof(clkinfo));
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
	u_int u;

	u = cmd & WD_INTERVAL;
	if (u >= WD_TO_1SEC) {
		watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
		watchdog_enabled = 1;
		*error = 0;
	} else {
		watchdog_enabled = 0;
	}
}
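
/*
 * The WD_INTERVAL field encodes the timeout as a power of two, so each step
 * above WD_TO_1SEC doubles the interval (see <sys/watchdog.h>).  As an
 * illustrative example, a WD_TO_8SEC request, three steps above WD_TO_1SEC,
 * arms the software watchdog with watchdog_ticks = (1 << 3) * hz, i.e. eight
 * seconds worth of hardclock ticks; watchdog_fire() then triggers once
 * hardclock() has counted that many ticks down without an intervening pat.
 */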

/*
 * Handle a watchdog timeout by dumping interrupt information and
 * then either dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{
	int nintr;
	uint64_t inttotal;
	u_long *curintr;
	char *curname;

	curintr = intrcnt;
	curname = intrnames;
	inttotal = 0;
	nintr = sintrcnt / sizeof(u_long);

	printf("interrupt total\n");
	while (--nintr >= 0) {
		if (*curintr)
			printf("%-12s %20lu\n", curname, *curintr);
		curname += strlen(curname) + 1;
		inttotal += *curintr++;
	}
	printf("Total %20ju\n", (uintmax_t)inttotal);

#if defined(KDB) && !defined(KDB_UNATTENDED)
	kdb_backtrace();
	kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
	panic("watchdog timeout");
#endif
}