1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_kdb.h" 41 #include "opt_device_polling.h" 42 #include "opt_hwpmc_hooks.h" 43 #include "opt_ntp.h" 44 #include "opt_watchdog.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/callout.h> 49 #include <sys/epoch.h> 50 #include <sys/eventhandler.h> 51 #include <sys/gtaskqueue.h> 52 #include <sys/kdb.h> 53 #include <sys/kernel.h> 54 #include <sys/kthread.h> 55 #include <sys/ktr.h> 56 #include <sys/lock.h> 57 #include <sys/mutex.h> 58 #include <sys/proc.h> 59 #include <sys/resource.h> 60 #include <sys/resourcevar.h> 61 #include <sys/sched.h> 62 #include <sys/sdt.h> 63 #include <sys/signalvar.h> 64 #include <sys/sleepqueue.h> 65 #include <sys/smp.h> 66 #include <vm/vm.h> 67 #include <vm/pmap.h> 68 #include <vm/vm_map.h> 69 #include <sys/sysctl.h> 70 #include <sys/bus.h> 71 #include <sys/interrupt.h> 72 #include <sys/limits.h> 73 #include <sys/timetc.h> 74 75 #ifdef HWPMC_HOOKS 76 #include <sys/pmckern.h> 77 PMC_SOFT_DEFINE( , , clock, hard); 78 PMC_SOFT_DEFINE( , , clock, stat); 79 PMC_SOFT_DEFINE_EX( , , clock, prof, \ 80 cpu_startprofclock, cpu_stopprofclock); 81 #endif 82 83 #ifdef DEVICE_POLLING 84 extern void hardclock_device_poll(void); 85 #endif /* DEVICE_POLLING */ 86 87 /* Spin-lock protecting profiling statistics. */ 88 static struct mtx time_lock; 89 90 SDT_PROVIDER_DECLARE(sched); 91 SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *"); 92 93 static int 94 sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) 95 { 96 int error; 97 long cp_time[CPUSTATES]; 98 #ifdef SCTL_MASK32 99 int i; 100 unsigned int cp_time32[CPUSTATES]; 101 #endif 102 103 read_cpu_time(cp_time); 104 #ifdef SCTL_MASK32 105 if (req->flags & SCTL_MASK32) { 106 if (!req->oldptr) 107 return SYSCTL_OUT(req, 0, sizeof(cp_time32)); 108 for (i = 0; i < CPUSTATES; i++) 109 cp_time32[i] = (unsigned int)cp_time[i]; 110 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); 111 } else 112 #endif 113 { 114 if (!req->oldptr) 115 return SYSCTL_OUT(req, 0, sizeof(cp_time)); 116 error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); 117 } 118 return error; 119 } 120 121 SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 122 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); 123 124 static long empty[CPUSTATES]; 125 126 static int 127 sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) 128 { 129 struct pcpu *pcpu; 130 int error; 131 int c; 132 long *cp_time; 133 #ifdef SCTL_MASK32 134 unsigned int cp_time32[CPUSTATES]; 135 int i; 136 #endif 137 138 if (!req->oldptr) { 139 #ifdef SCTL_MASK32 140 if (req->flags & SCTL_MASK32) 141 return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); 142 else 143 #endif 144 return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); 145 } 146 for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { 147 if (!CPU_ABSENT(c)) { 148 pcpu = pcpu_find(c); 149 cp_time = pcpu->pc_cp_time; 150 } else { 151 cp_time = empty; 152 } 153 #ifdef SCTL_MASK32 154 if (req->flags & SCTL_MASK32) { 155 for (i = 0; i < CPUSTATES; i++) 156 cp_time32[i] = (unsigned int)cp_time[i]; 157 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); 158 } else 159 #endif 160 error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); 161 } 162 return error; 163 } 164 165 SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 166 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); 167 168 #ifdef DEADLKRES 169 static const char *blessed[] = { 170 "getblk", 171 "so_snd_sx", 172 "so_rcv_sx", 173 NULL 174 }; 175 static int slptime_threshold = 1800; 176 static int blktime_threshold = 900; 177 static int sleepfreq = 3; 178 179 static void 180 deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks) 181 { 182 int tticks; 183 184 sx_assert(&allproc_lock, SX_LOCKED); 185 PROC_LOCK_ASSERT(p, MA_OWNED); 186 THREAD_LOCK_ASSERT(td, MA_OWNED); 187 /* 188 * The thread should be blocked on a turnstile, simply check 189 * if the turnstile channel is in good state. 190 */ 191 MPASS(td->td_blocked != NULL); 192 193 tticks = ticks - td->td_blktick; 194 if (tticks > blkticks) 195 /* 196 * Accordingly with provided thresholds, this thread is stuck 197 * for too long on a turnstile. 198 */ 199 panic("%s: possible deadlock detected for %p (%s), " 200 "blocked for %d ticks\n", __func__, 201 td, sched_tdname(td), tticks); 202 } 203 204 static void 205 deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks) 206 { 207 const void *wchan; 208 int i, slptype, tticks; 209 210 sx_assert(&allproc_lock, SX_LOCKED); 211 PROC_LOCK_ASSERT(p, MA_OWNED); 212 THREAD_LOCK_ASSERT(td, MA_OWNED); 213 /* 214 * Check if the thread is sleeping on a lock, otherwise skip the check. 215 * Drop the thread lock in order to avoid a LOR with the sleepqueue 216 * spinlock. 217 */ 218 wchan = td->td_wchan; 219 tticks = ticks - td->td_slptick; 220 slptype = sleepq_type(wchan); 221 if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) && 222 tticks > slpticks) { 223 /* 224 * Accordingly with provided thresholds, this thread is stuck 225 * for too long on a sleepqueue. 226 * However, being on a sleepqueue, we might still check for the 227 * blessed list. 228 */ 229 for (i = 0; blessed[i] != NULL; i++) 230 if (!strcmp(blessed[i], td->td_wmesg)) 231 return; 232 233 panic("%s: possible deadlock detected for %p (%s), " 234 "blocked for %d ticks\n", __func__, 235 td, sched_tdname(td), tticks); 236 } 237 } 238 239 static void 240 deadlkres(void) 241 { 242 struct proc *p; 243 struct thread *td; 244 int blkticks, slpticks, tryl; 245 246 tryl = 0; 247 for (;;) { 248 blkticks = blktime_threshold * hz; 249 slpticks = slptime_threshold * hz; 250 251 /* 252 * Avoid to sleep on the sx_lock in order to avoid a 253 * possible priority inversion problem leading to 254 * starvation. 255 * If the lock can't be held after 100 tries, panic. 256 */ 257 if (!sx_try_slock(&allproc_lock)) { 258 if (tryl > 100) 259 panic("%s: possible deadlock detected " 260 "on allproc_lock\n", __func__); 261 tryl++; 262 pause("allproc", sleepfreq * hz); 263 continue; 264 } 265 tryl = 0; 266 FOREACH_PROC_IN_SYSTEM(p) { 267 PROC_LOCK(p); 268 if (p->p_state == PRS_NEW) { 269 PROC_UNLOCK(p); 270 continue; 271 } 272 FOREACH_THREAD_IN_PROC(p, td) { 273 thread_lock(td); 274 if (TD_ON_LOCK(td)) 275 deadlres_td_on_lock(p, td, 276 blkticks); 277 else if (TD_IS_SLEEPING(td)) 278 deadlres_td_sleep_q(p, td, 279 slpticks); 280 thread_unlock(td); 281 } 282 PROC_UNLOCK(p); 283 } 284 sx_sunlock(&allproc_lock); 285 286 /* Sleep for sleepfreq seconds. */ 287 pause("-", sleepfreq * hz); 288 } 289 } 290 291 static struct kthread_desc deadlkres_kd = { 292 "deadlkres", 293 deadlkres, 294 (struct thread **)NULL 295 }; 296 297 SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); 298 299 static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 300 "Deadlock resolver"); 301 SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RWTUN, 302 &slptime_threshold, 0, 303 "Number of seconds within is valid to sleep on a sleepqueue"); 304 SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RWTUN, 305 &blktime_threshold, 0, 306 "Number of seconds within is valid to block on a turnstile"); 307 SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RWTUN, &sleepfreq, 0, 308 "Number of seconds between any deadlock resolver thread run"); 309 #endif /* DEADLKRES */ 310 311 void 312 read_cpu_time(long *cp_time) 313 { 314 struct pcpu *pc; 315 int i, j; 316 317 /* Sum up global cp_time[]. */ 318 bzero(cp_time, sizeof(long) * CPUSTATES); 319 CPU_FOREACH(i) { 320 pc = pcpu_find(i); 321 for (j = 0; j < CPUSTATES; j++) 322 cp_time[j] += pc->pc_cp_time[j]; 323 } 324 } 325 326 #include <sys/watchdog.h> 327 328 static int watchdog_ticks; 329 static int watchdog_enabled; 330 static void watchdog_fire(void); 331 static void watchdog_config(void *, u_int, int *); 332 333 static void 334 watchdog_attach(void) 335 { 336 EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); 337 } 338 339 /* 340 * Clock handling routines. 341 * 342 * This code is written to operate with two timers that run independently of 343 * each other. 344 * 345 * The main timer, running hz times per second, is used to trigger interval 346 * timers, timeouts and rescheduling as needed. 347 * 348 * The second timer handles kernel and user profiling, 349 * and does resource use estimation. If the second timer is programmable, 350 * it is randomized to avoid aliasing between the two clocks. For example, 351 * the randomization prevents an adversary from always giving up the cpu 352 * just before its quantum expires. Otherwise, it would never accumulate 353 * cpu ticks. The mean frequency of the second timer is stathz. 354 * 355 * If no second timer exists, stathz will be zero; in this case we drive 356 * profiling and statistics off the main clock. This WILL NOT be accurate; 357 * do not do it unless absolutely necessary. 358 * 359 * The statistics clock may (or may not) be run at a higher rate while 360 * profiling. This profile clock runs at profhz. We require that profhz 361 * be an integral multiple of stathz. 362 * 363 * If the statistics clock is running fast, it must be divided by the ratio 364 * profhz/stathz for statistics. (For profiling, every tick counts.) 365 * 366 * Time-of-day is maintained using a "timecounter", which may or may 367 * not be related to the hardware generating the above mentioned 368 * interrupts. 369 */ 370 371 int stathz; 372 int profhz; 373 int profprocs; 374 volatile int ticks; 375 int psratio; 376 377 DPCPU_DEFINE_STATIC(int, pcputicks); /* Per-CPU version of ticks. */ 378 #ifdef DEVICE_POLLING 379 static int devpoll_run = 0; 380 #endif 381 382 static void 383 ast_oweupc(struct thread *td, int tda __unused) 384 { 385 if ((td->td_proc->p_flag & P_PROFIL) == 0) 386 return; 387 addupc_task(td, td->td_profil_addr, td->td_profil_ticks); 388 td->td_profil_ticks = 0; 389 td->td_pflags &= ~TDP_OWEUPC; 390 } 391 392 static void 393 ast_alrm(struct thread *td, int tda __unused) 394 { 395 struct proc *p; 396 397 p = td->td_proc; 398 PROC_LOCK(p); 399 kern_psignal(p, SIGVTALRM); 400 PROC_UNLOCK(p); 401 } 402 403 static void 404 ast_prof(struct thread *td, int tda __unused) 405 { 406 struct proc *p; 407 408 p = td->td_proc; 409 PROC_LOCK(p); 410 kern_psignal(p, SIGPROF); 411 PROC_UNLOCK(p); 412 } 413 414 /* 415 * Initialize clock frequencies and start both clocks running. 416 */ 417 static void 418 initclocks(void *dummy __unused) 419 { 420 int i; 421 422 /* 423 * Set divisors to 1 (normal case) and let the machine-specific 424 * code do its bit. 425 */ 426 mtx_init(&time_lock, "time lock", NULL, MTX_DEF); 427 cpu_initclocks(); 428 429 /* 430 * Compute profhz/stathz, and fix profhz if needed. 431 */ 432 i = stathz ? stathz : hz; 433 if (profhz == 0) 434 profhz = i; 435 psratio = profhz / i; 436 437 ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc); 438 ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm); 439 ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof); 440 441 #ifdef SW_WATCHDOG 442 /* Enable hardclock watchdog now, even if a hardware watchdog exists. */ 443 watchdog_attach(); 444 #else 445 /* Volunteer to run a software watchdog. */ 446 if (wdog_software_attach == NULL) 447 wdog_software_attach = watchdog_attach; 448 #endif 449 } 450 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); 451 452 static __noinline void 453 hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode) 454 { 455 struct proc *p; 456 int ast; 457 458 ast = 0; 459 p = td->td_proc; 460 if (usermode && 461 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { 462 PROC_ITIMLOCK(p); 463 if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], 464 tick * cnt) == 0) 465 ast |= TDAI(TDA_ALRM); 466 PROC_ITIMUNLOCK(p); 467 } 468 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { 469 PROC_ITIMLOCK(p); 470 if (itimerdecr(&pstats->p_timer[ITIMER_PROF], 471 tick * cnt) == 0) 472 ast |= TDAI(TDA_PROF); 473 PROC_ITIMUNLOCK(p); 474 } 475 if (ast != 0) 476 ast_sched_mask(td, ast); 477 } 478 479 void 480 hardclock(int cnt, int usermode) 481 { 482 struct pstats *pstats; 483 struct thread *td = curthread; 484 struct proc *p = td->td_proc; 485 int *t = DPCPU_PTR(pcputicks); 486 int global, i, newticks; 487 488 /* 489 * Update per-CPU and possibly global ticks values. 490 */ 491 *t += cnt; 492 global = ticks; 493 do { 494 newticks = *t - global; 495 if (newticks <= 0) { 496 if (newticks < -1) 497 *t = global - 1; 498 newticks = 0; 499 break; 500 } 501 } while (!atomic_fcmpset_int(&ticks, &global, *t)); 502 503 /* 504 * Run current process's virtual and profile time, as needed. 505 */ 506 pstats = p->p_stats; 507 if (__predict_false( 508 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) || 509 timevalisset(&pstats->p_timer[ITIMER_PROF].it_value))) 510 hardclock_itimer(td, pstats, cnt, usermode); 511 512 #ifdef HWPMC_HOOKS 513 if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) 514 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); 515 if (td->td_intr_frame != NULL) 516 PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); 517 #endif 518 /* We are in charge to handle this tick duty. */ 519 if (newticks > 0) { 520 tc_ticktock(newticks); 521 #ifdef DEVICE_POLLING 522 /* Dangerous and no need to call these things concurrently. */ 523 if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) { 524 /* This is very short and quick. */ 525 hardclock_device_poll(); 526 atomic_store_rel_int(&devpoll_run, 0); 527 } 528 #endif /* DEVICE_POLLING */ 529 if (watchdog_enabled > 0) { 530 i = atomic_fetchadd_int(&watchdog_ticks, -newticks); 531 if (i > 0 && i <= newticks) 532 watchdog_fire(); 533 } 534 intr_event_handle(clk_intr_event, NULL); 535 } 536 if (curcpu == CPU_FIRST()) 537 cpu_tick_calibration(); 538 if (__predict_false(DPCPU_GET(epoch_cb_count))) 539 GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task)); 540 } 541 542 void 543 hardclock_sync(int cpu) 544 { 545 int *t; 546 KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu)); 547 t = DPCPU_ID_PTR(cpu, pcputicks); 548 549 *t = ticks; 550 } 551 552 /* 553 * Regular integer scaling formula without losing precision: 554 */ 555 #define TIME_INT_SCALE(value, mul, div) \ 556 (((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div)) 557 558 /* 559 * Macro for converting seconds and microseconds into actual ticks, 560 * based on the given hz value: 561 */ 562 #define TIME_TO_TICKS(sec, usec, hz) \ 563 ((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6)) 564 565 #define TIME_ASSERT_VALID_HZ(hz) \ 566 _Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \ 567 TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \ 568 "tvtohz() can overflow the regular integer type") 569 570 /* 571 * Compile time assert the maximum and minimum values to fit into a 572 * regular integer when computing TIME_TO_TICKS(): 573 */ 574 TIME_ASSERT_VALID_HZ(HZ_MAXIMUM); 575 TIME_ASSERT_VALID_HZ(HZ_MINIMUM); 576 577 /* 578 * The formula is mostly linear, but test some more common values just 579 * in case: 580 */ 581 TIME_ASSERT_VALID_HZ(1024); 582 TIME_ASSERT_VALID_HZ(1000); 583 TIME_ASSERT_VALID_HZ(128); 584 TIME_ASSERT_VALID_HZ(100); 585 586 /* 587 * Compute number of ticks representing the specified amount of time. 588 * If the specified time is negative, a value of 1 is returned. This 589 * function returns a value from 1 up to and including INT_MAX. 590 */ 591 int 592 tvtohz(struct timeval *tv) 593 { 594 int retval; 595 596 /* 597 * The values passed here may come from user-space and these 598 * checks ensure "tv_usec" is within its allowed range: 599 */ 600 601 /* check for tv_usec underflow */ 602 if (__predict_false(tv->tv_usec < 0)) { 603 tv->tv_sec += tv->tv_usec / 1000000; 604 tv->tv_usec = tv->tv_usec % 1000000; 605 /* convert tv_usec to a positive value */ 606 if (__predict_true(tv->tv_usec < 0)) { 607 tv->tv_usec += 1000000; 608 tv->tv_sec -= 1; 609 } 610 /* check for tv_usec overflow */ 611 } else if (__predict_false(tv->tv_usec >= 1000000)) { 612 tv->tv_sec += tv->tv_usec / 1000000; 613 tv->tv_usec = tv->tv_usec % 1000000; 614 } 615 616 /* check for tv_sec underflow */ 617 if (__predict_false(tv->tv_sec < 0)) 618 return (1); 619 /* check for tv_sec overflow (including room for the tv_usec part) */ 620 else if (__predict_false(tv->tv_sec >= tick_seconds_max)) 621 return (INT_MAX); 622 623 /* cast to "int" to avoid platform differences */ 624 retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz); 625 626 /* add one additional tick */ 627 return (retval + 1); 628 } 629 630 /* 631 * Start profiling on a process. 632 * 633 * Kernel profiling passes proc0 which never exits and hence 634 * keeps the profile clock running constantly. 635 */ 636 void 637 startprofclock(struct proc *p) 638 { 639 640 PROC_LOCK_ASSERT(p, MA_OWNED); 641 if (p->p_flag & P_STOPPROF) 642 return; 643 if ((p->p_flag & P_PROFIL) == 0) { 644 p->p_flag |= P_PROFIL; 645 mtx_lock(&time_lock); 646 if (++profprocs == 1) 647 cpu_startprofclock(); 648 mtx_unlock(&time_lock); 649 } 650 } 651 652 /* 653 * Stop profiling on a process. 654 */ 655 void 656 stopprofclock(struct proc *p) 657 { 658 659 PROC_LOCK_ASSERT(p, MA_OWNED); 660 if (p->p_flag & P_PROFIL) { 661 if (p->p_profthreads != 0) { 662 while (p->p_profthreads != 0) { 663 p->p_flag |= P_STOPPROF; 664 msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, 665 "stopprof", 0); 666 } 667 } 668 if ((p->p_flag & P_PROFIL) == 0) 669 return; 670 p->p_flag &= ~P_PROFIL; 671 mtx_lock(&time_lock); 672 if (--profprocs == 0) 673 cpu_stopprofclock(); 674 mtx_unlock(&time_lock); 675 } 676 } 677 678 /* 679 * Statistics clock. Updates rusage information and calls the scheduler 680 * to adjust priorities of the active thread. 681 * 682 * This should be called by all active processors. 683 */ 684 void 685 statclock(int cnt, int usermode) 686 { 687 struct rusage *ru; 688 struct vmspace *vm; 689 struct thread *td; 690 struct proc *p; 691 long rss; 692 long *cp_time; 693 uint64_t runtime, new_switchtime; 694 695 td = curthread; 696 p = td->td_proc; 697 698 cp_time = (long *)PCPU_PTR(cp_time); 699 if (usermode) { 700 /* 701 * Charge the time as appropriate. 702 */ 703 td->td_uticks += cnt; 704 if (p->p_nice > NZERO) 705 cp_time[CP_NICE] += cnt; 706 else 707 cp_time[CP_USER] += cnt; 708 } else { 709 /* 710 * Came from kernel mode, so we were: 711 * - handling an interrupt, 712 * - doing syscall or trap work on behalf of the current 713 * user process, or 714 * - spinning in the idle loop. 715 * Whichever it is, charge the time as appropriate. 716 * Note that we charge interrupts to the current process, 717 * regardless of whether they are ``for'' that process, 718 * so that we know how much of its real time was spent 719 * in ``non-process'' (i.e., interrupt) work. 720 */ 721 if ((td->td_pflags & TDP_ITHREAD) || 722 td->td_intr_nesting_level >= 2) { 723 td->td_iticks += cnt; 724 cp_time[CP_INTR] += cnt; 725 } else { 726 td->td_pticks += cnt; 727 td->td_sticks += cnt; 728 if (!TD_IS_IDLETHREAD(td)) 729 cp_time[CP_SYS] += cnt; 730 else 731 cp_time[CP_IDLE] += cnt; 732 } 733 } 734 735 /* Update resource usage integrals and maximums. */ 736 MPASS(p->p_vmspace != NULL); 737 vm = p->p_vmspace; 738 ru = &td->td_ru; 739 ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt; 740 ru->ru_idrss += pgtok(vm->vm_dsize) * cnt; 741 ru->ru_isrss += pgtok(vm->vm_ssize) * cnt; 742 rss = pgtok(vmspace_resident_count(vm)); 743 if (ru->ru_maxrss < rss) 744 ru->ru_maxrss = rss; 745 KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", 746 "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); 747 SDT_PROBE2(sched, , , tick, td, td->td_proc); 748 thread_lock_flags(td, MTX_QUIET); 749 750 /* 751 * Compute the amount of time during which the current 752 * thread was running, and add that to its total so far. 753 */ 754 new_switchtime = cpu_ticks(); 755 runtime = new_switchtime - PCPU_GET(switchtime); 756 td->td_runtime += runtime; 757 td->td_incruntime += runtime; 758 PCPU_SET(switchtime, new_switchtime); 759 760 sched_clock(td, cnt); 761 thread_unlock(td); 762 #ifdef HWPMC_HOOKS 763 if (td->td_intr_frame != NULL) 764 PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame); 765 #endif 766 } 767 768 void 769 profclock(int cnt, int usermode, uintfptr_t pc) 770 { 771 struct thread *td; 772 773 td = curthread; 774 if (usermode) { 775 /* 776 * Came from user mode; CPU was in user state. 777 * If this process is being profiled, record the tick. 778 * if there is no related user location yet, don't 779 * bother trying to count it. 780 */ 781 if (td->td_proc->p_flag & P_PROFIL) 782 addupc_intr(td, pc, cnt); 783 } 784 #ifdef HWPMC_HOOKS 785 if (td->td_intr_frame != NULL) 786 PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame); 787 #endif 788 } 789 790 /* 791 * Return information about system clocks. 792 */ 793 static int 794 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) 795 { 796 struct clockinfo clkinfo; 797 /* 798 * Construct clockinfo structure. 799 */ 800 bzero(&clkinfo, sizeof(clkinfo)); 801 clkinfo.hz = hz; 802 clkinfo.tick = tick; 803 clkinfo.profhz = profhz; 804 clkinfo.stathz = stathz ? stathz : hz; 805 return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); 806 } 807 808 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, 809 CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE, 810 0, 0, sysctl_kern_clockrate, "S,clockinfo", 811 "Rate and period of various kernel clocks"); 812 813 static void 814 watchdog_config(void *unused __unused, u_int cmd, int *error) 815 { 816 u_int u; 817 818 u = cmd & WD_INTERVAL; 819 if (u >= WD_TO_1SEC) { 820 watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; 821 watchdog_enabled = 1; 822 *error = 0; 823 } else { 824 watchdog_enabled = 0; 825 } 826 } 827 828 /* 829 * Handle a watchdog timeout by dropping to DDB or panicking. 830 */ 831 static void 832 watchdog_fire(void) 833 { 834 835 #if defined(KDB) && !defined(KDB_UNATTENDED) 836 kdb_backtrace(); 837 kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout"); 838 #else 839 panic("watchdog timeout"); 840 #endif 841 } 842