/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/gtaskqueue.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */
/* Spin-lock protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
	int error;
	long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
	int i;
	unsigned int cp_time32[CPUSTATES];
#endif

	read_cpu_time(cp_time);
#ifdef SCTL_MASK32
	if (req->flags & SCTL_MASK32) {
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32));
		for (i = 0; i < CPUSTATES; i++)
			cp_time32[i] = (unsigned int)cp_time[i];
		error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
	} else
#endif
	{
		if (!req->oldptr)
			return SYSCTL_OUT(req, 0, sizeof(cp_time));
		error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_time, "LU", "CPU time statistics");

static long empty[CPUSTATES];

static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
	struct pcpu *pcpu;
	int error;
	int c;
	long *cp_time;
#ifdef SCTL_MASK32
	unsigned int cp_time32[CPUSTATES];
	int i;
#endif

	if (!req->oldptr) {
#ifdef SCTL_MASK32
		if (req->flags & SCTL_MASK32)
			return SYSCTL_OUT(req, 0, sizeof(cp_time32) *
			    (mp_maxid + 1));
		else
#endif
			return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES *
			    (mp_maxid + 1));
	}
	for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
		if (!CPU_ABSENT(c)) {
			pcpu = pcpu_find(c);
			cp_time = pcpu->pc_cp_time;
		} else {
			cp_time = empty;
		}
#ifdef SCTL_MASK32
		if (req->flags & SCTL_MASK32) {
			for (i = 0; i < CPUSTATES; i++)
				cp_time32[i] = (unsigned int)cp_time[i];
			error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
		} else
#endif
			error = SYSCTL_OUT(req, cp_time,
			    sizeof(long) * CPUSTATES);
	}
	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
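
/*
 * Illustrative userland sketch (not part of this file): the per-CPU counters
 * exported by the handler above can be read with sysctlbyname(3), e.g.
 *
 *	size_t len;
 *	long *times;
 *
 *	sysctlbyname("kern.cp_times", NULL, &len, NULL, 0);
 *	times = malloc(len);
 *	sysctlbyname("kern.cp_times", times, &len, NULL, 0);
 *
 * which yields CPUSTATES longs for each possible CPU id up to mp_maxid;
 * absent CPUs report all-zero counters.
 */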
#ifdef DEADLKRES
static const char *blessed[] = {
	"getblk",
	"so_snd_sx",
	"so_rcv_sx",
	NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

static void
deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
{
	int tticks;

	sx_assert(&allproc_lock, SX_LOCKED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	/*
	 * The thread should be blocked on a turnstile; simply check
	 * whether the turnstile channel is in a good state.
	 */
	MPASS(td->td_blocked != NULL);

	tticks = ticks - td->td_blktick;
	if (tticks > blkticks)
		/*
		 * According to the provided thresholds, this thread has
		 * been stuck on a turnstile for too long.
		 */
		panic("%s: possible deadlock detected for %p (%s), "
		    "blocked for %d ticks\n", __func__,
		    td, sched_tdname(td), tticks);
}

static void
deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
{
	const void *wchan;
	int i, slptype, tticks;

	sx_assert(&allproc_lock, SX_LOCKED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	/*
	 * Check if the thread is sleeping on a lock; otherwise skip the
	 * check.  Drop the thread lock in order to avoid a LOR with the
	 * sleepqueue spinlock.
	 */
	wchan = td->td_wchan;
	tticks = ticks - td->td_slptick;
	slptype = sleepq_type(wchan);
	if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
	    tticks > slpticks) {
		/*
		 * According to the provided thresholds, this thread has
		 * been stuck on a sleepqueue for too long.  However, since
		 * it is on a sleepqueue, check the blessed list before
		 * panicking.
		 */
		for (i = 0; blessed[i] != NULL; i++)
			if (!strcmp(blessed[i], td->td_wmesg))
				return;

		panic("%s: possible deadlock detected for %p (%s), "
		    "blocked for %d ticks\n", __func__,
		    td, sched_tdname(td), tticks);
	}
}

static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	int blkticks, slpticks, tryl;

	tryl = 0;
	for (;;) {
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Avoid sleeping on the sx lock in order to avoid a
		 * possible priority inversion problem leading to
		 * starvation.
		 * If the lock can't be acquired after 100 tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
				panic("%s: possible deadlock detected "
				    "on allproc_lock\n", __func__);
			tryl++;
			pause("allproc", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			FOREACH_THREAD_IN_PROC(p, td) {
				thread_lock(td);
				if (TD_ON_LOCK(td))
					deadlres_td_on_lock(p, td,
					    blkticks);
				else if (TD_IS_SLEEPING(td))
					deadlres_td_sleep_q(p, td,
					    slpticks);
				thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("-", sleepfreq * hz);
	}
}

static struct kthread_desc deadlkres_kd = {
	"deadlkres",
	deadlkres,
	(struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RWTUN,
    &slptime_threshold, 0,
    "Number of seconds a thread may validly sleep on a sleepqueue");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RWTUN,
    &blktime_threshold, 0,
    "Number of seconds a thread may validly block on a turnstile");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RWTUN, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
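
/*
 * With the defaults above and, say, hz = 1000, deadlkres() wakes up every
 * sleepfreq * hz = 3000 ticks and panics when a thread has been parked on a
 * turnstile for more than blktime_threshold * hz = 900000 ticks (15 minutes),
 * or on an sx/lockmgr sleepqueue for more than slptime_threshold * hz =
 * 1800000 ticks (30 minutes) unless its wait message is on the blessed list.
 */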
#endif /* DEADLKRES */

void
read_cpu_time(long *cp_time)
{
	struct pcpu *pc;
	int i, j;

	/* Sum up global cp_time[]. */
	bzero(cp_time, sizeof(long) * CPUSTATES);
	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		for (j = 0; j < CPUSTATES; j++)
			cp_time[j] += pc->pc_cp_time[j];
	}
}

#include <sys/watchdog.h>

static long watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);

static void
watchdog_attach(void)
{
	EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
}

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int stathz;
int profhz;
int profprocs;
int psratio;

DPCPU_DEFINE_STATIC(long, pcputicks);	/* Per-CPU version of ticks. */
#ifdef DEVICE_POLLING
static int devpoll_run = 0;
#endif

static void
ast_oweupc(struct thread *td, int tda __unused)
{
	if ((td->td_proc->p_flag & P_PROFIL) == 0)
		return;
	addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
	td->td_profil_ticks = 0;
	td->td_pflags &= ~TDP_OWEUPC;
}

static void
ast_alrm(struct thread *td, int tda __unused)
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	kern_psignal(p, SIGVTALRM);
	PROC_UNLOCK(p);
}

static void
ast_prof(struct thread *td, int tda __unused)
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	kern_psignal(p, SIGPROF);
	PROC_UNLOCK(p);
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
static void
initclocks(void *dummy __unused)
{
	int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;

	ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
	ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
	ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);

#ifdef SW_WATCHDOG
	/* Enable hardclock watchdog now, even if a hardware watchdog exists. */
	watchdog_attach();
#else
	/* Volunteer to run a software watchdog. */
	if (wdog_software_attach == NULL)
		wdog_software_attach = watchdog_attach;
#endif
}
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);
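
/*
 * A quick sanity check of the ratio initclocks() computes: when there is no
 * separate statistics clock (stathz == 0) the main clock stands in, so the
 * divisor falls back to hz; when profhz is left at 0 it is set equal to that
 * rate and psratio is 1.  With hypothetical values stathz = 128 and
 * profhz = 1024, psratio = 1024 / 128 = 8, matching the requirement above
 * that profhz be an integral multiple of stathz.
 */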
static __noinline void
hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt,
    int usermode)
{
	struct proc *p;
	int ast;

	ast = 0;
	p = td->td_proc;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
		    tick * cnt) == 0)
			ast |= TDAI(TDA_ALRM);
		PROC_ITIMUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_ITIMLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
		    tick * cnt) == 0)
			ast |= TDAI(TDA_PROF);
		PROC_ITIMUNLOCK(p);
	}
	if (ast != 0)
		ast_sched_mask(td, ast);
}

void
hardclock(int cnt, int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	long global, newticks, *t;

	/*
	 * Update the per-CPU and, possibly, the global ticks values.
	 */
	t = DPCPU_PTR(pcputicks);
	*t += cnt;
	global = atomic_load_long(&ticksl);
	do {
		newticks = *t - global;
		if (newticks <= 0) {
			if (newticks < -1)
				*t = global - 1;
			newticks = 0;
			break;
		}
	} while (!atomic_fcmpset_long(&ticksl, &global, *t));

	/*
	 * Run the current process's virtual and profile time, as needed.
	 */
	pstats = p->p_stats;
	if (__predict_false(
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) ||
	    timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)))
		hardclock_itimer(td, pstats, cnt, usermode);

#ifdef HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
	/* We are in charge of handling this tick's duties. */
	if (newticks > 0) {
		tc_ticktock(newticks);
#ifdef DEVICE_POLLING
		/*
		 * It is dangerous, and unnecessary, to run these
		 * concurrently.
		 */
		if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
			/* This is very short and quick. */
			hardclock_device_poll();
			atomic_store_rel_int(&devpoll_run, 0);
		}
#endif /* DEVICE_POLLING */
		if (watchdog_enabled > 0) {
			long left;

			left = atomic_fetchadd_long(&watchdog_ticks,
			    -newticks);
			if (left > 0 && left <= newticks)
				watchdog_fire();
		}
		intr_event_handle(clk_intr_event, NULL);
	}
	if (curcpu == CPU_FIRST())
		cpu_tick_calibration();
	if (__predict_false(DPCPU_GET(epoch_cb_count)))
		GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}
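
/*
 * How the tick accounting in hardclock() stays lock-free: each CPU first
 * advances its private pcputicks counter by cnt and then tries to publish
 * that value into the global ticksl with atomic_fcmpset_long().  Only a CPU
 * that actually moves the global value forward (newticks > 0) performs the
 * once-per-tick duties such as tc_ticktock(), the watchdog countdown and the
 * clock interrupt event.  For example (hypothetical values), if ticksl is
 * 1000 and a CPU arrives with pcputicks of 1002, it publishes 1002 and
 * handles newticks = 2 worth of duties; a second CPU arriving with pcputicks
 * 1001 then finds nothing left to claim, and a CPU that falls more than one
 * tick behind the global count is resynchronized to ticksl - 1.
 */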
void
hardclock_sync(int cpu)
{
	long *t;

	KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));

	t = DPCPU_ID_PTR(cpu, pcputicks);
	*t = ticksl;
}

/*
 * Regular integer scaling formula without losing precision:
 */
#define	TIME_INT_SCALE(value, mul, div) \
	(((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div))

/*
 * Macro for converting seconds and microseconds into actual ticks,
 * based on the given hz value:
 */
#define	TIME_TO_TICKS(sec, usec, hz) \
	((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6))

#define	TIME_ASSERT_VALID_HZ(hz) \
	_Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \
	    TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \
	    "tvtohz() can overflow the regular integer type")

/*
 * Compile time assert the maximum and minimum values to fit into a
 * regular integer when computing TIME_TO_TICKS():
 */
TIME_ASSERT_VALID_HZ(HZ_MAXIMUM);
TIME_ASSERT_VALID_HZ(HZ_MINIMUM);

/*
 * The formula is mostly linear, but test some more common values just
 * in case:
 */
TIME_ASSERT_VALID_HZ(1024);
TIME_ASSERT_VALID_HZ(1000);
TIME_ASSERT_VALID_HZ(128);
TIME_ASSERT_VALID_HZ(100);

/*
 * Compute number of ticks representing the specified amount of time.
 * If the specified time is negative, a value of 1 is returned.  This
 * function returns a value from 1 up to and including INT_MAX.
 */
int
tvtohz(struct timeval *tv)
{
	int retval;

	/*
	 * The values passed here may come from user-space and these
	 * checks ensure "tv_usec" is within its allowed range:
	 */

	/* check for tv_usec underflow */
	if (__predict_false(tv->tv_usec < 0)) {
		tv->tv_sec += tv->tv_usec / 1000000;
		tv->tv_usec = tv->tv_usec % 1000000;
		/* convert tv_usec to a positive value */
		if (__predict_true(tv->tv_usec < 0)) {
			tv->tv_usec += 1000000;
			tv->tv_sec -= 1;
		}
	/* check for tv_usec overflow */
	} else if (__predict_false(tv->tv_usec >= 1000000)) {
		tv->tv_sec += tv->tv_usec / 1000000;
		tv->tv_usec = tv->tv_usec % 1000000;
	}

	/* check for tv_sec underflow */
	if (__predict_false(tv->tv_sec < 0))
		return (1);
	/* check for tv_sec overflow (including room for the tv_usec part) */
	else if (__predict_false(tv->tv_sec >= tick_seconds_max))
		return (INT_MAX);

	/* cast to "int" to avoid platform differences */
	retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz);

	/* add one additional tick */
	return (retval + 1);
}
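
/*
 * Worked example of the conversion above, assuming hz = 1000: for a timeout
 * of 1.5 seconds, tv_sec = 1 and tv_usec = 500000, so
 *
 *	TIME_INT_SCALE(500000, 1000, 64) = (500000 / 64) * 1000 +
 *	    ((500000 % 64) * 1000) / 64 = 7812000 + 500 = 7812500
 *	TIME_TO_TICKS(1, 500000, 1000) = 1000 + 7812500 / 15625 = 1500
 *
 * and tvtohz() returns 1501 after adding the extra tick.  Splitting the
 * scaling into a quotient term and a scaled remainder term is what keeps the
 * intermediate values within a regular integer for every hz value checked by
 * TIME_ASSERT_VALID_HZ().
 */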
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_STOPPROF)
		return;
	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		mtx_lock(&time_lock);
		if (++profprocs == 1)
			cpu_startprofclock();
		mtx_unlock(&time_lock);
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_PROFIL) {
		if (p->p_profthreads != 0) {
			while (p->p_profthreads != 0) {
				p->p_flag |= P_STOPPROF;
				msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
				    "stopprof", 0);
			}
		}
		if ((p->p_flag & P_PROFIL) == 0)
			return;
		p->p_flag &= ~P_PROFIL;
		mtx_lock(&time_lock);
		if (--profprocs == 0)
			cpu_stopprofclock();
		mtx_unlock(&time_lock);
	}
}

/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int cnt, int usermode)
{
	struct rusage *ru;
	struct vmspace *vm;
	struct thread *td;
	struct proc *p;
	long rss;
	long *cp_time;
	uint64_t runtime, new_switchtime;

	td = curthread;
	p = td->td_proc;

	cp_time = (long *)PCPU_PTR(cp_time);
	if (usermode) {
		/*
		 * Charge the time as appropriate.
		 */
		td->td_uticks += cnt;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE] += cnt;
		else
			cp_time[CP_USER] += cnt;
	} else {
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if ((td->td_pflags & TDP_ITHREAD) ||
		    td->td_intr_nesting_level >= 2) {
			td->td_iticks += cnt;
			cp_time[CP_INTR] += cnt;
		} else {
			td->td_pticks += cnt;
			td->td_sticks += cnt;
			if (!TD_IS_IDLETHREAD(td))
				cp_time[CP_SYS] += cnt;
			else
				cp_time[CP_IDLE] += cnt;
		}
	}

	/* Update resource usage integrals and maximums. */
	MPASS(p->p_vmspace != NULL);
	vm = p->p_vmspace;
	ru = &td->td_ru;
	ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
	ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
	ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
	rss = pgtok(vmspace_resident_count(vm));
	if (ru->ru_maxrss < rss)
		ru->ru_maxrss = rss;
	KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
	    "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
	SDT_PROBE2(sched, , , tick, td, td->td_proc);
	thread_lock_flags(td, MTX_QUIET);

	/*
	 * Compute the amount of time during which the current
	 * thread was running, and add that to its total so far.
	 */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);

	sched_clock(td, cnt);
	thread_unlock(td);
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int cnt, int usermode, uintfptr_t pc)
{
	struct thread *td;

	td = curthread;
	if (usermode) {
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 * If there is no related user location yet, don't
		 * bother trying to count it.
		 */
		if (td->td_proc->p_flag & P_PROFIL)
			addupc_intr(td, pc, cnt);
	}
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}
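
/*
 * Charging summary for statclock() above: a user-mode tick is charged to
 * CP_NICE when p_nice > NZERO and to CP_USER otherwise; a kernel-mode tick is
 * charged to CP_INTR for interrupt threads or nested interrupts, to CP_IDLE
 * for the idle thread and to CP_SYS for everything else.  The same cnt is
 * also folded into the thread's td_uticks/td_sticks/td_iticks counters that
 * feed rusage.
 */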
/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	bzero(&clkinfo, sizeof(clkinfo));
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
	u_int u;

	u = cmd & WD_INTERVAL;
	if (u >= WD_TO_1SEC) {
		watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
		watchdog_enabled = 1;
		*error = 0;
	} else {
		watchdog_enabled = 0;
	}
}

/*
 * Handle a watchdog timeout by dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{

#if defined(KDB) && !defined(KDB_UNATTENDED)
	kdb_backtrace();
	kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
	panic("watchdog timeout");
#endif
}
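
/*
 * Software watchdog summary: watchdog_config() arms watchdog_ticks from the
 * WD_INTERVAL field of the command, where an interval of WD_TO_1SEC + n
 * translates to (1 << n) * hz hardclock ticks (roughly 2^n seconds), and any
 * interval below WD_TO_1SEC disables the watchdog.  hardclock() counts
 * watchdog_ticks down by newticks on each accounted tick, and watchdog_fire()
 * either enters the debugger (with KDB and without KDB_UNATTENDED) or panics
 * once the countdown crosses zero.
 */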