/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/gtaskqueue.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

/* Mutex protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
        int error;
        long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
        int i;
        unsigned int cp_time32[CPUSTATES];
#endif

        read_cpu_time(cp_time);
#ifdef SCTL_MASK32
        if (req->flags & SCTL_MASK32) {
                if (!req->oldptr)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time32));
                for (i = 0; i < CPUSTATES; i++)
                        cp_time32[i] = (unsigned int)cp_time[i];
                error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
        } else
#endif
        {
                if (!req->oldptr)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time));
                error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
        }
        return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");

static long empty[CPUSTATES];

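/*
 * As above, but export one cp_time array per possible CPU: mp_maxid + 1
 * consecutive arrays of CPUSTATES longs (truncated to 32-bit counters for
 * SCTL_MASK32 requests).  Slots belonging to absent CPUs read back as zeros.
 */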
static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
        struct pcpu *pcpu;
        int error;
        int c;
        long *cp_time;
#ifdef SCTL_MASK32
        unsigned int cp_time32[CPUSTATES];
        int i;
#endif

        if (!req->oldptr) {
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
                else
#endif
                        return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
        }
        for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
                if (!CPU_ABSENT(c)) {
                        pcpu = pcpu_find(c);
                        cp_time = pcpu->pc_cp_time;
                } else {
                        cp_time = empty;
                }
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32) {
                        for (i = 0; i < CPUSTATES; i++)
                                cp_time32[i] = (unsigned int)cp_time[i];
                        error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
                } else
#endif
                        error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
        }
        return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");

#ifdef DEADLKRES
static const char *blessed[] = {
        "getblk",
        "so_snd_sx",
        "so_rcv_sx",
        NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

static void
deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
{
        int tticks;

        sx_assert(&allproc_lock, SX_LOCKED);
        PROC_LOCK_ASSERT(p, MA_OWNED);
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        /*
         * The thread should be blocked on a turnstile; simply check
         * whether the turnstile channel is in good state.
         */
        MPASS(td->td_blocked != NULL);

        tticks = ticks - td->td_blktick;
        if (tticks > blkticks)
                /*
                 * According to the provided thresholds, this thread has
                 * been stuck on a turnstile for too long.
                 */
                panic("%s: possible deadlock detected for %p (%s), "
                    "blocked for %d ticks\n", __func__,
                    td, sched_tdname(td), tticks);
}

static void
deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
{
        const void *wchan;
        int i, slptype, tticks;

        sx_assert(&allproc_lock, SX_LOCKED);
        PROC_LOCK_ASSERT(p, MA_OWNED);
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        /*
         * Check if the thread is sleeping on a lock, otherwise skip the check.
         * Drop the thread lock in order to avoid a LOR with the sleepqueue
         * spinlock.
         */
        wchan = td->td_wchan;
        tticks = ticks - td->td_slptick;
        slptype = sleepq_type(wchan);
        if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
            tticks > slpticks) {
                /*
                 * According to the provided thresholds, this thread has
                 * been stuck on a sleepqueue for too long.
                 * However, since it is on a sleepqueue, we should still
                 * check the blessed list.
                 */
                for (i = 0; blessed[i] != NULL; i++)
                        if (!strcmp(blessed[i], td->td_wmesg))
                                return;

                panic("%s: possible deadlock detected for %p (%s), "
                    "blocked for %d ticks\n", __func__,
                    td, sched_tdname(td), tticks);
        }
}

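/*
 * Deadlock resolver main loop: periodically take a shared snapshot of
 * allproc and run the turnstile and sleepqueue checks above against every
 * thread, panicking if one has been blocked for longer than the configured
 * thresholds allow.
 */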
static void
deadlkres(void)
{
        struct proc *p;
        struct thread *td;
        int blkticks, slpticks, tryl;

        tryl = 0;
        for (;;) {
                blkticks = blktime_threshold * hz;
                slpticks = slptime_threshold * hz;

                /*
                 * Avoid sleeping on the sx lock in order to avoid a
                 * possible priority inversion problem leading to
                 * starvation.
                 * If the lock cannot be acquired after 100 tries, panic.
                 */
                if (!sx_try_slock(&allproc_lock)) {
                        if (tryl > 100)
                                panic("%s: possible deadlock detected "
                                    "on allproc_lock\n", __func__);
                        tryl++;
                        pause("allproc", sleepfreq * hz);
                        continue;
                }
                tryl = 0;
                FOREACH_PROC_IN_SYSTEM(p) {
                        PROC_LOCK(p);
                        if (p->p_state == PRS_NEW) {
                                PROC_UNLOCK(p);
                                continue;
                        }
                        FOREACH_THREAD_IN_PROC(p, td) {
                                thread_lock(td);
                                if (TD_ON_LOCK(td))
                                        deadlres_td_on_lock(p, td,
                                            blkticks);
                                else if (TD_IS_SLEEPING(td))
                                        deadlres_td_sleep_q(p, td,
                                            slpticks);
                                thread_unlock(td);
                        }
                        PROC_UNLOCK(p);
                }
                sx_sunlock(&allproc_lock);

                /* Sleep for sleepfreq seconds. */
                pause("-", sleepfreq * hz);
        }
}

static struct kthread_desc deadlkres_kd = {
        "deadlkres",
        deadlkres,
        (struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RWTUN,
    &slptime_threshold, 0,
    "Number of seconds within which it is valid to sleep on a sleepqueue");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RWTUN,
    &blktime_threshold, 0,
    "Number of seconds within which it is valid to block on a turnstile");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RWTUN, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
#endif /* DEADLKRES */

void
read_cpu_time(long *cp_time)
{
        struct pcpu *pc;
        int i, j;

        /* Sum up global cp_time[]. */
        bzero(cp_time, sizeof(long) * CPUSTATES);
        CPU_FOREACH(i) {
                pc = pcpu_find(i);
                for (j = 0; j < CPUSTATES; j++)
                        cp_time[j] += pc->pc_cp_time[j];
        }
}

#include <sys/watchdog.h>

static int watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);

static void
watchdog_attach(void)
{
        EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
}

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int stathz;
int profhz;
int profprocs;
volatile int ticks;
int psratio;

DPCPU_DEFINE_STATIC(int, pcputicks);    /* Per-CPU version of ticks. */
#ifdef DEVICE_POLLING
static int devpoll_run = 0;
#endif

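/*
 * AST handlers scheduled from clock context: once the thread is headed back
 * to user mode, fold any pending profiling ticks into the profile buffer and
 * deliver SIGVTALRM/SIGPROF for expired interval timers.
 */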
static void
ast_oweupc(struct thread *td, int tda __unused)
{
        if ((td->td_proc->p_flag & P_PROFIL) == 0)
                return;
        addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
        td->td_profil_ticks = 0;
        td->td_pflags &= ~TDP_OWEUPC;
}

static void
ast_alrm(struct thread *td, int tda __unused)
{
        struct proc *p;

        p = td->td_proc;
        PROC_LOCK(p);
        kern_psignal(p, SIGVTALRM);
        PROC_UNLOCK(p);
}

static void
ast_prof(struct thread *td, int tda __unused)
{
        struct proc *p;

        p = td->td_proc;
        PROC_LOCK(p);
        kern_psignal(p, SIGPROF);
        PROC_UNLOCK(p);
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
static void
initclocks(void *dummy __unused)
{
        int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;

        ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
        ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
        ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);

#ifdef SW_WATCHDOG
        /* Enable hardclock watchdog now, even if a hardware watchdog exists. */
        watchdog_attach();
#else
        /* Volunteer to run a software watchdog. */
        if (wdog_software_attach == NULL)
                wdog_software_attach = watchdog_attach;
#endif
}
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);

static __noinline void
hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode)
{
        struct proc *p;
        int ast;

        ast = 0;
        p = td->td_proc;
        if (usermode &&
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
                PROC_ITIMLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
                    tick * cnt) == 0)
                        ast |= TDAI(TDA_ALRM);
                PROC_ITIMUNLOCK(p);
        }
        if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
                PROC_ITIMLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
                    tick * cnt) == 0)
                        ast |= TDAI(TDA_PROF);
                PROC_ITIMUNLOCK(p);
        }
        if (ast != 0)
                ast_sched_mask(td, ast);
}

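/*
 * Main clock interrupt body.  Each CPU advances its private pcputicks
 * counter by cnt and then tries to push the global ticks value forward to
 * match; the CPU whose update wins the atomic_fcmpset (newticks > 0) takes
 * on the global per-tick duties below (timecounter, polling, watchdog),
 * while a CPU that has fallen behind is re-synchronized to just below the
 * global count instead of catching up.
 */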
void
hardclock(int cnt, int usermode)
{
        struct pstats *pstats;
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        int *t = DPCPU_PTR(pcputicks);
        int global, i, newticks;

        /*
         * Update per-CPU and possibly global ticks values.
         */
        *t += cnt;
        global = ticks;
        do {
                newticks = *t - global;
                if (newticks <= 0) {
                        if (newticks < -1)
                                *t = global - 1;
                        newticks = 0;
                        break;
                }
        } while (!atomic_fcmpset_int(&ticks, &global, *t));

        /*
         * Run current process's virtual and profile time, as needed.
         */
        pstats = p->p_stats;
        if (__predict_false(
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) ||
            timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)))
                hardclock_itimer(td, pstats, cnt, usermode);

#ifdef HWPMC_HOOKS
        if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
                PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
        /* We are in charge of handling this tick. */
        if (newticks > 0) {
                tc_ticktock(newticks);
#ifdef DEVICE_POLLING
                /* These must not (and need not) be called concurrently. */
                if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
                        /* This is very short and quick. */
                        hardclock_device_poll();
                        atomic_store_rel_int(&devpoll_run, 0);
                }
#endif /* DEVICE_POLLING */
                if (watchdog_enabled > 0) {
                        i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
                        if (i > 0 && i <= newticks)
                                watchdog_fire();
                }
                intr_event_handle(clk_intr_event, NULL);
        }
        if (curcpu == CPU_FIRST())
                cpu_tick_calibration();
        if (__predict_false(DPCPU_GET(epoch_cb_count)))
                GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}

void
hardclock_sync(int cpu)
{
        int *t;
        KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
        t = DPCPU_ID_PTR(cpu, pcputicks);

        *t = ticks;
}

/*
 * Regular integer scaling formula without losing precision:
 */
#define TIME_INT_SCALE(value, mul, div) \
        (((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div))

/*
 * Macro for converting seconds and microseconds into actual ticks,
 * based on the given hz value:
 */
#define TIME_TO_TICKS(sec, usec, hz) \
        ((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6))

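/*
 * TIME_INT_SCALE() computes (value * mul) / div without ever forming the
 * full value * mul product: the quotient and remainder of value / div are
 * scaled separately, so usec * hz cannot overflow a plain int even at the
 * upper end of the supported hz range.  For example, with hz = 1000,
 * TIME_TO_TICKS(1, 500000, 1000) yields
 * 1 * 1000 + TIME_INT_SCALE(500000, 1000, 64) / 15625 = 1000 + 500 = 1500.
 */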
#define TIME_ASSERT_VALID_HZ(hz)        \
        _Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \
            TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX,    \
            "tvtohz() can overflow the regular integer type")

/*
 * Compile time assert the maximum and minimum values to fit into a
 * regular integer when computing TIME_TO_TICKS():
 */
TIME_ASSERT_VALID_HZ(HZ_MAXIMUM);
TIME_ASSERT_VALID_HZ(HZ_MINIMUM);

/*
 * The formula is mostly linear, but test some more common values just
 * in case:
 */
TIME_ASSERT_VALID_HZ(1024);
TIME_ASSERT_VALID_HZ(1000);
TIME_ASSERT_VALID_HZ(128);
TIME_ASSERT_VALID_HZ(100);

/*
 * Compute number of ticks representing the specified amount of time.
 * If the specified time is negative, a value of 1 is returned.  This
 * function returns a value from 1 up to and including INT_MAX.
 */
int
tvtohz(struct timeval *tv)
{
        int retval;

        /*
         * The values passed here may come from user-space and these
         * checks ensure "tv_usec" is within its allowed range:
         */

        /* check for tv_usec underflow */
        if (__predict_false(tv->tv_usec < 0)) {
                tv->tv_sec += tv->tv_usec / 1000000;
                tv->tv_usec = tv->tv_usec % 1000000;
                /* convert tv_usec to a positive value */
                if (__predict_true(tv->tv_usec < 0)) {
                        tv->tv_usec += 1000000;
                        tv->tv_sec -= 1;
                }
        /* check for tv_usec overflow */
        } else if (__predict_false(tv->tv_usec >= 1000000)) {
                tv->tv_sec += tv->tv_usec / 1000000;
                tv->tv_usec = tv->tv_usec % 1000000;
        }

        /* check for tv_sec underflow */
        if (__predict_false(tv->tv_sec < 0))
                return (1);
        /* check for tv_sec overflow (including room for the tv_usec part) */
        else if (__predict_false(tv->tv_sec >= tick_seconds_max))
                return (INT_MAX);

        /* cast to "int" to avoid platform differences */
        retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz);

        /* add one additional tick */
        return (retval + 1);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_STOPPROF)
                return;
        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                mtx_lock(&time_lock);
                if (++profprocs == 1)
                        cpu_startprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_PROFIL) {
                if (p->p_profthreads != 0) {
                        while (p->p_profthreads != 0) {
                                p->p_flag |= P_STOPPROF;
                                msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
                                    "stopprof", 0);
                        }
                }
                if ((p->p_flag & P_PROFIL) == 0)
                        return;
                p->p_flag &= ~P_PROFIL;
                mtx_lock(&time_lock);
                if (--profprocs == 0)
                        cpu_stopprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int cnt, int usermode)
{
        struct rusage *ru;
        struct vmspace *vm;
        struct thread *td;
        struct proc *p;
        long rss;
        long *cp_time;
        uint64_t runtime, new_switchtime;

        td = curthread;
        p = td->td_proc;

        cp_time = (long *)PCPU_PTR(cp_time);
        if (usermode) {
                /*
                 * Charge the time as appropriate.
                 */
                td->td_uticks += cnt;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE] += cnt;
                else
                        cp_time[CP_USER] += cnt;
        } else {
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                if ((td->td_pflags & TDP_ITHREAD) ||
                    td->td_intr_nesting_level >= 2) {
                        td->td_iticks += cnt;
                        cp_time[CP_INTR] += cnt;
                } else {
                        td->td_pticks += cnt;
                        td->td_sticks += cnt;
                        if (!TD_IS_IDLETHREAD(td))
                                cp_time[CP_SYS] += cnt;
                        else
                                cp_time[CP_IDLE] += cnt;
                }
        }

        /* Update resource usage integrals and maximums. */
        MPASS(p->p_vmspace != NULL);
        vm = p->p_vmspace;
        ru = &td->td_ru;
        ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
        ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
        ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
        rss = pgtok(vmspace_resident_count(vm));
        if (ru->ru_maxrss < rss)
                ru->ru_maxrss = rss;
        KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
            "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
        SDT_PROBE2(sched, , , tick, td, td->td_proc);
        thread_lock_flags(td, MTX_QUIET);

        /*
         * Compute the amount of time during which the current
         * thread was running, and add that to its total so far.
         */
        new_switchtime = cpu_ticks();
        runtime = new_switchtime - PCPU_GET(switchtime);
        td->td_runtime += runtime;
        td->td_incruntime += runtime;
        PCPU_SET(switchtime, new_switchtime);

        sched_clock(td, cnt);
        thread_unlock(td);
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int cnt, int usermode, uintfptr_t pc)
{
        struct thread *td;

        td = curthread;
        if (usermode) {
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled, record the tick.
                 * If there is no related user location yet, don't
                 * bother trying to count it.
                 */
                if (td->td_proc->p_flag & P_PROFIL)
                        addupc_intr(td, pc, cnt);
        }
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
        struct clockinfo clkinfo;
        /*
         * Construct clockinfo structure.
         */
        bzero(&clkinfo, sizeof(clkinfo));
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

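/*
 * Software watchdog configuration hook.  The interval in cmd is encoded as
 * a power of two in nanoseconds (per watchdog(9), WD_TO_1SEC corresponds to
 * 2^30 ns, roughly one second), so the timeout in ticks becomes
 * (1 << (u - WD_TO_1SEC)) * hz.  Sub-second timeouts are not supported
 * here: the watchdog is left disabled and *error is left untouched so the
 * caller can tell the request was not honored.
 */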
static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
        u_int u;

        u = cmd & WD_INTERVAL;
        if (u >= WD_TO_1SEC) {
                watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
                watchdog_enabled = 1;
                *error = 0;
        } else {
                watchdog_enabled = 0;
        }
}

/*
 * Handle a watchdog timeout by dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{

#if defined(KDB) && !defined(KDB_UNATTENDED)
        kdb_backtrace();
        kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
        panic("watchdog timeout");
#endif
}