/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/perf_event.h>
#include <linux/kthread.h>

/*
 * The run state of the lockup detectors is controlled by the content of the
 * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
 * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
 *
 * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
 * are variables that are only used as an 'interface' between the parameters
 * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
 * 'watchdog_thresh' variable is handled differently because its value is not
 * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
 * is equal to zero.
 */
#define NMI_WATCHDOG_ENABLED_BIT   0
#define SOFT_WATCHDOG_ENABLED_BIT  1
#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)

static DEFINE_MUTEX(watchdog_proc_mutex);

#ifdef CONFIG_HARDLOCKUP_DETECTOR
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

/* Helper for online, unparked cpus. */
#define for_each_watchdog_cpu(cpu) \
	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

/*
 * The 'watchdog_running' variable is set to 1 when the watchdog threads
 * are registered/started and is set to 0 when the watchdog threads are
 * unregistered/stopped, so it is an indicator whether the threads exist.
 */
static int __read_mostly watchdog_running;
/*
 * If a subsystem has a need to deactivate the watchdog temporarily, it
 * can use the suspend/resume interface to achieve this. The content of
 * the 'watchdog_suspended' variable reflects this state. Existing threads
 * are parked/unparked by the lockup_detector_{suspend|resume} functions
 * (see comment blocks pertaining to those functions for further details).
 *
 * 'watchdog_suspended' also prevents threads from being registered/started
 * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
 * of 'watchdog_running' cannot change while the watchdog is deactivated
 * temporarily (see related code in 'proc' handlers).
 */
static int __read_mostly watchdog_suspended;
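/*
 * Summary of the per-cpu state declared below (descriptive only, see the
 * functions further down for how each field is consumed):
 *
 *  - sample_period:       hrtimer interval in ns, 1/5th of the soft lockup
 *                         threshold
 *  - watchdog_touch_ts:   timestamp (in seconds) of the last watchdog touch
 *                         on this cpu; 0 means "just touched"
 *  - hrtimer_interrupts:  incremented on every watchdog hrtimer tick; the
 *                         NMI callback compares it against
 *                         hrtimer_interrupts_saved to detect a hard lockup,
 *                         and the watchdog thread compares it against
 *                         soft_lockup_hrtimer_cnt to decide whether it
 *                         needs to run
 */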
static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
static unsigned long soft_lockup_nmi_warn;

/* boot commands */
/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int hardlockup_panic =
			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void hardlockup_detector_disable(void)
{
	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
}

static int __init hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		hardlockup_panic = 0;
	else if (!strncmp(str, "0", 1))
		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
	else if (!strncmp(str, "1", 1))
		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif

unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	watchdog_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions, so we generally
 * want a higher threshold for soft lockups than for hard lockups. We therefore
 * couple the thresholds with a factor: the soft threshold is twice the hard
 * threshold.
 */
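/*
 * Worked example with the default watchdog_thresh of 10: the hard lockup
 * NMI fires roughly every 10 seconds, the soft lockup threshold computed
 * below is 2 * 10 = 20 seconds, and the hrtimer sample period is
 * 20 * NSEC_PER_SEC / 5 = 4 seconds.
 */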
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}

/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}

void touch_softlockup_watchdog(void)
{
	/*
	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * this is done lockless
	 * do we care if a 0 races with a timestamp?
	 * all it means is the softlockup check starts one cycle later
	 */
	for_each_watchdog_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
}

#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
	/*
	 * Using __raw here because some code paths have
	 * preemption enabled.  If preemption is enabled
	 * then interrupts should be enabled too, in which
	 * case we shouldn't have to worry about the watchdog
	 * going off.
	 */
	raw_cpu_write(watchdog_nmi_touch, true);
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

#endif

void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}
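/*
 * Illustrative sketch (not taken from any caller in this file) of how the
 * touch interfaces above are meant to be used.  Code that legitimately
 * keeps a cpu busy - here a hypothetical poll_hw_done(dev) loop, both
 * names are placeholders - pets the detectors so that the delay is not
 * reported as a lockup:
 *
 *	while (!poll_hw_done(dev)) {
 *		cpu_relax();
 *		touch_nmi_watchdog();	(also touches the softlockup watchdog)
 *	}
 */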
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
static int is_hardlockup(void)
{
	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
		return 1;

	__this_cpu_write(hrtimer_interrupts_saved, hrint);
	return 0;
}
#endif

static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) {
		/* Warn about unreasonable delays. */
		if (time_after(now, touch_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}

#ifdef CONFIG_HARDLOCKUP_DETECTOR

static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};

/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
		 struct perf_sample_data *data,
		 struct pt_regs *regs)
{
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

	if (__this_cpu_read(watchdog_nmi_touch) == true) {
		__this_cpu_write(watchdog_nmi_touch, false);
		return;
	}

	/*
	 * Check for a hardlockup by making sure the cpu's timer
	 * interrupt is incrementing.  The timer interrupt should have
	 * fired multiple times before this perf event overflowed; if it
	 * hasn't, that is a good indication the cpu is stuck.
	 */
	if (is_hardlockup()) {
		int this_cpu = smp_processor_id();

		/* only print hardlockups once */
		if (__this_cpu_read(hard_watchdog_warn) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %d",
			      this_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
			     this_cpu);

		__this_cpu_write(hard_watchdog_warn, true);
		return;
	}

	__this_cpu_write(hard_watchdog_warn, false);
	return;
}
#endif /* CONFIG_HARDLOCKUP_DETECTOR */

static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}

static int watchdog_nmi_enable(unsigned int cpu);
static void watchdog_nmi_disable(unsigned int cpu);
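/*
 * Big picture of the two detectors (descriptive summary of the code that
 * follows): the hrtimer below fires every sample_period and (a) increments
 * hrtimer_interrupts, (b) wakes the per-cpu watchdog thread, and (c)
 * compares watchdog_touch_ts against the soft lockup threshold.  The perf
 * NMI fires roughly every watchdog_thresh seconds and reports a hard
 * lockup if hrtimer_interrupts has not advanced since the previous NMI.
 * With the default thresholds the hrtimer therefore gets two or three
 * chances to advance the counter between consecutive NMIs.
 */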
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/*
	 * Check for a softlockup by making sure the high priority
	 * watchdog task is being scheduled.  The task touches the
	 * watchdog to indicate it is getting cpu time.  If it hasn't,
	 * that is a good indication some task is hogging the cpu.
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup, check to see if the host
		 * stopped the vm before we issue the warning
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm.  The second process prevents the quiet cycle
			 * and never gets reported.  Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/* Prevent multiple soft-lockup reports if one cpu is already
			 * engaged in dumping cpu back traces
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/* Avoid generating two back traces for current
			 * given that one is already made above
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}

static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}

static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* initialize timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
	watchdog_nmi_disable(cpu);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}
/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp.  If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();

	/*
	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
	 * failure path.  Check for failures that can occur asynchronously -
	 * for example, when CPUs are on-lined - and shut down the hardware
	 * perf event on each CPU accordingly.
	 *
	 * The only non-obvious place this bit can be cleared is through
	 * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
	 * pr_info here would be too noisy as it would result in a message
	 * every few seconds if the hardlockup was disabled but the softlockup
	 * enabled.
	 */
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		watchdog_nmi_disable(cpu);
}

#ifdef CONFIG_HARDLOCKUP_DETECTOR
/*
 * People like the simple clean cpu node info on boot.
 * Reduce the watchdog noise by only printing messages
 * that are different from what cpu0 displayed.
 */
static unsigned long cpu0_err;

static int watchdog_nmi_enable(unsigned int cpu)
{
	struct perf_event_attr *wd_attr;
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	/* nothing to do if the hard lockup detector is disabled */
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		goto out;

	/* is it already setup and enabled? */
	if (event && event->state > PERF_EVENT_STATE_OFF)
		goto out;

	/* it is setup but not enabled */
	if (event != NULL)
		goto out_enable;

	wd_attr = &wd_hw_attr;
	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

	/* Try to register using hardware perf events */
	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);

	/* save cpu0 error for future comparison */
	if (cpu == 0 && IS_ERR(event))
		cpu0_err = PTR_ERR(event);

	if (!IS_ERR(event)) {
		/* only print for cpu0 or different than cpu0 */
		if (cpu == 0 || cpu0_err)
			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
		goto out_save;
	}

	/*
	 * Disable the hard lockup detector if _any_ CPU fails to set up
	 * the hardware perf event.  The watchdog() function checks the
	 * NMI_WATCHDOG_ENABLED bit periodically.
	 *
	 * The barriers are for syncing up watchdog_enabled across all the
	 * cpus, as clear_bit() does not use barriers.
	 */
	smp_mb__before_atomic();
	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
	smp_mb__after_atomic();

	/* skip displaying the same error again */
	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
		return PTR_ERR(event);

	/* vary the KERN level based on the returned errno */
	if (PTR_ERR(event) == -EOPNOTSUPP)
		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
	else if (PTR_ERR(event) == -ENOENT)
		pr_warn("disabled (cpu%i): hardware events not enabled\n",
			 cpu);
	else
		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
			cpu, PTR_ERR(event));

	pr_info("Shutting down hard lockup detector on all cpus\n");

	return PTR_ERR(event);

	/* success path */
out_save:
	per_cpu(watchdog_ev, cpu) = event;
out_enable:
	perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
	return 0;
}

static void watchdog_nmi_disable(unsigned int cpu)
{
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	if (event) {
		perf_event_disable(event);
		per_cpu(watchdog_ev, cpu) = NULL;

		/* should be in cleanup, but blocks oprofile */
		perf_event_release_kernel(event);
	}
	if (cpu == 0) {
		/* watchdog_nmi_enable() expects this to be zero initially. */
		cpu0_err = 0;
	}
}

#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */

static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};

/*
 * park all watchdog threads that are specified in 'watchdog_cpumask'
 */
static int watchdog_park_threads(void)
{
	int cpu, ret = 0;

	get_online_cpus();
	for_each_watchdog_cpu(cpu) {
		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
		if (ret)
			break;
	}
	if (ret) {
		for_each_watchdog_cpu(cpu)
			kthread_unpark(per_cpu(softlockup_watchdog, cpu));
	}
	put_online_cpus();

	return ret;
}

/*
 * unpark all watchdog threads that are specified in 'watchdog_cpumask'
 */
static void watchdog_unpark_threads(void)
{
	int cpu;

	get_online_cpus();
	for_each_watchdog_cpu(cpu)
		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
	put_online_cpus();
}

/*
 * Suspend the hard and soft lockup detector by parking the watchdog threads.
 */
int lockup_detector_suspend(void)
{
	int ret = 0;

	mutex_lock(&watchdog_proc_mutex);
	/*
	 * Multiple suspend requests can be active in parallel (counted by
	 * the 'watchdog_suspended' variable).  If the watchdog threads are
	 * running, the first caller takes care that they will be parked.
	 * The state of 'watchdog_running' cannot change while a suspend
	 * request is active (see related code in 'proc' handlers).
	 */
	if (watchdog_running && !watchdog_suspended)
		ret = watchdog_park_threads();

	if (ret == 0)
		watchdog_suspended++;

	mutex_unlock(&watchdog_proc_mutex);

	return ret;
}

/*
 * Resume the hard and soft lockup detector by unparking the watchdog threads.
 */
void lockup_detector_resume(void)
{
	mutex_lock(&watchdog_proc_mutex);

	watchdog_suspended--;
	/*
	 * The watchdog threads are unparked if they were previously running
	 * and if there is no more active suspend request.
	 */
	if (watchdog_running && !watchdog_suspended)
		watchdog_unpark_threads();

	mutex_unlock(&watchdog_proc_mutex);
}
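/*
 * Illustrative sketch, not taken from an in-tree caller: a subsystem that
 * must keep the detectors quiet around a long, known-safe operation
 * (do_long_operation() is a placeholder) would pair the calls like this:
 *
 *	if (!lockup_detector_suspend()) {
 *		do_long_operation();
 *		lockup_detector_resume();
 *	}
 *
 * lockup_detector_suspend() returns non-zero if parking the threads failed;
 * in that case the suspend count is not raised and lockup_detector_resume()
 * must not be called.
 */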
static void update_watchdog_all_cpus(void)
{
	watchdog_park_threads();
	watchdog_unpark_threads();
}

static int watchdog_enable_all_cpus(void)
{
	int err = 0;

	if (!watchdog_running) {
		err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
							     &watchdog_cpumask);
		if (err)
			pr_err("Failed to create watchdog threads, disabled\n");
		else
			watchdog_running = 1;
	} else {
		/*
		 * Enable/disable the lockup detectors or
		 * change the sample period 'on the fly'.
		 */
		update_watchdog_all_cpus();
	}

	return err;
}

/* prepare/enable/disable routines */
/* sysctl functions */
#ifdef CONFIG_SYSCTL
static void watchdog_disable_all_cpus(void)
{
	if (watchdog_running) {
		watchdog_running = 0;
		smpboot_unregister_percpu_thread(&watchdog_threads);
	}
}

/*
 * Update the run state of the lockup detectors.
 */
static int proc_watchdog_update(void)
{
	int err = 0;

	/*
	 * Watchdog threads won't be started if they are already active.
	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
	 * care of this.  If those threads are already active, the sample
	 * period will be updated and the lockup detectors will be enabled
	 * or disabled 'on the fly'.
	 */
	if (watchdog_enabled && watchdog_thresh)
		err = watchdog_enable_all_cpus();
	else
		watchdog_disable_all_cpus();

	return err;
}

/*
 * common function for the watchdog, nmi_watchdog and soft_watchdog parameters
 *
 * caller             | table->data points to | 'which' contains the flag(s)
 * -------------------|-----------------------|-----------------------------
 * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
 *                    |                       | with SOFT_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, new;
	int *watchdog_param = (int *)table->data;

	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	/*
	 * If the parameter is being read return the state of the corresponding
	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
	 * run state of the lockup detectors.
	 */
	if (!write) {
		*watchdog_param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (err)
			goto out;

		/*
		 * There is a race window between fetching the current value
		 * from 'watchdog_enabled' and storing the new value.  During
		 * this race window, watchdog_nmi_enable() can sneak in and
		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
		 * The 'cmpxchg' detects this race and the loop retries.
		 */
		do {
			old = watchdog_enabled;
			/*
			 * If the parameter value is not zero set the
			 * corresponding bit(s), else clear it(them).
			 */
			if (*watchdog_param)
				new = old | which;
			else
				new = old & ~which;
		} while (cmpxchg(&watchdog_enabled, old, new) != old);

		/*
		 * Update the run state of the lockup detectors.
		 * Restore 'watchdog_enabled' on failure.
		 */
		err = proc_watchdog_update();
		if (err)
			watchdog_enabled = old;
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	return err;
}

/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
		      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old;

	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	old = ACCESS_ONCE(watchdog_thresh);
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (err || !write)
		goto out;

	/*
	 * Update the sample period.
	 * Restore 'watchdog_thresh' on failure.
	 */
	set_sample_period();
	err = proc_watchdog_update();
	if (err)
		watchdog_thresh = old;
out:
	mutex_unlock(&watchdog_proc_mutex);
	return err;
}
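/*
 * Runtime tuning goes through the proc files named in the comments above,
 * for example (illustrative shell session, values chosen arbitrarily):
 *
 *	echo 0  > /proc/sys/kernel/nmi_watchdog      # hard lockup detector off
 *	echo 20 > /proc/sys/kernel/watchdog_thresh   # hard threshold 20s, soft 40s
 */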
/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write) {
		/* Remove impossible cpus to keep sysctl output cleaner. */
		cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
			    cpu_possible_mask);

		if (watchdog_running) {
			/*
			 * Failure would be due to being unable to allocate
			 * a temporary cpumask, so we are likely not in a
			 * position to do much else to make things better.
			 */
			if (smpboot_update_cpumask_percpu_thread(
				    &watchdog_threads, &watchdog_cpumask) != 0)
				pr_err("cpumask update failed\n");
		}
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	return err;
}

#endif /* CONFIG_SYSCTL */

void __init lockup_detector_init(void)
{
	set_sample_period();

#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
		cpumask_copy(&watchdog_cpumask, housekeeping_mask);
	} else
		cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif

	if (watchdog_enabled)
		watchdog_enable_all_cpus();
}