1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/cpu.h> 4 #include <linux/cpumask.h> 5 #include <linux/kernel.h> 6 #include <linux/nmi.h> 7 #include <linux/percpu-defs.h> 8 9 static cpumask_t __read_mostly watchdog_cpus; 10 11 static unsigned int watchdog_next_cpu(unsigned int cpu) 12 { 13 unsigned int next_cpu; 14 15 next_cpu = cpumask_next(cpu, &watchdog_cpus); 16 if (next_cpu >= nr_cpu_ids) 17 next_cpu = cpumask_first(&watchdog_cpus); 18 19 if (next_cpu == cpu) 20 return nr_cpu_ids; 21 22 return next_cpu; 23 } 24 25 int __init watchdog_hardlockup_probe(void) 26 { 27 return 0; 28 } 29 30 void watchdog_hardlockup_enable(unsigned int cpu) 31 { 32 unsigned int next_cpu; 33 34 /* 35 * The new CPU will be marked online before the hrtimer interrupt 36 * gets a chance to run on it. If another CPU tests for a 37 * hardlockup on the new CPU before it has run its the hrtimer 38 * interrupt, it will get a false positive. Touch the watchdog on 39 * the new CPU to delay the check for at least 3 sampling periods 40 * to guarantee one hrtimer has run on the new CPU. 41 */ 42 watchdog_hardlockup_touch_cpu(cpu); 43 44 /* 45 * We are going to check the next CPU. Our watchdog_hrtimer 46 * need not be zero if the CPU has already been online earlier. 47 * Touch the watchdog on the next CPU to avoid false positive 48 * if we try to check it in less then 3 interrupts. 49 */ 50 next_cpu = watchdog_next_cpu(cpu); 51 if (next_cpu < nr_cpu_ids) 52 watchdog_hardlockup_touch_cpu(next_cpu); 53 54 /* 55 * Makes sure that watchdog is touched on this CPU before 56 * other CPUs could see it in watchdog_cpus. The counter 57 * part is in watchdog_buddy_check_hardlockup(). 58 */ 59 smp_wmb(); 60 61 cpumask_set_cpu(cpu, &watchdog_cpus); 62 } 63 64 void watchdog_hardlockup_disable(unsigned int cpu) 65 { 66 unsigned int next_cpu = watchdog_next_cpu(cpu); 67 68 /* 69 * Offlining this CPU will cause the CPU before this one to start 70 * checking the one after this one. If this CPU just finished checking 71 * the next CPU and updating hrtimer_interrupts_saved, and then the 72 * previous CPU checks it within one sample period, it will trigger a 73 * false positive. Touch the watchdog on the next CPU to prevent it. 74 */ 75 if (next_cpu < nr_cpu_ids) 76 watchdog_hardlockup_touch_cpu(next_cpu); 77 78 /* 79 * Makes sure that watchdog is touched on the next CPU before 80 * this CPU disappear in watchdog_cpus. The counter part is in 81 * watchdog_buddy_check_hardlockup(). 82 */ 83 smp_wmb(); 84 85 cpumask_clear_cpu(cpu, &watchdog_cpus); 86 } 87 88 void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) 89 { 90 unsigned int next_cpu; 91 92 /* 93 * Test for hardlockups every 3 samples. The sample period is 94 * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over 95 * watchdog_thresh (over by 20%). 96 */ 97 if (hrtimer_interrupts % 3 != 0) 98 return; 99 100 /* check for a hardlockup on the next CPU */ 101 next_cpu = watchdog_next_cpu(smp_processor_id()); 102 if (next_cpu >= nr_cpu_ids) 103 return; 104 105 /* 106 * Make sure that the watchdog was touched on next CPU when 107 * watchdog_next_cpu() returned another one because of 108 * a change in watchdog_hardlockup_enable()/disable(). 109 */ 110 smp_rmb(); 111 112 watchdog_hardlockup_check(next_cpu, NULL); 113 } 114