// SPDX-License-Identifier: GPL-2.0

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/nmi.h>
#include <linux/percpu-defs.h>

/*
 * CPUs currently taking part in the buddy check. A CPU is added by
 * watchdog_hardlockup_enable() and removed by watchdog_hardlockup_disable().
 */
static cpumask_t __read_mostly watchdog_cpus;

/*
 * Find the CPU that @cpu is responsible for checking: the next CPU set in
 * watchdog_cpus after @cpu, wrapping around at the end of the mask.
 *
 * Returns nr_cpu_ids when the search comes all the way back to @cpu itself,
 * i.e. there is no other CPU to check. Callers treat any result that is
 * >= nr_cpu_ids as "nothing to check".
 */
static unsigned int watchdog_next_cpu(unsigned int cpu)
{
	unsigned int next_cpu;

	next_cpu = cpumask_next_wrap(cpu, &watchdog_cpus);
	if (next_cpu == cpu)
		return nr_cpu_ids;

	return next_cpu;
}

/*
 * Probe hook for this hardlockup detector. Sets
 * watchdog_hardlockup_miss_thresh to 3 and unconditionally returns 0.
 *
 * NOTE(review): the threshold is defined and consumed outside this file;
 * the value presumably corresponds to the "3 sampling periods" reasoning
 * in watchdog_hardlockup_enable() -- confirm against the common code.
 */
int __init watchdog_hardlockup_probe(void)
{
	watchdog_hardlockup_miss_thresh = 3;
	return 0;
}

/*
 * Add @cpu to the set of CPUs taking part in the buddy check.
 *
 * The watchdog is touched on @cpu and on the CPU that @cpu is going to
 * check before @cpu is published in watchdog_cpus; the order of these
 * steps matters, see the comments below.
 */
void watchdog_hardlockup_enable(unsigned int cpu)
{
	unsigned int next_cpu;

	/*
	 * The new CPU will be marked online before the hrtimer interrupt
	 * gets a chance to run on it. If another CPU tests for a
	 * hardlockup on the new CPU before it has run its hrtimer
	 * interrupt, it will get a false positive. Touch the watchdog on
	 * the new CPU to delay the check for at least 3 sampling periods
	 * to guarantee one hrtimer has run on the new CPU.
	 */
	watchdog_hardlockup_touch_cpu(cpu);

	/*
	 * We are going to check the next CPU. Our watchdog_hrtimer
	 * need not be zero if the CPU has already been online earlier.
	 * Touch the watchdog on the next CPU to avoid a false positive
	 * if we try to check it in less than 3 interrupts.
	 */
	next_cpu = watchdog_next_cpu(cpu);
	if (next_cpu < nr_cpu_ids)
		watchdog_hardlockup_touch_cpu(next_cpu);

	/*
	 * Make sure that the watchdog is touched on this CPU before
	 * other CPUs could see it in watchdog_cpus. The counterpart
	 * is in watchdog_buddy_check_hardlockup().
	 */
	smp_wmb();

	cpumask_set_cpu(cpu, &watchdog_cpus);
}

/*
 * Remove @cpu from the set of CPUs taking part in the buddy check.
 *
 * The CPU that @cpu has been checking is touched first, and only then is
 * @cpu cleared from watchdog_cpus; see the comments below for why.
 */
void watchdog_hardlockup_disable(unsigned int cpu)
{
	unsigned int next_cpu = watchdog_next_cpu(cpu);

	/*
	 * Offlining this CPU will cause the CPU before this one to start
	 * checking the one after this one. If this CPU just finished checking
	 * the next CPU and updating hrtimer_interrupts_saved, and then the
	 * previous CPU checks it within one sample period, it will trigger a
	 * false positive. Touch the watchdog on the next CPU to prevent it.
	 */
	if (next_cpu < nr_cpu_ids)
		watchdog_hardlockup_touch_cpu(next_cpu);

	/*
	 * Make sure that the watchdog is touched on the next CPU before
	 * this CPU disappears from watchdog_cpus. The counterpart is in
	 * watchdog_buddy_check_hardlockup().
	 */
	smp_wmb();

	cpumask_clear_cpu(cpu, &watchdog_cpus);
}

/*
 * Check the CPU that follows the current one in watchdog_cpus for a
 * hardlockup. Does nothing when there is no other CPU to check.
 *
 * @hrtimer_interrupts is not used by this function.
 */
void watchdog_buddy_check_hardlockup(int hrtimer_interrupts)
{
	unsigned int next_cpu;

	/* check for a hardlockup on the next CPU */
	next_cpu = watchdog_next_cpu(smp_processor_id());
	if (next_cpu >= nr_cpu_ids)
		return;

	/*
	 * Make sure that the watchdog was touched on the next CPU when
	 * watchdog_next_cpu() returned another one because of
	 * a change in watchdog_hardlockup_enable()/disable().
	 */
	smp_rmb();

	watchdog_hardlockup_check(next_cpu, NULL);
}