/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure are based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/ptrace.h>
#include <asm/local.h>
#include <asm/pcr.h>

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile counter overflow interrupt arrives at level 15, so we
 * use level 14 as our IRQ off level.
 */

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);

static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

/* Tell every CPU's watchdog that forward progress is being made.
 * The flags are consumed, and the lockup counters reset, in
 * perfctr_irq().
 */
void touch_nmi_watchdog(void)
{
	if (atomic_read(&nmi_active)) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	console_verbose();
	bust_spinlocks(1);

	printk(KERN_EMERG "%s", str);
	printk(" on CPU%d, ip %08lx, registers:\n",
	       smp_processor_id(), regs->tpc);
	show_regs(regs);
	dump_stack();

	bust_spinlocks(0);

	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");

	nmi_exit();
	local_irq_enable();
	do_exit(SIGBUS);
}

/* The level 15 "NMI" handler: re-arm the overflow interrupt and check
 * whether this CPU's IRQ 0 count has advanced since the last overflow.
 * If it stays stuck for 5 * nmi_hz consecutive samples, and nothing
 * has touched the watchdog in the meantime, report a lockup via
 * die_nmi().
 */
notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	int cpu = smp_processor_id();

	clear_softint(1 << irq);
	pcr_ops->write(PCR_PIC_PRIV);

	local_cpu_data().__nmi_count++;

	nmi_enter();

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;

	sum = kstat_irqs_cpu(0, cpu);
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}
	if (__get_cpu_var(wd_enabled)) {
		write_pic(picl_value(nmi_hz));
		pcr_ops->write(pcr_enable);
	}

	nmi_exit();
}

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}

/* Spin with interrupts enabled so every CPU stays busy while
 * check_nmi_watchdog() samples the per-cpu NMI counts.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%u->%u)!\n",
		cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

/* Per-cpu: turn this CPU's watchdog off. */
static void stop_nmi_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
	__get_cpu_var(wd_enabled) = 0;
	atomic_dec(&nmi_active);
}

/* Boot-time sanity check: with all CPUs kept busy, each CPU's NMI
 * count should advance by more than 5 over a ~20 tick window.  Any
 * CPU that falls short gets its watchdog disabled and reported.
 * Once the test passes, drop the watchdog rate to 1 Hz for normal
 * operation.
 */
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		err = -ENODEV;
		goto error;
	}
	printk(KERN_CONT "OK.\n");

	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return err;
}

/* Per-cpu: program the counter to overflow nmi_hz times per second,
 * then enable it.
 */
static void start_nmi_watchdog(void *unused)
{
	__get_cpu_var(wd_enabled) = 1;
	atomic_inc(&nmi_active);

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

static void nmi_adjust_hz_one(void *unused)
{
	if (!__get_cpu_var(wd_enabled))
		return;

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

/* Retune the overflow rate on all CPUs at once. */
void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

/* Stop the watchdog on all CPUs before reboot. */
static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call = nmi_shutdown,
};

int __init nmi_init(void)
{
	int err;

	on_each_cpu(start_nmi_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			atomic_set(&nmi_active, -1);
		}
	}
	return err;
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
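
/* Usage sketch (illustrative only: touch_nmi_watchdog() and
 * nmi_adjust_hz() are the real interfaces exported above, but the
 * surrounding loop and its helpers are hypothetical).  Any code path
 * that legitimately keeps a CPU spinning for on the order of
 * 5 * nmi_hz watchdog periods should reset the lockup counters as it
 * goes, e.g.:
 *
 *	while (more_work_to_do()) {
 *		process_one_chunk();
 *		touch_nmi_watchdog();
 *	}
 *
 * Similarly, a caller that needs the overflow interrupt at a
 * different rate can use nmi_adjust_hz() to retune every CPU at once.
 */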