1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * SGI NMI support routines
4  *
5  *  Copyright (c) 2009-2013 Silicon Graphics, Inc.  All Rights Reserved.
6  *  Copyright (c) Mike Travis
7  */
8 
9 #include <linux/cpu.h>
10 #include <linux/delay.h>
11 #include <linux/kdb.h>
12 #include <linux/kexec.h>
13 #include <linux/kgdb.h>
14 #include <linux/moduleparam.h>
15 #include <linux/nmi.h>
16 #include <linux/sched.h>
17 #include <linux/sched/debug.h>
18 #include <linux/slab.h>
19 #include <linux/clocksource.h>
20 
21 #include <asm/apic.h>
22 #include <asm/current.h>
23 #include <asm/kdebug.h>
24 #include <asm/local64.h>
25 #include <asm/nmi.h>
26 #include <asm/traps.h>
27 #include <asm/uv/uv.h>
28 #include <asm/uv/uv_hub.h>
29 #include <asm/uv/uv_mmrs.h>
30 
31 /*
32  * UV handler for NMI
33  *
34  * Handle system-wide NMI events generated by the global 'power nmi' command.
35  *
36  * Basic operation is to field the NMI interrupt on each CPU and wait
37  * until all CPUs have arrived in the NMI handler.  If some CPUs do not
38  * make it into the handler, try to force them in with the IPI(NMI) signal.
39  *
40  * We also have to minimize UV Hub MMR accesses as much as possible, as
41  * these accesses disrupt the UV Hub's primary mission of directing
42  * NumaLink traffic and can cause system problems.
43  *
44  * To do this we register our primary NMI notifier on the NMI_UNKNOWN
45  * chain.  This reduces the number of false NMI calls when the perf
46  * tools are running, since they generate an enormous number of NMIs per
47  * second (~4M/s for 1024 CPU threads).  Our secondary NMI handler is
48  * very short as it only checks whether it has been "pinged" with the
49  * IPI(NMI) signal mentioned above, and does not read the UV Hub's MMR.
50  *
51  */
52 
53 static struct uv_hub_nmi_s **uv_hub_nmi_list;
54 
55 DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
56 
57 /* UV hubless values */
58 #define NMI_CONTROL_PORT	0x70
59 #define NMI_DUMMY_PORT		0x71
60 #define PAD_OWN_GPP_D_0		0x2c
61 #define GPI_NMI_STS_GPP_D_0	0x164
62 #define GPI_NMI_ENA_GPP_D_0	0x174
63 #define STS_GPP_D_0_MASK	0x1
64 #define PAD_CFG_DW0_GPP_D_0	0x4c0
65 #define GPIROUTNMI		(1ul << 17)
66 #define PCH_PCR_GPIO_1_BASE	0xfdae0000ul
67 #define PCH_PCR_GPIO_ADDRESS(offset) (int *)((u64)(pch_base) | (u64)(offset))
68 
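/*
 * pch_base is the kernel virtual mapping of the PCH GPIO community at
 * PCH_PCR_GPIO_1_BASE, set up in uv_nmi_setup_hubless() via
 * xlate_dev_mem_ptr().
 */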
69 static u64 *pch_base;
70 static unsigned long nmi_mmr;
71 static unsigned long nmi_mmr_clear;
72 static unsigned long nmi_mmr_pending;
73 
74 static atomic_t	uv_in_nmi;
75 static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
76 static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
77 static atomic_t uv_nmi_slave_continue;
78 static cpumask_var_t uv_nmi_cpu_mask;
79 
80 /* Values for uv_nmi_slave_continue */
81 #define SLAVE_CLEAR	0
82 #define SLAVE_CONTINUE	1
83 #define SLAVE_EXIT	2
84 
85 /*
86  * By default all stack dumps go to the console and the log buffer.
87  * Lower the level to send them to the log buffer only.
88  */
89 static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
90 module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
91 
92 /*
93  * The following values show statistics on how perf events are affecting
94  * this system.
95  */
96 static int param_get_local64(char *buffer, const struct kernel_param *kp)
97 {
98 	return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg));
99 }
100 
101 static int param_set_local64(const char *val, const struct kernel_param *kp)
102 {
103 	/* Clear on any write */
104 	local64_set((local64_t *)kp->arg, 0);
105 	return 0;
106 }
107 
108 static const struct kernel_param_ops param_ops_local64 = {
109 	.get = param_get_local64,
110 	.set = param_set_local64,
111 };
112 #define param_check_local64(name, p) __param_check(name, p, local64_t)
113 
114 static local64_t uv_nmi_count;
115 module_param_named(nmi_count, uv_nmi_count, local64, 0644);
116 
117 static local64_t uv_nmi_misses;
118 module_param_named(nmi_misses, uv_nmi_misses, local64, 0644);
119 
120 static local64_t uv_nmi_ping_count;
121 module_param_named(ping_count, uv_nmi_ping_count, local64, 0644);
122 
123 static local64_t uv_nmi_ping_misses;
124 module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644);
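
/*
 * The counters above are plain module parameters; assuming the usual
 * built-in module parameter layout they appear under
 * /sys/module/uv_nmi/parameters/.  Reading one returns the current count
 * and writing any value clears it (see param_set_local64()).
 */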
125 
126 /*
127  * The following values allow tuning for large systems under heavy load.
128  */
129 static int uv_nmi_initial_delay = 100;
130 module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644);
131 
132 static int uv_nmi_slave_delay = 100;
133 module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644);
134 
135 static int uv_nmi_loop_delay = 100;
136 module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644);
137 
138 static int uv_nmi_trigger_delay = 10000;
139 module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644);
140 
141 static int uv_nmi_wait_count = 100;
142 module_param_named(wait_count, uv_nmi_wait_count, int, 0644);
143 
144 static int uv_nmi_retry_count = 500;
145 module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
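
/*
 * Note: initial_delay, slave_delay and loop_delay are microsecond values
 * (passed to udelay()); trigger_delay is a retry count polled at roughly
 * 10us per iteration; wait_count bounds the number of consecutive polls
 * with no new CPUs arriving, and retry_count bounds the total polls.
 */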
146 
147 static bool uv_pch_intr_enable = true;
148 static bool uv_pch_intr_now_enabled;
149 module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644);
150 
151 static bool uv_pch_init_enable = true;
152 module_param_named(pch_init_enable, uv_pch_init_enable, bool, 0644);
153 
154 static int uv_nmi_debug;
155 module_param_named(debug, uv_nmi_debug, int, 0644);
156 
157 #define nmi_debug(fmt, ...)				\
158 	do {						\
159 		if (uv_nmi_debug)			\
160 			pr_info(fmt, ##__VA_ARGS__);	\
161 	} while (0)
162 
163 /* Valid NMI Actions */
164 #define	ACTION_LEN	16
165 static struct nmi_action {
166 	char	*action;
167 	char	*desc;
168 } valid_acts[] = {
169 	{	"kdump",	"do kernel crash dump"			},
170 	{	"dump",		"dump process stack for each cpu"	},
171 	{	"ips",		"dump Inst Ptr info for each cpu"	},
172 	{	"kdb",		"enter KDB (needs kgdboc= assignment)"	},
173 	{	"kgdb",		"enter KGDB (needs gdb target remote)"	},
174 	{	"health",	"check if CPUs respond to NMI"		},
175 };
176 typedef char action_t[ACTION_LEN];
177 static action_t uv_nmi_action = { "dump" };
178 
179 static int param_get_action(char *buffer, const struct kernel_param *kp)
180 {
181 	return sprintf(buffer, "%s\n", uv_nmi_action);
182 }
183 
184 static int param_set_action(const char *val, const struct kernel_param *kp)
185 {
186 	int i;
187 	int n = ARRAY_SIZE(valid_acts);
188 	char arg[ACTION_LEN], *p;
189 
190 	/* (remove possible '\n') */
191 	strncpy(arg, val, ACTION_LEN - 1);
192 	arg[ACTION_LEN - 1] = '\0';
193 	p = strchr(arg, '\n');
194 	if (p)
195 		*p = '\0';
196 
197 	for (i = 0; i < n; i++)
198 		if (!strcmp(arg, valid_acts[i].action))
199 			break;
200 
201 	if (i < n) {
202 		strcpy(uv_nmi_action, arg);
203 		pr_info("UV: New NMI action:%s\n", uv_nmi_action);
204 		return 0;
205 	}
206 
207 	pr_err("UV: Invalid NMI action:%s, valid actions are:\n", arg);
208 	for (i = 0; i < n; i++)
209 		pr_err("UV: %-8s - %s\n",
210 			valid_acts[i].action, valid_acts[i].desc);
211 	return -EINVAL;
212 }
213 
214 static const struct kernel_param_ops param_ops_action = {
215 	.get = param_get_action,
216 	.set = param_set_action,
217 };
218 #define param_check_action(name, p) __param_check(name, p, action_t)
219 
220 module_param_named(action, uv_nmi_action, action, 0644);
221 
222 static inline bool uv_nmi_action_is(const char *action)
223 {
224 	return (strncmp(uv_nmi_action, action, strlen(action)) == 0);
225 }
226 
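/*
 * Example usage of the "action" parameter (paths assume the standard
 * built-in module parameter naming, i.e. a "uv_nmi" entry under
 * /sys/module/):
 *
 *	echo dump > /sys/module/uv_nmi/parameters/action
 *
 * or on the kernel command line:
 *
 *	uv_nmi.action=kdb
 */
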
227 /* Set up which NMI support is present in the system */
228 static void uv_nmi_setup_mmrs(void)
229 {
230 	if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
231 		uv_write_local_mmr(UVH_NMI_MMRX_REQ,
232 					1UL << UVH_NMI_MMRX_REQ_SHIFT);
233 		nmi_mmr = UVH_NMI_MMRX;
234 		nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
235 		nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
236 		pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
237 	} else {
238 		nmi_mmr = UVH_NMI_MMR;
239 		nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
240 		nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT;
241 		pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE);
242 	}
243 }
244 
245 /* Read NMI MMR and check if NMI flag was set by BMC. */
246 static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi)
247 {
248 	hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr);
249 	atomic_inc(&hub_nmi->read_mmr_count);
250 	return !!(hub_nmi->nmi_value & nmi_mmr_pending);
251 }
252 
253 static inline void uv_local_mmr_clear_nmi(void)
254 {
255 	uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending);
256 }
257 
258 /*
259  * UV hubless NMI handler functions
260  */
261 static inline void uv_reassert_nmi(void)
262 {
263 	/* (from arch/x86/include/asm/mach_traps.h) */
264 	outb(0x8f, NMI_CONTROL_PORT);
265 	inb(NMI_DUMMY_PORT);		/* dummy read */
266 	outb(0x0f, NMI_CONTROL_PORT);
267 	inb(NMI_DUMMY_PORT);		/* dummy read */
268 }
269 
270 static void uv_init_hubless_pch_io(int offset, int mask, int data)
271 {
272 	int *addr = PCH_PCR_GPIO_ADDRESS(offset);
273 	int readd = readl(addr);
274 
275 	if (mask) {			/* Merge new data under the mask */
276 		int writed = (readd & ~mask) | data;
277 
278 		nmi_debug("UV:PCH: %p = %x & %x | %x (%x)\n",
279 			addr, readd, ~mask, data, writed);
280 		writel(writed, addr);
281 	} else if (readd & data) {	/* clear status bit */
282 		nmi_debug("UV:PCH: %p = %x\n", addr, data);
283 		writel(data, addr);
284 	}
285 
286 	(void)readl(addr);		/* flush write data */
287 }
288 
289 static void uv_nmi_setup_hubless_intr(void)
290 {
291 	uv_pch_intr_now_enabled = uv_pch_intr_enable;
292 
293 	uv_init_hubless_pch_io(
294 		PAD_CFG_DW0_GPP_D_0, GPIROUTNMI,
295 		uv_pch_intr_now_enabled ? GPIROUTNMI : 0);
296 
297 	nmi_debug("UV:NMI: GPP_D_0 interrupt %s\n",
298 		uv_pch_intr_now_enabled ? "enabled" : "disabled");
299 }
300 
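/*
 * Table of PCH GPIO register updates (offset/mask/data) applied by
 * uv_init_hubless_pch_d0() when the "pch_init_enable" parameter is set:
 * clear the GPP_D_0 status bits, disable the per-type interrupt enables,
 * and program the pad so that the only routing enabled is GPIROUTNMI
 * (see the PAD_CFG_DW0 bit breakdown below).
 */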
301 static struct init_nmi {
302 	unsigned int	offset;
303 	unsigned int	mask;
304 	unsigned int	data;
305 } init_nmi[] = {
306 	{	/* HOSTSW_OWN_GPP_D_0 */
307 	.offset = 0x84,
308 	.mask = 0x1,
309 	.data = 0x0,	/* ACPI Mode */
310 	},
311 
312 /* Clear status: */
313 	{	/* GPI_INT_STS_GPP_D_0 */
314 	.offset = 0x104,
315 	.mask = 0x0,
316 	.data = 0x1,	/* Clear Status */
317 	},
318 	{	/* GPI_GPE_STS_GPP_D_0 */
319 	.offset = 0x124,
320 	.mask = 0x0,
321 	.data = 0x1,	/* Clear Status */
322 	},
323 	{	/* GPI_SMI_STS_GPP_D_0 */
324 	.offset = 0x144,
325 	.mask = 0x0,
326 	.data = 0x1,	/* Clear Status */
327 	},
328 	{	/* GPI_NMI_STS_GPP_D_0 */
329 	.offset = 0x164,
330 	.mask = 0x0,
331 	.data = 0x1,	/* Clear Status */
332 	},
333 
334 /* Disable interrupts: */
335 	{	/* GPI_INT_EN_GPP_D_0 */
336 	.offset = 0x114,
337 	.mask = 0x1,
338 	.data = 0x0,	/* Disable interrupt generation */
339 	},
340 	{	/* GPI_GPE_EN_GPP_D_0 */
341 	.offset = 0x134,
342 	.mask = 0x1,
343 	.data = 0x0,	/* Disable interrupt generation */
344 	},
345 	{	/* GPI_SMI_EN_GPP_D_0 */
346 	.offset = 0x154,
347 	.mask = 0x1,
348 	.data = 0x0,	/* Disable interrupt generation */
349 	},
350 	{	/* GPI_NMI_EN_GPP_D_0 */
351 	.offset = 0x174,
352 	.mask = 0x1,
353 	.data = 0x0,	/* Disable interrupt generation */
354 	},
355 
356 /* Setup GPP_D_0 Pad Config: */
357 	{	/* PAD_CFG_DW0_GPP_D_0 */
358 	.offset = 0x4c0,
359 	.mask = 0xffffffff,
360 	.data = 0x82020100,
361 /*
362  *  31:30 Pad Reset Config (PADRSTCFG): = 2h  # PLTRST# (default)
363  *
364  *  29    RX Pad State Select (RXPADSTSEL): = 0 # Raw RX pad state directly
365  *                                                from RX buffer (default)
366  *
367  *  28    RX Raw Override to '1' (RXRAW1): = 0 # No Override
368  *
369  *  26:25 RX Level/Edge Configuration (RXEVCFG):
370  *      = 0h # Level
371  *      = 1h # Edge
372  *
373  *  23    RX Invert (RXINV): = 0 # No Inversion (signal active high)
374  *
375  *  20    GPIO Input Route IOxAPIC (GPIROUTIOXAPIC):
376  * = 0 # Routing does not cause peripheral IRQ...
377  *     # (we want an NMI not an IRQ)
378  *
379  *  19    GPIO Input Route SCI (GPIROUTSCI): = 0 # Routing does not cause SCI.
380  *  18    GPIO Input Route SMI (GPIROUTSMI): = 0 # Routing does not cause SMI.
381  *  17    GPIO Input Route NMI (GPIROUTNMI): = 1 # Routing can cause NMI.
382  *
383  *  11:10 Pad Mode (PMODE1/0): = 0h = GPIO control the Pad.
384  *   9    GPIO RX Disable (GPIORXDIS):
385  * = 0 # Enable the input buffer (active low enable)
386  *
387  *   8    GPIO TX Disable (GPIOTXDIS):
388  * = 1 # Disable the output buffer; i.e. Hi-Z
389  *
390  *   1 GPIO RX State (GPIORXSTATE): This is the current internal RX pad state..
391  *   0 GPIO TX State (GPIOTXSTATE):
392  * = 0 # (Leave at default)
393  */
394 	},
395 
396 /* Pad Config DW1 */
397 	{	/* PAD_CFG_DW1_GPP_D_0 */
398 	.offset = 0x4c4,
399 	.mask = 0x3c00,
400 	.data = 0,	/* Termination = none (default) */
401 	},
402 };
403 
404 static void uv_init_hubless_pch_d0(void)
405 {
406 	int i, read;
407 
408 	read = *PCH_PCR_GPIO_ADDRESS(PAD_OWN_GPP_D_0);
409 	if (read != 0) {
410 		pr_info("UV: Hubless NMI already configured\n");
411 		return;
412 	}
413 
414 	nmi_debug("UV: Initializing UV Hubless NMI on PCH\n");
415 	for (i = 0; i < ARRAY_SIZE(init_nmi); i++) {
416 		uv_init_hubless_pch_io(init_nmi[i].offset,
417 					init_nmi[i].mask,
418 					init_nmi[i].data);
419 	}
420 }
421 
422 static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi)
423 {
424 	int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0);
425 	int status = *pstat;
426 
427 	hub_nmi->nmi_value = status;
428 	atomic_inc(&hub_nmi->read_mmr_count);
429 
430 	if (!(status & STS_GPP_D_0_MASK))	/* Not a UV external NMI */
431 		return 0;
432 
433 	*pstat = STS_GPP_D_0_MASK;	/* Is a UV NMI: clear GPP_D_0 status */
434 	(void)*pstat;			/* Flush write */
435 
436 	return 1;
437 }
438 
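/*
 * Check for a pending UV NMI.  Returns > 0 if one is pending, 0 if not,
 * and < 0 if this CPU cannot tell (hubless system and this node does not
 * own the PCH); in that case the caller waits for the PCH owner instead.
 */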
439 static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi)
440 {
441 	if (hub_nmi->hub_present)
442 		return uv_nmi_test_mmr(hub_nmi);
443 
444 	if (hub_nmi->pch_owner)		/* Only PCH owner can check status */
445 		return uv_nmi_test_hubless(hub_nmi);
446 
447 	return -1;
448 }
449 
450 /*
451  * If this is the first CPU in on this hub, set hub_nmi's "in_nmi" and "owner"
452  * values and return true.  If also first in on the system, set the global "in_nmi" flag.
453  */
454 static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi)
455 {
456 	int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1);
457 
458 	if (first) {
459 		atomic_set(&hub_nmi->cpu_owner, cpu);
460 		if (atomic_add_unless(&uv_in_nmi, 1, 1))
461 			atomic_set(&uv_nmi_cpu, cpu);
462 
463 		atomic_inc(&hub_nmi->nmi_count);
464 	}
465 	return first;
466 }
467 
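/*
 * Detection protocol (summary): only the CPU that wins hub_nmi->nmi_lock
 * actually reads the NMI status (hub MMR or PCH register); the other CPUs
 * poll the hub and global "in_nmi" flags instead, which keeps MMR traffic
 * to a minimum.
 */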
468 /* Check if this is a system NMI event */
469 static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi)
470 {
471 	int cpu = smp_processor_id();
472 	int nmi = 0;
473 	int nmi_detected = 0;
474 
475 	local64_inc(&uv_nmi_count);
476 	this_cpu_inc(uv_cpu_nmi.queries);
477 
478 	do {
479 		nmi = atomic_read(&hub_nmi->in_nmi);
480 		if (nmi)
481 			break;
482 
483 		if (raw_spin_trylock(&hub_nmi->nmi_lock)) {
484 			nmi_detected = uv_test_nmi(hub_nmi);
485 
486 			/* Check flag for UV external NMI */
487 			if (nmi_detected > 0) {
488 				uv_set_in_nmi(cpu, hub_nmi);
489 				nmi = 1;
490 				break;
491 			}
492 
493 			/* A non-PCH node in a hubless system waits for NMI */
494 			else if (nmi_detected < 0)
495 				goto slave_wait;
496 
497 			/* MMR/PCH NMI flag is clear */
498 			raw_spin_unlock(&hub_nmi->nmi_lock);
499 
500 		} else {
501 
502 		/* Wait a moment for the hub NMI lock holder to set the flag */
503 slave_wait:		cpu_relax();
504 			udelay(uv_nmi_slave_delay);
505 
506 			/* Re-check hub in_nmi flag */
507 			nmi = atomic_read(&hub_nmi->in_nmi);
508 			if (nmi)
509 				break;
510 		}
511 
512 		/*
513 		 * Check if this BMC missed setting the MMR NMI flag, or if this
514 		 * is a hubless system where only the PCH owner can check the flag.
515 		 */
516 		if (!nmi) {
517 			nmi = atomic_read(&uv_in_nmi);
518 			if (nmi)
519 				uv_set_in_nmi(cpu, hub_nmi);
520 		}
521 
522 		/* If we're holding the hub lock, release it now */
523 		if (nmi_detected < 0)
524 			raw_spin_unlock(&hub_nmi->nmi_lock);
525 
526 	} while (0);
527 
528 	if (!nmi)
529 		local64_inc(&uv_nmi_misses);
530 
531 	return nmi;
532 }
533 
534 /* Need to reset the NMI MMR, but only once per hub. */
535 static inline void uv_clear_nmi(int cpu)
536 {
537 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
538 
539 	if (cpu == atomic_read(&hub_nmi->cpu_owner)) {
540 		atomic_set(&hub_nmi->cpu_owner, -1);
541 		atomic_set(&hub_nmi->in_nmi, 0);
542 		if (hub_nmi->hub_present)
543 			uv_local_mmr_clear_nmi();
544 		else
545 			uv_reassert_nmi();
546 		raw_spin_unlock(&hub_nmi->nmi_lock);
547 	}
548 }
549 
550 /* Ping non-responding CPUs, attempting to force them into the NMI handler */
551 static void uv_nmi_nr_cpus_ping(void)
552 {
553 	int cpu;
554 
555 	for_each_cpu(cpu, uv_nmi_cpu_mask)
556 		uv_cpu_nmi_per(cpu).pinging = 1;
557 
558 	apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI);
559 }
560 
561 /* Clean up flags for CPUs that ignored both the NMI and the ping */
562 static void uv_nmi_cleanup_mask(void)
563 {
564 	int cpu;
565 
566 	for_each_cpu(cpu, uv_nmi_cpu_mask) {
567 		uv_cpu_nmi_per(cpu).pinging =  0;
568 		uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT;
569 		cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
570 	}
571 }
572 
573 /* Loop waiting as CPUs enter the NMI handler */
574 static int uv_nmi_wait_cpus(int first)
575 {
576 	int i, j, k, n = num_online_cpus();
577 	int last_k = 0, waiting = 0;
578 	int cpu = smp_processor_id();
579 
580 	if (first) {
581 		cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask);
582 		k = 0;
583 	} else {
584 		k = n - cpumask_weight(uv_nmi_cpu_mask);
585 	}
586 
587 	/* PCH NMI causes only one CPU to respond */
588 	if (first && uv_pch_intr_now_enabled) {
589 		cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
590 		return n - k - 1;
591 	}
592 
593 	udelay(uv_nmi_initial_delay);
594 	for (i = 0; i < uv_nmi_retry_count; i++) {
595 		int loop_delay = uv_nmi_loop_delay;
596 
597 		for_each_cpu(j, uv_nmi_cpu_mask) {
598 			if (uv_cpu_nmi_per(j).state) {
599 				cpumask_clear_cpu(j, uv_nmi_cpu_mask);
600 				if (++k >= n)
601 					break;
602 			}
603 		}
604 		if (k >= n) {		/* all in? */
605 			k = n;
606 			break;
607 		}
608 		if (last_k != k) {	/* abort if no new CPUs are coming in */
609 			last_k = k;
610 			waiting = 0;
611 		} else if (++waiting > uv_nmi_wait_count)
612 			break;
613 
614 		/* Extend delay if waiting only for CPU 0: */
615 		if (waiting && (n - k) == 1 &&
616 		    cpumask_test_cpu(0, uv_nmi_cpu_mask))
617 			loop_delay *= 100;
618 
619 		udelay(loop_delay);
620 	}
621 	atomic_set(&uv_nmi_cpus_in_nmi, k);
622 	return n - k;
623 }
624 
625 /* Wait until all slave CPUs have entered the UV NMI handler */
626 static void uv_nmi_wait(int master)
627 {
628 	/* Indicate this CPU is in: */
629 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN);
630 
631 	/* If not the first CPU in (the master), then we are a slave CPU */
632 	if (!master)
633 		return;
634 
635 	do {
636 		/* Wait for all other CPUs to gather here */
637 		if (!uv_nmi_wait_cpus(1))
638 			break;
639 
640 		/* If not all made it in, send IPI NMI to them */
641 		pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n",
642 			 cpumask_weight(uv_nmi_cpu_mask),
643 			 cpumask_pr_args(uv_nmi_cpu_mask));
644 
645 		uv_nmi_nr_cpus_ping();
646 
647 		/* If all CPUs are in, then we are done */
648 		if (!uv_nmi_wait_cpus(0))
649 			break;
650 
651 		pr_alert("UV: %d CPUs not in NMI loop: %*pbl\n",
652 			 cpumask_weight(uv_nmi_cpu_mask),
653 			 cpumask_pr_args(uv_nmi_cpu_mask));
654 	} while (0);
655 
656 	pr_alert("UV: %d of %d CPUs in NMI\n",
657 		atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
658 }
659 
660 /* Dump Instruction Pointer header */
661 static void uv_nmi_dump_cpu_ip_hdr(void)
662 {
663 	pr_info("\nUV: %4s %6s %-32s %s   (Note: PID 0 not listed)\n",
664 		"CPU", "PID", "COMMAND", "IP");
665 }
666 
667 /* Dump Instruction Pointer info */
668 static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
669 {
670 	pr_info("UV: %4d %6d %-32.32s %pS",
671 		cpu, current->pid, current->comm, (void *)regs->ip);
672 }
673 
674 /*
675  * Dump this CPU's state.  If action was set to "kdump" and the crash_kexec
676  * failed, then we provide "dump" as an alternate action.  Action "dump" now
677  * also includes the "ips" (instruction pointers) output, whereas the
678  * action "ips" only displays instruction pointers for the non-idle CPUs.
679  * This is an abbreviated form of the "ps" command.
680  */
681 static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
682 {
683 	const char *dots = " ................................. ";
684 
685 	if (cpu == 0)
686 		uv_nmi_dump_cpu_ip_hdr();
687 
688 	if (current->pid != 0 || !uv_nmi_action_is("ips"))
689 		uv_nmi_dump_cpu_ip(cpu, regs);
690 
691 	if (uv_nmi_action_is("dump")) {
692 		pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
693 		show_regs(regs);
694 	}
695 
696 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
697 }
698 
699 /* Trigger a slave CPU to dump its state */
700 static void uv_nmi_trigger_dump(int cpu)
701 {
702 	int retry = uv_nmi_trigger_delay;
703 
704 	if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN)
705 		return;
706 
707 	uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP;
708 	do {
709 		cpu_relax();
710 		udelay(10);
711 		if (uv_cpu_nmi_per(cpu).state
712 				!= UV_NMI_STATE_DUMP)
713 			return;
714 	} while (--retry > 0);
715 
716 	pr_crit("UV: CPU %d stuck in process dump function\n", cpu);
717 	uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE;
718 }
719 
720 /* Wait until all CPUs are ready to exit */
721 static void uv_nmi_sync_exit(int master)
722 {
723 	atomic_dec(&uv_nmi_cpus_in_nmi);
724 	if (master) {
725 		while (atomic_read(&uv_nmi_cpus_in_nmi) > 0)
726 			cpu_relax();
727 		atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
728 	} else {
729 		while (atomic_read(&uv_nmi_slave_continue))
730 			cpu_relax();
731 	}
732 }
733 
734 /* The current "health" check reports how many CPUs are not responding */
735 static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master)
736 {
737 	if (master) {
738 		int in = atomic_read(&uv_nmi_cpus_in_nmi);
739 		int out = num_online_cpus() - in;
740 
741 		pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out);
742 		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
743 	} else {
744 		while (!atomic_read(&uv_nmi_slave_continue))
745 			cpu_relax();
746 	}
747 	uv_nmi_sync_exit(master);
748 }
749 
750 /* Walk through the CPU list and dump the state of each CPU */
751 static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
752 {
753 	if (master) {
754 		int tcpu;
755 		int ignored = 0;
756 		int saved_console_loglevel = console_loglevel;
757 
758 		pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
759 			uv_nmi_action_is("ips") ? "IPs" : "processes",
760 			atomic_read(&uv_nmi_cpus_in_nmi), cpu);
761 
762 		console_loglevel = uv_nmi_loglevel;
763 		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
764 		for_each_online_cpu(tcpu) {
765 			if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
766 				ignored++;
767 			else if (tcpu == cpu)
768 				uv_nmi_dump_state_cpu(tcpu, regs);
769 			else
770 				uv_nmi_trigger_dump(tcpu);
771 		}
772 		if (ignored)
773 			pr_alert("UV: %d CPUs ignored NMI\n", ignored);
774 
775 		console_loglevel = saved_console_loglevel;
776 		pr_alert("UV: process trace complete\n");
777 	} else {
778 		while (!atomic_read(&uv_nmi_slave_continue))
779 			cpu_relax();
780 		while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP)
781 			cpu_relax();
782 		uv_nmi_dump_state_cpu(cpu, regs);
783 	}
784 	uv_nmi_sync_exit(master);
785 }
786 
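/*
 * CPUs can sit in the NMI handler long enough to trip the soft-lockup,
 * clocksource, RCU stall and NMI watchdogs, so reset them all before
 * returning to normal operation.
 */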
787 static void uv_nmi_touch_watchdogs(void)
788 {
789 	touch_softlockup_watchdog_sync();
790 	clocksource_touch_watchdog();
791 	rcu_cpu_stall_reset();
792 	touch_nmi_watchdog();
793 }
794 
795 static atomic_t uv_nmi_kexec_failed;
796 
797 #if defined(CONFIG_KEXEC_CORE)
798 static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
799 {
800 	/* Call crash to dump system state */
801 	if (master) {
802 		pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
803 		crash_kexec(regs);
804 
805 		pr_emerg("UV: crash_kexec unexpectedly returned, ");
806 		atomic_set(&uv_nmi_kexec_failed, 1);
807 		if (!kexec_crash_image) {
808 			pr_cont("crash kernel not loaded\n");
809 			return;
810 		}
811 		pr_cont("kexec busy, stalling cpus while waiting\n");
812 	}
813 
814 	/* If crash_kexec fails, the slaves should return; otherwise stall */
815 	while (atomic_read(&uv_nmi_kexec_failed) == 0)
816 		mdelay(10);
817 }
818 
819 #else /* !CONFIG_KEXEC_CORE */
820 static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
821 {
822 	if (master)
823 		pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
824 	atomic_set(&uv_nmi_kexec_failed, 1);
825 }
826 #endif /* !CONFIG_KEXEC_CORE */
827 
828 #ifdef CONFIG_KGDB
829 #ifdef CONFIG_KGDB_KDB
830 static inline int uv_nmi_kdb_reason(void)
831 {
832 	return KDB_REASON_SYSTEM_NMI;
833 }
834 #else /* !CONFIG_KGDB_KDB */
835 static inline int uv_nmi_kdb_reason(void)
836 {
837 	/* Ensure user is expecting to attach gdb remote */
838 	if (uv_nmi_action_is("kgdb"))
839 		return 0;
840 
841 	pr_err("UV: NMI error: KDB is not enabled in this kernel\n");
842 	return -1;
843 }
844 #endif /* CONFIG_KGDB_KDB */
845 
846 /*
847  * Call KGDB/KDB from NMI handler
848  *
849  * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or
850  * 'kdb' has no effect on which is used.  See the KGDB documentation for further
851  * information.
852  */
853 static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
854 {
855 	if (master) {
856 		int reason = uv_nmi_kdb_reason();
857 		int ret;
858 
859 		if (reason < 0)
860 			return;
861 
862 		/* Call KGDB NMI handler as MASTER */
863 		ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason,
864 				&uv_nmi_slave_continue);
865 		if (ret) {
866 			pr_alert("KGDB returned error, is kgdboc set?\n");
867 			atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
868 		}
869 	} else {
870 		/* Wait for KGDB signal that it's ready for slaves to enter */
871 		int sig;
872 
873 		do {
874 			cpu_relax();
875 			sig = atomic_read(&uv_nmi_slave_continue);
876 		} while (!sig);
877 
878 		/* Call KGDB as slave */
879 		if (sig == SLAVE_CONTINUE)
880 			kgdb_nmicallback(cpu, regs);
881 	}
882 	uv_nmi_sync_exit(master);
883 }
884 
885 #else /* !CONFIG_KGDB */
886 static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
887 {
888 	pr_err("UV: NMI error: KGDB is not enabled in this kernel\n");
889 }
890 #endif /* !CONFIG_KGDB */
891 
892 /*
893  * UV NMI handler
894  */
895 static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
896 {
897 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
898 	int cpu = smp_processor_id();
899 	int master = 0;
900 	unsigned long flags;
901 
902 	local_irq_save(flags);
903 
904 	/* If not a UV System NMI, ignore */
905 	if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) {
906 		local_irq_restore(flags);
907 		return NMI_DONE;
908 	}
909 
910 	/* Check whether we are the first CPU into the NMI handler (the master) */
911 	master = (atomic_read(&uv_nmi_cpu) == cpu);
912 
913 	/* If NMI action is "kdump", then attempt to do it */
914 	if (uv_nmi_action_is("kdump")) {
915 		uv_nmi_kdump(cpu, master, regs);
916 
917 		/* Unexpected return, revert action to "dump" */
918 		if (master)
919 			strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
920 	}
921 
922 	/* Pause as all CPUs enter the NMI handler */
923 	uv_nmi_wait(master);
924 
925 	/* Process actions other than "kdump": */
926 	if (uv_nmi_action_is("health")) {
927 		uv_nmi_action_health(cpu, regs, master);
928 	} else if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) {
929 		uv_nmi_dump_state(cpu, regs, master);
930 	} else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) {
931 		uv_call_kgdb_kdb(cpu, regs, master);
932 	} else {
933 		if (master)
934 			pr_alert("UV: unknown NMI action: %s\n", uv_nmi_action);
935 		uv_nmi_sync_exit(master);
936 	}
937 
938 	/* Clear per_cpu "in_nmi" flag */
939 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT);
940 
941 	/* Clear MMR NMI flag on each hub */
942 	uv_clear_nmi(cpu);
943 
944 	/* Clear global flags */
945 	if (master) {
946 		if (cpumask_weight(uv_nmi_cpu_mask))
947 			uv_nmi_cleanup_mask();
948 		atomic_set(&uv_nmi_cpus_in_nmi, -1);
949 		atomic_set(&uv_nmi_cpu, -1);
950 		atomic_set(&uv_in_nmi, 0);
951 		atomic_set(&uv_nmi_kexec_failed, 0);
952 		atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
953 	}
954 
955 	uv_nmi_touch_watchdogs();
956 	local_irq_restore(flags);
957 
958 	return NMI_HANDLED;
959 }
960 
961 /*
962  * NMI handler for pulling in CPUs when perf events are grabbing our NMI
963  */
964 static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
965 {
966 	int ret;
967 
968 	this_cpu_inc(uv_cpu_nmi.queries);
969 	if (!this_cpu_read(uv_cpu_nmi.pinging)) {
970 		local64_inc(&uv_nmi_ping_misses);
971 		return NMI_DONE;
972 	}
973 
974 	this_cpu_inc(uv_cpu_nmi.pings);
975 	local64_inc(&uv_nmi_ping_count);
976 	ret = uv_handle_nmi(reason, regs);
977 	this_cpu_write(uv_cpu_nmi.pinging, 0);
978 	return ret;
979 }
980 
981 static void uv_register_nmi_notifier(void)
982 {
983 	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
984 		pr_warn("UV: NMI handler failed to register\n");
985 
986 	if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
987 		pr_warn("UV: PING NMI handler failed to register\n");
988 }
989 
990 void uv_nmi_init(void)
991 {
992 	unsigned int value;
993 
994 	/*
995 	 * Unmask NMI on all CPUs
996 	 */
997 	value = apic_read(APIC_LVT1) | APIC_DM_NMI;
998 	value &= ~APIC_LVT_MASKED;
999 	apic_write(APIC_LVT1, value);
1000 }
1001 
1002 /* Set up hub NMI info */
1003 static void __init uv_nmi_setup_common(bool hubbed)
1004 {
1005 	int size = sizeof(void *) * (1 << NODES_SHIFT);
1006 	int cpu;
1007 
1008 	uv_hub_nmi_list = kzalloc(size, GFP_KERNEL);
1009 	nmi_debug("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size);
1010 	BUG_ON(!uv_hub_nmi_list);
1011 	size = sizeof(struct uv_hub_nmi_s);
1012 	for_each_present_cpu(cpu) {
1013 		int nid = cpu_to_node(cpu);
1014 		if (uv_hub_nmi_list[nid] == NULL) {
1015 			uv_hub_nmi_list[nid] = kzalloc_node(size,
1016 							    GFP_KERNEL, nid);
1017 			BUG_ON(!uv_hub_nmi_list[nid]);
1018 			raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock));
1019 			atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1);
1020 			uv_hub_nmi_list[nid]->hub_present = hubbed;
1021 			uv_hub_nmi_list[nid]->pch_owner = (nid == 0);
1022 		}
1023 		uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
1024 	}
1025 	BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL));
1026 }
1027 
1028 /* Setup for UV Hub systems */
1029 void __init uv_nmi_setup(void)
1030 {
1031 	uv_nmi_setup_mmrs();
1032 	uv_nmi_setup_common(true);
1033 	uv_register_nmi_notifier();
1034 	pr_info("UV: Hub NMI enabled\n");
1035 }
1036 
1037 /* Setup for UV Hubless systems */
1038 void __init uv_nmi_setup_hubless(void)
1039 {
1040 	uv_nmi_setup_common(false);
1041 	pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE);
1042 	nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n",
1043 		pch_base, PCH_PCR_GPIO_1_BASE);
1044 	if (uv_pch_init_enable)
1045 		uv_init_hubless_pch_d0();
1046 	uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0,
1047 				STS_GPP_D_0_MASK, STS_GPP_D_0_MASK);
1048 	uv_nmi_setup_hubless_intr();
1049 	/* Ensure NMI enabled in Processor Interface Reg: */
1050 	uv_reassert_nmi();
1051 	uv_register_nmi_notifier();
1052 	pr_info("UV: Hubless NMI enabled\n");
1053 }
1054