xref: /titanic_41/usr/src/uts/sun4u/os/mach_cpu_states.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/uadmin.h>
32 #include <sys/panic.h>
33 #include <sys/reboot.h>
34 #include <sys/autoconf.h>
35 #include <sys/machsystm.h>
36 #include <sys/promif.h>
37 #include <sys/membar.h>
38 #include <vm/hat_sfmmu.h>
39 #include <sys/cpu_module.h>
40 #include <sys/cpu_sgnblk_defs.h>
41 #include <sys/intreg.h>
42 #include <sys/consdev.h>
43 #include <sys/kdi_impl.h>
44 
45 #ifdef	TRAPTRACE
46 #include <sys/traptrace.h>
47 u_longlong_t panic_tick;
48 #endif /* TRAPTRACE */
49 
50 extern u_longlong_t	gettick();
51 static void reboot_machine(char *);
52 extern int disable_watchdog_on_exit;
53 
54 /*
55  * Machine dependent code to reboot.
56  * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
57  * to a string to be used as the argument string when rebooting.
58  */
59 /*ARGSUSED*/
60 void
61 mdboot(int cmd, int fcn, char *bootstr)
62 {
63 	page_t *first, *pp;
64 	extern void pm_cfb_check_and_powerup(void);
65 
66 	/*
67 	 * Disable the hw watchdog timer.
68 	 */
69 	if (disable_watchdog_on_exit && watchdog_activated) {
70 		mutex_enter(&tod_lock);
71 		(void) tod_ops.tod_clear_watchdog_timer();
72 		mutex_exit(&tod_lock);
73 	}
74 
75 	/*
76 	 * Clear any unresolved UEs from memory.  We rely on the fact that on
77 	 * sun4u, pagezero() will always clear UEs.  Since we're rebooting, we
78 	 * just force p_selock to appear locked so pagezero()'s assert works.
79 	 *
80 	 * Pages that were retired successfully due to multiple CEs will
81 	 * also be cleared.
82 	 */
83 	if (memsegs != NULL) {
84 		pp = first = page_first();
85 		do {
86 			if (page_isretired(pp) || page_istoxic(pp)) {
87 				/* pagezero asserts PAGE_LOCKED */
88 				pp->p_selock = -1;
89 				pagezero(pp, 0, PAGESIZE);
90 			}
91 		} while ((pp = page_next(pp)) != first);
92 	}
93 
94 	/*
95 	 * XXX - rconsvp is set to NULL to ensure that output messages
96 	 * are sent to the underlying "hardware" device using the
97 	 * monitor's printf routine since we are in the process of
98 	 * either rebooting or halting the machine.
99 	 */
100 	rconsvp = NULL;
101 
102 	/*
103 	 * At a high interrupt level we can't:
104 	 *	1) bring up the console
105 	 * or
106 	 *	2) wait for pending interrupts prior to redistribution
107 	 *	   to the current CPU
108 	 *
109 	 * so we do them now.
110 	 */
111 	pm_cfb_check_and_powerup();
112 
113 	/* make sure there are no more changes to the device tree */
114 	devtree_freeze();
115 
116 	/*
117 	 * stop other cpus which also raise our priority. since there is only
118 	 * one active cpu after this, and our priority will be too high
119 	 * for us to be preempted, we're essentially single threaded
120 	 * from here on out.
121 	 */
122 	stop_other_cpus();
123 
124 	/*
125 	 * try and reset leaf devices.  reset_leaves() should only
126 	 * be called when there are no other threads that could be
127 	 * accessing devices
128 	 */
129 	reset_leaves();
130 
131 	if (fcn == AD_HALT) {
132 		halt((char *)NULL);
133 	} else if (fcn == AD_POWEROFF) {
134 		power_down(NULL);
135 	} else {
136 		if (bootstr == NULL) {
137 			switch (fcn) {
138 
139 			case AD_BOOT:
140 				bootstr = "";
141 				break;
142 
143 			case AD_IBOOT:
144 				bootstr = "-a";
145 				break;
146 
147 			case AD_SBOOT:
148 				bootstr = "-s";
149 				break;
150 
151 			case AD_SIBOOT:
152 				bootstr = "-sa";
153 				break;
154 			default:
155 				cmn_err(CE_WARN,
156 				    "mdboot: invalid function %d", fcn);
157 				bootstr = "";
158 				break;
159 			}
160 		}
161 		reboot_machine(bootstr);
162 	}
163 	/* MAYBE REACHED */
164 }
165 
/*
 * mdpreboot - hook that may run ahead of mdboot(), while the root file
 * system is still mounted.  This implementation takes no action; all three
 * arguments are ignored.
 */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
	/* intentionally empty */
}
172 
/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 *
 * bootstr is passed unchanged to prom_reboot().  The trailing NOTREACHED
 * marker records that control is not expected to come back from that call.
 */
static void
reboot_machine(char *bootstr)
{
	flush_windows();
	stop_other_cpus();		/* send stop signal to other CPUs */
	prom_printf("rebooting...\n");
	/*
	 * For platforms that use CPU signatures, we
	 * need to set the signature block to OS and
	 * the state to exiting for all the processors.
	 * The substate recorded here is SIGSUBST_REBOOT.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
	prom_reboot(bootstr);
	/*NOTREACHED*/
}
192 
/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 *
 * This routine never returns: it ends in an unconditional empty loop.
 */
static void
panic_idle(void)
{
	cpu_async_panic_callb(); /* check for async errors */

	(void) spl7();

	/*
	 * Flush the register windows and save the current context into this
	 * thread's t_pcb; this is the "record their location" step.
	 */
	debug_flush_windows();
	(void) setjmp(&curthread->t_pcb);

	/*
	 * Flag this CPU as stopped.  panic_stopcpus() polls cpu_m.in_prom to
	 * learn that the CPU got here.  The membar_stld() that follows is
	 * presumably there to make the store visible before we start
	 * spinning -- TODO confirm.
	 */
	CPU->cpu_m.in_prom = 1;
	membar_stld();

	for (;;);	/* spin forever */
}
212 
213 /*
214  * Force the other CPUs to trap into panic_idle(), and then remove them
215  * from the cpu_ready_set so they will no longer receive cross-calls.
216  */
217 /*ARGSUSED*/
218 void
219 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
220 {
221 	cpuset_t cps;
222 	int i;
223 
224 	(void) splzs();
225 	CPUSET_ALL_BUT(cps, cp->cpu_id);
226 	xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);
227 
228 	for (i = 0; i < NCPU; i++) {
229 		if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
230 			int ntries = 0x10000;
231 
232 			while (!cpu[i]->cpu_m.in_prom && ntries) {
233 				DELAY(50);
234 				ntries--;
235 			}
236 
237 			if (!cpu[i]->cpu_m.in_prom)
238 				printf("panic: failed to stop cpu%d\n", i);
239 
240 			cpu[i]->cpu_flags &= ~CPU_READY;
241 			cpu[i]->cpu_flags |= CPU_QUIESCED;
242 			CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
243 		}
244 	}
245 }
246 
247 /*
248  * Platform callback following each entry to panicsys().  If we've panicked at
249  * level 14, we examine t_panic_trap to see if a fatal trap occurred.  If so,
250  * we disable further %tick_cmpr interrupts.  If not, an explicit call to panic
251  * was made and so we re-enqueue an interrupt request structure to allow
252  * further level 14 interrupts to be processed once we lower PIL.  This allows
253  * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
254  */
255 void
256 panic_enter_hw(int spl)
257 {
258 	if (spl == ipltospl(PIL_14)) {
259 		uint_t opstate = disable_vec_intr();
260 
261 		if (curthread->t_panic_trap != NULL) {
262 			tickcmpr_disable();
263 			intr_dequeue_req(PIL_14, cbe_level14_inum);
264 		} else {
265 			if (!tickcmpr_disabled())
266 				intr_enqueue_req(PIL_14, cbe_level14_inum);
267 			/*
268 			 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
269 			 * and SOFTINT<16> (STICK_INT) to indicate
270 			 * that the current level 14 has been serviced.
271 			 */
272 			wr_clr_softint((1 << PIL_14) |
273 				TICK_INT_MASK | STICK_INT_MASK);
274 		}
275 
276 		enable_vec_intr(opstate);
277 	}
278 }
279 
/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code.  In order, this routine: freezes trap tracing (when
 * built with TRAPTRACE), updates the CPU signature, clears the watchdog
 * timer, disables CPU and bus error reporting, redirects interrupts to the
 * current CPU, asks the bus nexus drivers to reset their interrupt state,
 * and finally sets PSTATE_IE.  The pdp argument is unused.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
	extern uint_t getpstate(void);
	extern void setpstate(uint_t);

#ifdef TRAPTRACE
	/*
	 * Turn off TRAPTRACE and save the current %tick value in panic_tick.
	 * panic_tick is only written while it is still zero, so a value
	 * already stored there is preserved.
	 */
	if (!panic_tick)
		panic_tick = gettick();
	TRAPTRACE_FREEZE;
#endif
	/*
	 * For Platforms that use CPU signatures, we
	 * need to set the signature block to OS, the state to
	 * exiting, and the substate to panic for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

	/*
	 * Disable the watchdog timer now that we've made it through the
	 * critical part of the panic code.
	 * NOTE(review): unlike the other callers in this file, tod_lock is
	 * not taken around this call -- presumably deliberate in panic
	 * context; confirm.
	 */
	if (watchdog_enable)
		(void) tod_ops.tod_clear_watchdog_timer();

	/*
	 * Disable further ECC errors from the CPU module and the bus nexus.
	 */
	cpu_disable_errors();
	(void) bus_func_invoke(BF_TYPE_ERRDIS);

	/*
	 * Redirect all interrupts to the current CPU.
	 */
	intr_redist_all_cpus_shutdown();

	/*
	 * This call exists solely to support dumps to network
	 * devices after sync from OBP.
	 *
	 * If we came here via the sync callback, then on some
	 * platforms, interrupts may have arrived while we were
	 * stopped in OBP.  OBP will arrange for those interrupts to
	 * be redelivered if you say "go", but not if you invoke a
	 * client callback like 'sync'.	 For some dump devices
	 * (network swap devices), we need interrupts to be
	 * delivered in order to dump, so we have to call the bus
	 * nexus driver to reset the interrupt state machines.
	 */
	(void) bus_func_invoke(BF_TYPE_RESINTR);

	/*
	 * Set PSTATE_IE in the processor state register (presumably the
	 * interrupt-enable bit, so that interrupts can be delivered from
	 * here on -- TODO confirm).
	 */
	setpstate(getpstate() | PSTATE_IE);
}
341 
/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs.  The SIGSUBST_DUMP substate indicates
 * that we're about to write the crash dump, which tells the SSP/SMS to begin
 * a timeout routine to reboot the machine if the dump never completes.
 * The spl argument is unused.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}
354 
355 /*
356  * for ptl1_panic
357  */
358 void
359 ptl1_init_cpu(struct cpu *cpu)
360 {
361 	ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;
362 
363 	/*CONSTCOND*/
364 	if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
365 		panic("ptl1_init_cpu: not enough space left for ptl1_panic "
366 		    "stack, sizeof (struct cpu) = %d", sizeof (struct cpu));
367 	}
368 
369 	pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
370 	cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
371 }
372 
373 void
374 ptl1_panic_handler(ptl1_state_t *pstate)
375 {
376 	static const char *ptl1_reasons[] = {
377 #ifdef	PTL1_PANIC_DEBUG
378 		"trap for debug purpose",	/* PTL1_BAD_DEBUG */
379 #else
380 		"unknown trap",			/* PTL1_BAD_DEBUG */
381 #endif
382 		"register window trap",		/* PTL1_BAD_WTRAP */
383 		"kernel MMU miss",		/* PTL1_BAD_KMISS */
384 		"kernel protection fault",	/* PTL1_BAD_KPROT_FAULT */
385 		"ISM MMU miss",			/* PTL1_BAD_ISM */
386 		"kernel MMU trap",		/* PTL1_BAD_MMUTRAP */
387 		"kernel trap handler state",	/* PTL1_BAD_TRAP */
388 		"floating point trap",		/* PTL1_BAD_FPTRAP */
389 #ifdef	DEBUG
390 		"pointer to intr_req",		/* PTL1_BAD_INTR_REQ */
391 #else
392 		"unknown trap",			/* PTL1_BAD_INTR_REQ */
393 #endif
394 #ifdef	TRAPTRACE
395 		"TRACE_PTR state",		/* PTL1_BAD_TRACE_PTR */
396 #else
397 		"unknown trap",			/* PTL1_BAD_TRACE_PTR */
398 #endif
399 		"stack overflow",		/* PTL1_BAD_STACK */
400 		"DTrace flags",			/* PTL1_BAD_DTRACE_FLAGS */
401 		"attempt to steal locked ctx",  /* PTL1_BAD_CTX_STEAL */
402 		"CPU ECC error loop",		/* PTL1_BAD_ECC */
403 		"unknown trap",			/* PTL1_BAD_HCALL */
404 	};
405 
406 	uint_t reason = pstate->ptl1_regs.ptl1_g1;
407 	uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
408 	struct trap_info ti = { 0 };
409 
410 	/*
411 	 * Use trap_info for a place holder to call panic_savetrap() and
412 	 * panic_showtrap() to save and print out ptl1_panic information.
413 	 */
414 	if (curthread->t_panic_trap == NULL)
415 		curthread->t_panic_trap = &ti;
416 
417 	if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
418 		panic("bad %s at TL %u", ptl1_reasons[reason], tl);
419 	else
420 		panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
421 }
422 
423 void
424 clear_watchdog_on_exit()
425 {
426 	/*
427 	 * Only shut down an active hardware watchdog timer if the platform
428 	 * has expressed an interest to.
429 	 */
430 	if (disable_watchdog_on_exit && watchdog_activated) {
431 		prom_printf("Debugging requested; hardware watchdog "
432 		    "disabled; reboot to re-enable.\n");
433 		cmn_err(CE_WARN, "!Debugging requested; hardware watchdog "
434 		    "disabled; reboot to re-enable.");
435 		mutex_enter(&tod_lock);
436 		(void) tod_ops.tod_clear_watchdog_timer();
437 		mutex_exit(&tod_lock);
438 	}
439 }
440 
/*
 * Clear the hardware watchdog timer, under tod_lock, if it is currently
 * active.
 *
 * Returns the value of watchdog_activated as read after the clear attempt.
 * NOTE(review): if tod_clear_watchdog_timer() resets watchdog_activated,
 * the value returned is the post-clear state rather than whether the
 * watchdog had been running -- confirm which one callers expect.
 */
int
kdi_watchdog_disable(void)
{
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	return (watchdog_activated);
}
452 
453 void
454 kdi_watchdog_restore(void)
455 {
456 	if (watchdog_enable) {
457 		mutex_enter(&tod_lock);
458 		(void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
459 		mutex_exit(&tod_lock);
460 	}
461 }
462 
/*
 * Hook for setting up a dump buffer that stores extra crash information.
 * That is not applicable to sun4u, so this implementation does nothing.
 */
/*ARGSUSED*/
void
mach_dump_buffer_init(void)
{
	/* nothing to set up on sun4u */
}
472 
/*
 * xt_sync - wait for previous x-traps to finish
 *
 * The caller's cpuset is passed by value, so the edits below only affect
 * the local copy.  The current CPU is removed from the set, the set is
 * restricted to CPUs in cpu_ready_set, and xt_sync_tl1 is cross-trapped to
 * whatever remains.  Preemption is disabled for the duration so that
 * CPU->cpu_id keeps referring to the CPU we are running on.
 */
void
xt_sync(cpuset_t cpuset)
{
	kpreempt_disable();
	CPUSET_DEL(cpuset, CPU->cpu_id);	/* never target ourselves */
	CPUSET_AND(cpuset, cpu_ready_set);	/* only CPUs in the ready set */
	xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0);
	kpreempt_enable();
}
485