xref: /illumos-gate/usr/src/uts/common/os/panic.c (revision 814a60b13c0ad90e5d2edfd29a7a84bbf416cc1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * When the operating system detects that it is in an invalid state, a panic
31  * is initiated in order to minimize potential damage to user data and to
32  * facilitate debugging.  There are three major tasks to be performed in
33  * a system panic: recording information about the panic in memory (and thus
34  * making it part of the crash dump), synchronizing the file systems to
35  * preserve user file data, and generating the crash dump.  We define the
36  * system to be in one of four states with respect to the panic code:
37  *
38  * CALM    - the state of the system prior to any thread initiating a panic
39  *
40  * QUIESCE - the state of the system when the first thread to initiate
41  *           a system panic records information about the cause of the panic
42  *           and renders the system quiescent by stopping other processors
43  *
44  * SYNC    - the state of the system when we synchronize the file systems
45  * DUMP    - the state when we generate the crash dump.
46  *
47  * The transitions between these states are irreversible: once we begin
48  * panicking, we only make one attempt to perform the actions associated with
49  * each state.
50  *
51  * The panic code itself must be re-entrant because actions taken during any
52  * state may lead to another system panic.  Additionally, any Solaris
53  * thread may initiate a panic at any time, and so we must have synchronization
54  * between threads which attempt to initiate a state transition simultaneously.
55  * The panic code makes use of a special locking primitive, a trigger, to
56  * perform this synchronization.  A trigger is simply a word which is set
57  * atomically and can only be set once.  We declare three triggers, one for
58  * each transition between the four states.  When a thread enters the panic
59  * code it attempts to set each trigger; if it fails it moves on to the
60  * next trigger.  A special case is the first trigger: if two threads race
61  * to perform the transition to QUIESCE, the losing thread may execute before
62  * the winner has a chance to stop its CPU.  To solve this problem, we have
63  * the loser look ahead to see if a later trigger or panicstr is already set;
64  * if not, it presumes the winner's panic is underway and simply spins.
65  * Unfortunately, since we are panicking, this cannot be known with certainty.
66  *
67  * There are two common reasons for re-entering the panic code once a panic
68  * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
69  * the operator may type "sync" instead of "go", and the PROM's sync callback
70  * routine will invoke panic(); (2) if the clock routine decides that sync
71  * or dump is not making progress, it will invoke panic() to force a timeout.
72  * The design assumes that a third possibility, another thread causing an
73  * unrelated panic while sync or dump is still underway, is extremely unlikely.
74  * If this situation occurs, we may end up triggering dump while sync is
75  * still in progress.  This third case is considered extremely unlikely because
76  * all other CPUs are stopped and low-level interrupts have been blocked.
77  *
78  * The panic code is entered via a call directly to the vpanic() function,
79  * or its varargs wrappers panic() and cmn_err(9F).  The vpanic routine
80  * is implemented in assembly language to record the current machine
81  * registers, attempt to set the trigger for the QUIESCE state, and
82  * if successful, switch stacks on to the panic_stack before calling into
83  * the common panicsys() routine.  The first thread to initiate a panic
84  * is allowed to make use of the reserved panic_stack so that executing
85  * the panic code itself does not overwrite valuable data on that thread's
86  * stack *ahead* of the current stack pointer.  This data will be preserved
87  * in the crash dump and may prove invaluable in determining what this
88  * thread has previously been doing.  The first thread, saved in panic_thread,
89  * is also responsible for stopping the other CPUs as quickly as possible,
90  * and then setting the various panic_* variables.  Most important among
91  * these is panicstr, which allows threads to subsequently bypass held
92  * locks so that we can proceed without ever blocking.  We must stop the
93  * other CPUs *prior* to setting panicstr in case threads running there are
94  * currently spinning to acquire a lock; we want that state to be preserved.
95  * Every thread which initiates a panic has its T_PANIC flag set so we can
96  * identify all such threads in the crash dump.
97  *
98  * The panic_thread is also allowed to make use of the special memory buffer
99  * panicbuf, which on machines with appropriate hardware is preserved across
100  * reboots.  We allow the panic_thread to store its register set and panic
101  * message in this buffer, so even if we fail to obtain a crash dump we will
102  * be able to examine the machine after reboot and determine some of the
103  * state at the time of the panic.  If we do get a dump, the panic buffer
104  * data is structured so that a debugger can easily consume the information
105  * therein (see <sys/panic.h>).
106  *
107  * Each platform or architecture is required to implement the functions
108  * panic_savetrap() to record trap-specific information to panicbuf,
109  * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
110  * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
111  * miscellaneous platform-specific tasks *after* panicstr is set,
112  * panic_showtrap() to print trap-specific information to the console,
113  * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
114  *
115  * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
116  *
117  * Words ending in -c interpose k before suffixes which otherwise would
118  * indicate a soft c, and thus the verb and adjective forms of 'panic' are
119  * spelled "panicked", "panicking", and "panicky" respectively.  Use of
120  * the ill-conceived "panicing" and "panic'd" is discouraged.
121  */
122 
123 #include <sys/types.h>
124 #include <sys/varargs.h>
125 #include <sys/sysmacros.h>
126 #include <sys/cmn_err.h>
127 #include <sys/cpuvar.h>
128 #include <sys/thread.h>
129 #include <sys/t_lock.h>
130 #include <sys/cred.h>
131 #include <sys/systm.h>
132 #include <sys/uadmin.h>
133 #include <sys/callb.h>
134 #include <sys/vfs.h>
135 #include <sys/log.h>
136 #include <sys/disp.h>
137 #include <sys/param.h>
138 #include <sys/dumphdr.h>
139 #include <sys/ftrace.h>
140 #include <sys/reboot.h>
141 #include <sys/debug.h>
142 #include <sys/stack.h>
143 #include <sys/spl.h>
144 #include <sys/errorq.h>
145 #include <sys/panic.h>
146 
147 /*
148  * Panic variables which are set once during the QUIESCE state by the
149  * first thread to initiate a panic.  These are examined by post-mortem
150  * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
151  * the variable naming is historical and allows legacy tools to work.
 *
 * Apart from panic_stack itself, every variable below is assigned by
 * panicsys() on its on_panic_stack path, after panic_stopcpus() has been
 * called.  panic_schedflag, panic_bound_cpu and panic_preempt record the
 * values the thread had on entry to panicsys(), i.e. before panicsys()
 * changed t_schedflag, t_bound_cpu and t_preempt for its own use.
152  */
153 #pragma align STACK_ALIGN(panic_stack)
154 char panic_stack[PANICSTKSIZE];		/* reserved stack for panic_thread */
155 kthread_t *panic_thread;		/* first thread to call panicsys() */
156 cpu_t panic_cpu;			/* cpu from first call to panicsys() */
157 label_t panic_regs;			/* setjmp label from panic_thread */
158 struct regs *panic_reg;			/* regs struct from first panicsys() */
159 char *volatile panicstr;		/* format string to first panicsys() */
160 va_list panicargs;			/* arguments to first panicsys() */
161 clock_t panic_lbolt;			/* lbolt at time of panic */
162 int64_t panic_lbolt64;			/* lbolt64 at time of panic */
163 hrtime_t panic_hrtime;			/* hrtime at time of panic */
164 timespec_t panic_hrestime;		/* hrestime at time of panic */
165 int panic_ipl;				/* ipl on panic_cpu at time of panic */
166 ushort_t panic_schedflag;		/* t_schedflag for panic_thread */
167 cpu_t *panic_bound_cpu;			/* t_bound_cpu for panic_thread */
168 char panic_preempt;			/* t_preempt for panic_thread */
169 
170 /*
171  * Panic variables which can be set via /etc/system or patched while
172  * the system is in operation.  As above, the naming is inconsistent for
 * historical reasons.
 *
 * How panicsys() uses them:
 *   panic_bootstr, panic_bootfcn - passed to mdboot() once sync and dump
 *                                  have been attempted (unless halting).
 *   halt_on_panic - if non-zero, mdboot() is called with AD_HALT instead.
 *   nopanicdebug  - if non-zero, none of the debug_enter() calls are made.
 *   in_sync       - if non-zero, the SYNC step (vfs_syncall()) is skipped.
173  */
174 char *panic_bootstr = NULL;		/* mdboot string to use after panic */
175 int panic_bootfcn = AD_BOOT;		/* mdboot function to use after panic */
176 int halt_on_panic = 0;  		/* halt after dump instead of reboot? */
177 int nopanicdebug = 0;			/* reboot instead of call debugger? */
178 int in_sync = 0;			/* skip vfs_syncall() and just dump? */
179 
180 /*
181  * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
182  * to use polled mode instead of interrupt-driven i/o.  panicsys() sets it
 * to 1 immediately before calling vfs_syncall() and again before dumpsys().
183  */
184 int do_polled_io = 0;
185 
186 /*
187  * The panic_forced flag is set by the uadmin A_DUMP code to inform the
188  * panic subsystem that it should not attempt an initial debug_enter.
 * Only the first debug_enter() in panicsys() (the one on the on_panic_stack
 * path) tests this flag; the later "continue to reboot" one does not.
189  */
190 int panic_forced = 0;
191 
192 /*
193  * Triggers for panic state transitions:
 *
 * As described in the overview at the top of this file, a trigger is a word
 * that is set atomically and can only be set once.  panicsys() claims
 * panic_sync and panic_dump through panic_trigger(); per the overview, the
 * QUIESCE trigger is attempted by vpanic() before panicsys() is called.
194  */
195 int panic_quiesce;			/* trigger for CALM    -> QUIESCE */
196 int panic_sync;				/* trigger for QUIESCE -> SYNC */
197 int panic_dump;				/* trigger for SYNC    -> DUMP */
198 
/*
 * panicsys() -- common half of the panic path: record the panic, quiesce
 * the system, then attempt the SYNC and DUMP steps and reboot or halt.
 *
 *   format, alist  - printf-style panic message and its arguments.
 *   rp             - register set handed to panic_saveregs() and traceregs().
 *   on_panic_stack - non-zero on the call that records the initial panic.
 *                    Per the overview above this is the thread that won the
 *                    QUIESCE trigger in vpanic() and was switched onto
 *                    panic_stack; it is zero for re-entries and for threads
 *                    that lost that race.
 *
 * This function has no return path: it ends by calling mdboot(), and
 * otherwise falls into the infinite loop at the 'spin' label.
 */
199 void
200 panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
201 {
	/* s is the priority in effect before spl8(); recorded as panic_ipl. */
202 	int s = spl8();
203 	kthread_t *t = curthread;
204 	cpu_t *cp = CPU;
205 
	/* Original interrupt stack/active mask, saved only if swapped below. */
206 	caddr_t intr_stack = NULL;
207 	uint_t intr_actv;
208 
	/*
	 * Snapshot the scheduling fields before they are overwritten below,
	 * so the pre-panic values can be stored in panic_schedflag,
	 * panic_bound_cpu and panic_preempt.
	 */
209 	ushort_t schedflag = t->t_schedflag;
210 	cpu_t *bound_cpu = t->t_bound_cpu;
211 	char preempt = t->t_preempt;
212 
	/*
	 * Capture this thread's context in t_pcb (copied to panic_regs on the
	 * on_panic_stack path) and mark the thread as having panicked.
	 */
213 	(void) setjmp(&t->t_pcb);
214 	t->t_flag |= T_PANIC;
215 
	/*
	 * Bind the thread to the current CPU, set TS_DONT_SWAP and raise
	 * t_preempt -- presumably so the panicking thread is not swapped,
	 * migrated or preempted from here on (confirm against the dispatcher).
	 */
216 	t->t_schedflag |= TS_DONT_SWAP;
217 	t->t_bound_cpu = cp;
218 	t->t_preempt++;
219 
	/* Platform hook; NOTE(review): defined outside this file. */
220 	panic_enter_hw(s);
221 
222 	/*
223 	 * If we're on the interrupt stack and an interrupt thread is available
224 	 * in this CPU's pool, preserve the interrupt stack by detaching an
225 	 * interrupt thread and making its stack the intr_stack.
226 	 */
227 	if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
228 		kthread_t *it = cp->cpu_intr_thread;
229 
230 		intr_stack = cp->cpu_intr_stack;
231 		intr_actv = cp->cpu_intr_actv;
232 
233 		cp->cpu_intr_stack = thread_stk_init(it->t_stk);
234 		cp->cpu_intr_thread = it->t_link;
235 
236 		/*
237 		 * Clear only the high level bits of cpu_intr_actv.
238 		 * We want to indicate that high-level interrupts are
239 		 * not active without destroying the low-level interrupt
240 		 * information stored there.
241 		 */
242 		cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
243 	}
244 
245 	/*
246 	 * Record one-time panic information and quiesce the other CPUs.
247 	 * Then print out the panic message and stack trace.
248 	 */
249 	if (on_panic_stack) {
250 		panic_data_t *pdp = (panic_data_t *)panicbuf;
251 
		/*
		 * NOTE(review): pd_msgoff presumably lands just past the
		 * fixed part of panic_data_t (which looks like it ends in a
		 * panic_nv_t array) -- confirm against <sys/panic.h>.
		 */
252 		pdp->pd_version = PANICBUFVERS;
253 		pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);
254 
		/* Prefer trap information when the thread has it recorded. */
255 		if (t->t_panic_trap != NULL)
256 			panic_savetrap(pdp, t->t_panic_trap);
257 		else
258 			panic_saveregs(pdp, rp);
259 
		/*
		 * Format the panic message into panicbuf at pd_msgoff,
		 * bounded by the space remaining in the buffer.
		 */
260 		(void) vsnprintf(&panicbuf[pdp->pd_msgoff],
261 		    PANICBUFSIZE - pdp->pd_msgoff, format, alist);
262 
263 		/*
264 		 * Call into the platform code to stop the other CPUs.
265 		 * We currently have all interrupts blocked, and expect that
266 		 * the platform code will lower ipl only as far as needed to
267 		 * perform cross-calls, and will acquire as *few* locks as is
268 		 * possible -- panicstr is not set so we can still deadlock.
269 		 */
270 		panic_stopcpus(cp, t, s);
271 
		/*
		 * Only now, with the other CPUs stopped, publish panicstr
		 * and the rest of the one-time panic state (see the overview
		 * for why panicstr must not be set any earlier).
		 */
272 		panicstr = (char *)format;
273 		va_copy(panicargs, alist);
274 		panic_lbolt = lbolt;
275 		panic_lbolt64 = lbolt64;
276 		panic_hrestime = hrestime;
277 		panic_hrtime = gethrtime_waitfree();
278 		panic_thread = t;
279 		panic_regs = t->t_pcb;
280 		panic_reg = rp;
281 		panic_cpu = *cp;
282 		panic_ipl = spltoipl(s);
283 		panic_schedflag = schedflag;
284 		panic_bound_cpu = bound_cpu;
285 		panic_preempt = preempt;
286 
		/*
		 * If the interrupt stack was swapped out above, make the
		 * panic_cpu snapshot show the original stack and active mask
		 * rather than the replacement values now in *cp.
		 */
287 		if (intr_stack != NULL) {
288 			panic_cpu.cpu_intr_stack = intr_stack;
289 			panic_cpu.cpu_intr_actv = intr_actv;
290 		}
291 
292 		/*
293 		 * Lower ipl to 10 to keep clock() from running, but allow
294 		 * keyboard interrupts to enter the debugger.  These callbacks
295 		 * are executed with panicstr set so they can bypass locks.
296 		 */
297 		splx(ipltospl(CLOCK_LEVEL));
298 		panic_quiesce_hw(pdp);
299 		(void) FTRACE_STOP();
300 		(void) callb_execute_class(CB_CL_PANIC, NULL);
301 
302 		if (log_intrq != NULL)
303 			log_flushq(log_intrq);
304 
305 		/*
306 		 * If log_consq has been initialized and syslogd has started,
307 		 * print any messages in log_consq that haven't been consumed.
308 		 */
309 		if (log_consq != NULL && log_consq != log_backlogq)
310 			log_printq(log_consq);
311 
312 		fm_banner();
313 		errorq_panic();
314 
		/*
		 * NOTE(review): alist was already handed to vsnprintf()
		 * above and is reused here without an intervening va_copy.
		 * This relies on the kernel's vsnprintf() leaving the
		 * caller's va_list usable -- confirm for every platform.
		 */
315 		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
316 		vprintf(format, alist);
317 		printf("\n\n");
318 
319 		if (t->t_panic_trap != NULL) {
320 			panic_showtrap(t->t_panic_trap);
321 			printf("\n");
322 		}
323 
324 		traceregs(rp);
325 		printf("\n");
326 
		/*
		 * First chance to enter the debugger: only when debugging is
		 * enabled, not suppressed by nopanicdebug, and the panic was
		 * not forced (panic_forced).
		 */
327 		if (((boothowto & RB_DEBUG) || obpdebug) &&
328 		    !nopanicdebug && !panic_forced) {
329 			if (dumpvp != NULL) {
330 				debug_enter("panic: entering debugger "
331 				    "(continue to save dump)");
332 			} else {
333 				debug_enter("panic: entering debugger "
334 				    "(no dump device, continue to reboot)");
335 			}
336 		}
337 
	/*
	 * Not the initial panic.  If a later trigger or panicstr is already
	 * set, an earlier panic has progressed: print this message and carry
	 * on to the sync/dump attempts below.  Otherwise this caller
	 * presumably lost the race for QUIESCE, so just spin.
	 */
338 	} else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
339 		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
340 		vprintf(format, alist);
341 		printf("\n");
342 	} else
343 		goto spin;
344 
345 	/*
346 	 * Prior to performing sync or dump, we make sure that do_polled_io is
347 	 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
348 	 * will re-enter panic if we are not making progress with sync or dump.
349 	 */
350 
351 	/*
352 	 * Sync the filesystems.  Reset t_cred if not set because much of
353 	 * the filesystem code depends on CRED() being valid.
	 * The panic_sync trigger ensures this is attempted at most once;
	 * in_sync set non-zero skips the step without claiming the trigger.
354 	 */
355 	if (!in_sync && panic_trigger(&panic_sync)) {
356 		if (t->t_cred == NULL)
357 			t->t_cred = kcred;
358 		splx(ipltospl(CLOCK_LEVEL));
359 		do_polled_io = 1;
360 		vfs_syncall();
361 	}
362 
363 	/*
364 	 * Take the crash dump.  If the dump trigger is already set, try to
365 	 * enter the debugger again before rebooting the system.
366 	 */
367 	if (panic_trigger(&panic_dump)) {
368 		panic_dump_hw(s);
369 		splx(ipltospl(CLOCK_LEVEL));
370 		do_polled_io = 1;
371 		dumpsys();
372 	} else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
373 		debug_enter("panic: entering debugger (continue to reboot)");
374 	} else
375 		printf("dump aborted: please record the above information!\n");
376 
	/* Halt if requested, otherwise reboot using the panic_boot* tunables. */
377 	if (halt_on_panic)
378 		mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
379 	else
380 		mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
381 spin:
382 	/*
383 	 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
384 	 * and unable to jump into the debugger.
385 	 */
386 	splx(MIN(s, ipltospl(CLOCK_LEVEL)));
387 	for (;;);
388 }
389 
/*
 * panic() -- varargs front end to vpanic().
 *
 * Collects the arguments that follow 'format' into a va_list and passes
 * the pair to vpanic(), which the overview at the top of this file
 * describes as the entry point to the panic code.
 */
void
panic(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vpanic(format, ap);
	va_end(ap);
}
399