xref: /titanic_50/usr/src/uts/common/os/panic.c (revision 84058112076dd7aa47feea28f15e375fee68125a)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23   */
24  
25  /*
26   * Copyright (c) 2011, Joyent, Inc. All rights reserved.
27   */
28  
29  /*
30   * When the operating system detects that it is in an invalid state, a panic
31   * is initiated in order to minimize potential damage to user data and to
32   * facilitate debugging.  There are three major tasks to be performed in
33   * a system panic: recording information about the panic in memory (and thus
34   * making it part of the crash dump), synchronizing the file systems to
35   * preserve user file data, and generating the crash dump.  We define the
36   * system to be in one of four states with respect to the panic code:
37   *
38   * CALM    - the state of the system prior to any thread initiating a panic
39   *
40   * QUIESCE - the state of the system when the first thread to initiate
41   *           a system panic records information about the cause of the panic
42   *           and renders the system quiescent by stopping other processors
43   *
 * SYNC    - the state of the system when we synchronize the file systems
 *
 * DUMP    - the state of the system when we generate the crash dump
46   *
47   * The transitions between these states are irreversible: once we begin
48   * panicking, we only make one attempt to perform the actions associated with
49   * each state.
50   *
51   * The panic code itself must be re-entrant because actions taken during any
52   * state may lead to another system panic.  Additionally, any Solaris
53   * thread may initiate a panic at any time, and so we must have synchronization
54   * between threads which attempt to initiate a state transition simultaneously.
55   * The panic code makes use of a special locking primitive, a trigger, to
56   * perform this synchronization.  A trigger is simply a word which is set
57   * atomically and can only be set once.  We declare three triggers, one for
58   * each transition between the four states.  When a thread enters the panic
59   * code it attempts to set each trigger; if it fails it moves on to the
60   * next trigger.  A special case is the first trigger: if two threads race
61   * to perform the transition to QUIESCE, the losing thread may execute before
62   * the winner has a chance to stop its CPU.  To solve this problem, we have
63   * the loser look ahead to see if any other triggers are set; if not, it
64   * presumes a panic is underway and simply spins.  Unfortunately, since we
65   * are panicking, it is not possible to know this with absolute certainty.
66   *
67   * There are two common reasons for re-entering the panic code once a panic
68   * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
69   * the operator may type "sync" instead of "go", and the PROM's sync callback
70   * routine will invoke panic(); (2) if the clock routine decides that sync
71   * or dump is not making progress, it will invoke panic() to force a timeout.
72   * The design assumes that a third possibility, another thread causing an
73   * unrelated panic while sync or dump is still underway, is extremely unlikely.
74   * If this situation occurs, we may end up triggering dump while sync is
75   * still in progress.  This third case is considered extremely unlikely because
76   * all other CPUs are stopped and low-level interrupts have been blocked.
77   *
78   * The panic code is entered via a call directly to the vpanic() function,
79   * or its varargs wrappers panic() and cmn_err(9F).  The vpanic routine
80   * is implemented in assembly language to record the current machine
81   * registers, attempt to set the trigger for the QUIESCE state, and
82   * if successful, switch stacks on to the panic_stack before calling into
83   * the common panicsys() routine.  The first thread to initiate a panic
84   * is allowed to make use of the reserved panic_stack so that executing
85   * the panic code itself does not overwrite valuable data on that thread's
86   * stack *ahead* of the current stack pointer.  This data will be preserved
87   * in the crash dump and may prove invaluable in determining what this
88   * thread has previously been doing.  The first thread, saved in panic_thread,
89   * is also responsible for stopping the other CPUs as quickly as possible,
90   * and then setting the various panic_* variables.  Most important among
91   * these is panicstr, which allows threads to subsequently bypass held
92   * locks so that we can proceed without ever blocking.  We must stop the
93   * other CPUs *prior* to setting panicstr in case threads running there are
94   * currently spinning to acquire a lock; we want that state to be preserved.
95   * Every thread which initiates a panic has its T_PANIC flag set so we can
96   * identify all such threads in the crash dump.
97   *
98   * The panic_thread is also allowed to make use of the special memory buffer
99   * panicbuf, which on machines with appropriate hardware is preserved across
100   * reboots.  We allow the panic_thread to store its register set and panic
101   * message in this buffer, so even if we fail to obtain a crash dump we will
102   * be able to examine the machine after reboot and determine some of the
103   * state at the time of the panic.  If we do get a dump, the panic buffer
104   * data is structured so that a debugger can easily consume the information
105   * therein (see <sys/panic.h>).
106   *
107   * Each platform or architecture is required to implement the functions
108   * panic_savetrap() to record trap-specific information to panicbuf,
109   * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
110   * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
111   * miscellaneous platform-specific tasks *after* panicstr is set,
112   * panic_showtrap() to print trap-specific information to the console,
113   * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
114   *
115   * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
116   *
117   * Words ending in -c interpose k before suffixes which otherwise would
118   * indicate a soft c, and thus the verb and adjective forms of 'panic' are
119   * spelled "panicked", "panicking", and "panicky" respectively.  Use of
120   * the ill-conceived "panicing" and "panic'd" is discouraged.
121   */
122  
123  #include <sys/types.h>
124  #include <sys/varargs.h>
125  #include <sys/sysmacros.h>
126  #include <sys/cmn_err.h>
127  #include <sys/cpuvar.h>
128  #include <sys/thread.h>
129  #include <sys/t_lock.h>
130  #include <sys/cred.h>
131  #include <sys/systm.h>
132  #include <sys/archsystm.h>
133  #include <sys/uadmin.h>
134  #include <sys/callb.h>
135  #include <sys/vfs.h>
136  #include <sys/log.h>
137  #include <sys/disp.h>
138  #include <sys/param.h>
139  #include <sys/dumphdr.h>
140  #include <sys/ftrace.h>
141  #include <sys/reboot.h>
142  #include <sys/debug.h>
143  #include <sys/stack.h>
144  #include <sys/spl.h>
145  #include <sys/errorq.h>
146  #include <sys/panic.h>
147  #include <sys/fm/util.h>
148  #include <sys/clock_impl.h>
149  
/*
 * Panic variables which are set once during the QUIESCE state by the
 * first thread to initiate a panic.  These are examined by post-mortem
 * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
 * the variable naming is historical and allows legacy tools to work.
 *
 * Apart from panic_stack, each variable below is assigned in the
 * on_panic_stack branch of panicsys(), after panic_stopcpus() has been
 * called.  panic_schedflag, panic_bound_cpu, panic_preempt and panic_pcb
 * hold the thread's values as sampled on entry to panicsys(), before
 * panicsys() itself modifies those thread fields.  panic_regs, by
 * contrast, is copied from t_pcb after panicsys() has called setjmp() on it.
 */
#pragma align STACK_ALIGN(panic_stack)
char panic_stack[PANICSTKSIZE];		/* reserved stack for panic_thread */
kthread_t *panic_thread;		/* first thread to call panicsys() */
cpu_t panic_cpu;			/* copy of cpu_t from first panicsys() */
label_t panic_regs;			/* setjmp label from panic_thread */
label_t panic_pcb;			/* t_pcb at time of panic */
struct regs *panic_reg;			/* regs struct from first panicsys() */
char *volatile panicstr;		/* format string to first panicsys() */
va_list panicargs;			/* arguments to first panicsys() */
clock_t panic_lbolt;			/* lbolt at time of panic */
int64_t panic_lbolt64;			/* lbolt64 at time of panic */
hrtime_t panic_hrtime;			/* hrtime at time of panic */
timespec_t panic_hrestime;		/* hrestime at time of panic */
int panic_ipl;				/* ipl on panic_cpu at time of panic */
ushort_t panic_schedflag;		/* t_schedflag for panic_thread */
cpu_t *panic_bound_cpu;			/* t_bound_cpu for panic_thread */
char panic_preempt;			/* t_preempt for panic_thread */
173  
/*
 * Panic variables which can be set via /etc/system or patched while
 * the system is in operation.  Again, the stupid names are historic.
 *
 * panicsys() consults them as follows: a non-zero halt_on_panic selects
 * mdboot(A_REBOOT, AD_HALT, ...) instead of mdboot(A_REBOOT, panic_bootfcn,
 * panic_bootstr, ...); a non-zero nopanicdebug suppresses every
 * debug_enter() call made by panicsys(); a non-zero in_sync causes the
 * vfs_syncall() step to be skipped so that panicsys() goes directly to
 * the dump step.
 */
char *panic_bootstr = NULL;		/* mdboot string to use after panic */
int panic_bootfcn = AD_BOOT;		/* mdboot function to use after panic */
int halt_on_panic = 0;  		/* halt after dump instead of reboot? */
int nopanicdebug = 0;			/* reboot instead of call debugger? */
int in_sync = 0;			/* skip vfs_syncall() and just dump? */
183  
/*
 * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
 * to use polled mode instead of interrupt-driven i/o.  panicsys() sets it to
 * 1 immediately before calling vfs_syncall() and again immediately before
 * calling dumpsys(); panicsys() never clears it.
 */
int do_polled_io = 0;

/*
 * The panic_forced flag is set by the uadmin A_DUMP code to inform the
 * panic subsystem that it should not attempt an initial debug_enter.
 * panicsys() tests it only for the debug_enter() made at the end of the
 * one-time QUIESCE work; the later "continue to reboot" debug_enter()
 * does not check it.
 */
int panic_forced = 0;
195  
/*
 * Triggers for panic state transitions.  panicsys() claims panic_sync and
 * panic_dump through panic_trigger(), and on re-entry treats a non-zero
 * panic_sync or panic_dump (or a non-NULL panicstr) as evidence that a
 * panic has progressed past QUIESCE.  According to the block comment at the
 * top of this file, panic_quiesce is set by vpanic() before panicsys() is
 * called; panicsys() itself does not touch it.
 */
int panic_quiesce;			/* trigger for CALM    -> QUIESCE */
int panic_sync;				/* trigger for QUIESCE -> SYNC */
int panic_dump;				/* trigger for SYNC    -> DUMP */

/*
 * Variable signifying quiesce(9E) is in progress.  It is defined here but
 * is neither read nor written by panicsys() or panic().
 */
volatile int quiesce_active = 0;
207  
208  void
panicsys(const char * format,va_list alist,struct regs * rp,int on_panic_stack)209  panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
210  {
211  	int s = spl8();
212  	kthread_t *t = curthread;
213  	cpu_t *cp = CPU;
214  
215  	caddr_t intr_stack = NULL;
216  	uint_t intr_actv;
217  
218  	ushort_t schedflag = t->t_schedflag;
219  	cpu_t *bound_cpu = t->t_bound_cpu;
220  	char preempt = t->t_preempt;
221  	label_t pcb = t->t_pcb;
222  
223  	(void) setjmp(&t->t_pcb);
224  	t->t_flag |= T_PANIC;
225  
226  	t->t_schedflag |= TS_DONT_SWAP;
227  	t->t_bound_cpu = cp;
228  	t->t_preempt++;
229  
230  	panic_enter_hw(s);
231  
232  	/*
233  	 * If we're on the interrupt stack and an interrupt thread is available
234  	 * in this CPU's pool, preserve the interrupt stack by detaching an
235  	 * interrupt thread and making its stack the intr_stack.
236  	 */
237  	if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
238  		kthread_t *it = cp->cpu_intr_thread;
239  
240  		intr_stack = cp->cpu_intr_stack;
241  		intr_actv = cp->cpu_intr_actv;
242  
243  		cp->cpu_intr_stack = thread_stk_init(it->t_stk);
244  		cp->cpu_intr_thread = it->t_link;
245  
246  		/*
247  		 * Clear only the high level bits of cpu_intr_actv.
248  		 * We want to indicate that high-level interrupts are
249  		 * not active without destroying the low-level interrupt
250  		 * information stored there.
251  		 */
252  		cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
253  	}
254  
255  	/*
256  	 * Record one-time panic information and quiesce the other CPUs.
257  	 * Then print out the panic message and stack trace.
258  	 */
259  	if (on_panic_stack) {
260  		panic_data_t *pdp = (panic_data_t *)panicbuf;
261  
262  		pdp->pd_version = PANICBUFVERS;
263  		pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);
264  
265  		(void) strncpy(pdp->pd_uuid, dump_get_uuid(),
266  		    sizeof (pdp->pd_uuid));
267  
268  		if (t->t_panic_trap != NULL)
269  			panic_savetrap(pdp, t->t_panic_trap);
270  		else
271  			panic_saveregs(pdp, rp);
272  
273  		(void) vsnprintf(&panicbuf[pdp->pd_msgoff],
274  		    PANICBUFSIZE - pdp->pd_msgoff, format, alist);
275  
276  		/*
277  		 * Call into the platform code to stop the other CPUs.
278  		 * We currently have all interrupts blocked, and expect that
279  		 * the platform code will lower ipl only as far as needed to
280  		 * perform cross-calls, and will acquire as *few* locks as is
281  		 * possible -- panicstr is not set so we can still deadlock.
282  		 */
283  		panic_stopcpus(cp, t, s);
284  
285  		panicstr = (char *)format;
286  		va_copy(panicargs, alist);
287  		panic_lbolt = LBOLT_NO_ACCOUNT;
288  		panic_lbolt64 = LBOLT_NO_ACCOUNT64;
289  		panic_hrestime = hrestime;
290  		panic_hrtime = gethrtime_waitfree();
291  		panic_thread = t;
292  		panic_regs = t->t_pcb;
293  		panic_reg = rp;
294  		panic_cpu = *cp;
295  		panic_ipl = spltoipl(s);
296  		panic_schedflag = schedflag;
297  		panic_bound_cpu = bound_cpu;
298  		panic_preempt = preempt;
299  		panic_pcb = pcb;
300  
301  		if (intr_stack != NULL) {
302  			panic_cpu.cpu_intr_stack = intr_stack;
303  			panic_cpu.cpu_intr_actv = intr_actv;
304  		}
305  
306  		/*
307  		 * Lower ipl to 10 to keep clock() from running, but allow
308  		 * keyboard interrupts to enter the debugger.  These callbacks
309  		 * are executed with panicstr set so they can bypass locks.
310  		 */
311  		splx(ipltospl(CLOCK_LEVEL));
312  		panic_quiesce_hw(pdp);
313  		(void) FTRACE_STOP();
314  		(void) callb_execute_class(CB_CL_PANIC, NULL);
315  
316  		if (log_intrq != NULL)
317  			log_flushq(log_intrq);
318  
319  		/*
320  		 * If log_consq has been initialized and syslogd has started,
321  		 * print any messages in log_consq that haven't been consumed.
322  		 */
323  		if (log_consq != NULL && log_consq != log_backlogq)
324  			log_printq(log_consq);
325  
326  		fm_banner();
327  
328  #if defined(__x86)
329  		/*
330  		 * A hypervisor panic originates outside of Solaris, so we
331  		 * don't want to prepend the panic message with misleading
332  		 * pointers from within Solaris.
333  		 */
334  		if (!IN_XPV_PANIC())
335  #endif
336  			printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id,
337  			    (void *)t);
338  		vprintf(format, alist);
339  		printf("\n\n");
340  
341  		if (t->t_panic_trap != NULL) {
342  			panic_showtrap(t->t_panic_trap);
343  			printf("\n");
344  		}
345  
346  		traceregs(rp);
347  		printf("\n");
348  
349  		if (((boothowto & RB_DEBUG) || obpdebug) &&
350  		    !nopanicdebug && !panic_forced) {
351  			if (dumpvp != NULL) {
352  				debug_enter("panic: entering debugger "
353  				    "(continue to save dump)");
354  			} else {
355  				debug_enter("panic: entering debugger "
356  				    "(no dump device, continue to reboot)");
357  			}
358  		}
359  
360  	} else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
361  		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
362  		vprintf(format, alist);
363  		printf("\n");
364  	} else
365  		goto spin;
366  
367  	/*
368  	 * Prior to performing sync or dump, we make sure that do_polled_io is
369  	 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
370  	 * will re-enter panic if we are not making progress with sync or dump.
371  	 */
372  
373  	/*
374  	 * Sync the filesystems.  Reset t_cred if not set because much of
375  	 * the filesystem code depends on CRED() being valid.
376  	 */
377  	if (!in_sync && panic_trigger(&panic_sync)) {
378  		if (t->t_cred == NULL)
379  			t->t_cred = kcred;
380  		splx(ipltospl(CLOCK_LEVEL));
381  		do_polled_io = 1;
382  		vfs_syncall();
383  	}
384  
385  	/*
386  	 * Take the crash dump.  If the dump trigger is already set, try to
387  	 * enter the debugger again before rebooting the system.
388  	 */
389  	if (panic_trigger(&panic_dump)) {
390  		panic_dump_hw(s);
391  		splx(ipltospl(CLOCK_LEVEL));
392  		errorq_panic();
393  		do_polled_io = 1;
394  		dumpsys();
395  	} else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
396  		debug_enter("panic: entering debugger (continue to reboot)");
397  	} else
398  		printf("dump aborted: please record the above information!\n");
399  
400  	if (halt_on_panic)
401  		mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
402  	else
403  		mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
404  spin:
405  	/*
406  	 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
407  	 * and unable to jump into the debugger.
408  	 */
409  	splx(MIN(s, ipltospl(CLOCK_LEVEL)));
410  	for (;;)
411  		;
412  }
413  
/*
 * panic() is the varargs wrapper around vpanic(): it gathers its variable
 * arguments into a va_list and passes them, together with format, to
 * vpanic().
 *
 *	format	printf-style panic message
 *	...	arguments consumed by format
 */
void
panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	vpanic(format, alist);
	va_end(alist);
}
422  }
423