1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 2011, Joyent, Inc. All rights reserved. 27 */ 28 29 /* 30 * When the operating system detects that it is in an invalid state, a panic 31 * is initiated in order to minimize potential damage to user data and to 32 * facilitate debugging. There are three major tasks to be performed in 33 * a system panic: recording information about the panic in memory (and thus 34 * making it part of the crash dump), synchronizing the file systems to 35 * preserve user file data, and generating the crash dump. We define the 36 * system to be in one of four states with respect to the panic code: 37 * 38 * CALM - the state of the system prior to any thread initiating a panic 39 * 40 * QUIESCE - the state of the system when the first thread to initiate 41 * a system panic records information about the cause of the panic 42 * and renders the system quiescent by stopping other processors 43 * 44 * SYNC - the state of the system when we synchronize the file systems 45 * DUMP - the state when we generate the crash dump. 46 * 47 * The transitions between these states are irreversible: once we begin 48 * panicking, we only make one attempt to perform the actions associated with 49 * each state. 50 * 51 * The panic code itself must be re-entrant because actions taken during any 52 * state may lead to another system panic. Additionally, any Solaris 53 * thread may initiate a panic at any time, and so we must have synchronization 54 * between threads which attempt to initiate a state transition simultaneously. 55 * The panic code makes use of a special locking primitive, a trigger, to 56 * perform this synchronization. A trigger is simply a word which is set 57 * atomically and can only be set once. We declare three triggers, one for 58 * each transition between the four states. When a thread enters the panic 59 * code it attempts to set each trigger; if it fails it moves on to the 60 * next trigger. A special case is the first trigger: if two threads race 61 * to perform the transition to QUIESCE, the losing thread may execute before 62 * the winner has a chance to stop its CPU. To solve this problem, we have 63 * the loser look ahead to see if any other triggers are set; if not, it 64 * presumes a panic is underway and simply spins. Unfortunately, since we 65 * are panicking, it is not possible to know this with absolute certainty. 66 * 67 * There are two common reasons for re-entering the panic code once a panic 68 * has been initiated: (1) after we debug_enter() at the end of QUIESCE, 69 * the operator may type "sync" instead of "go", and the PROM's sync callback 70 * routine will invoke panic(); (2) if the clock routine decides that sync 71 * or dump is not making progress, it will invoke panic() to force a timeout. 72 * The design assumes that a third possibility, another thread causing an 73 * unrelated panic while sync or dump is still underway, is extremely unlikely. 74 * If this situation occurs, we may end up triggering dump while sync is 75 * still in progress. This third case is considered extremely unlikely because 76 * all other CPUs are stopped and low-level interrupts have been blocked. 77 * 78 * The panic code is entered via a call directly to the vpanic() function, 79 * or its varargs wrappers panic() and cmn_err(9F). The vpanic routine 80 * is implemented in assembly language to record the current machine 81 * registers, attempt to set the trigger for the QUIESCE state, and 82 * if successful, switch stacks on to the panic_stack before calling into 83 * the common panicsys() routine. The first thread to initiate a panic 84 * is allowed to make use of the reserved panic_stack so that executing 85 * the panic code itself does not overwrite valuable data on that thread's 86 * stack *ahead* of the current stack pointer. This data will be preserved 87 * in the crash dump and may prove invaluable in determining what this 88 * thread has previously been doing. The first thread, saved in panic_thread, 89 * is also responsible for stopping the other CPUs as quickly as possible, 90 * and then setting the various panic_* variables. Most important among 91 * these is panicstr, which allows threads to subsequently bypass held 92 * locks so that we can proceed without ever blocking. We must stop the 93 * other CPUs *prior* to setting panicstr in case threads running there are 94 * currently spinning to acquire a lock; we want that state to be preserved. 95 * Every thread which initiates a panic has its T_PANIC flag set so we can 96 * identify all such threads in the crash dump. 97 * 98 * The panic_thread is also allowed to make use of the special memory buffer 99 * panicbuf, which on machines with appropriate hardware is preserved across 100 * reboots. We allow the panic_thread to store its register set and panic 101 * message in this buffer, so even if we fail to obtain a crash dump we will 102 * be able to examine the machine after reboot and determine some of the 103 * state at the time of the panic. If we do get a dump, the panic buffer 104 * data is structured so that a debugger can easily consume the information 105 * therein (see <sys/panic.h>). 106 * 107 * Each platform or architecture is required to implement the functions 108 * panic_savetrap() to record trap-specific information to panicbuf, 109 * panic_saveregs() to record a register set to panicbuf, panic_stopcpus() 110 * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform 111 * miscellaneous platform-specific tasks *after* panicstr is set, 112 * panic_showtrap() to print trap-specific information to the console, 113 * and panic_dump_hw() to perform platform tasks prior to calling dumpsys(). 114 * 115 * A Note on Word Formation, courtesy of the Oxford Guide to English Usage: 116 * 117 * Words ending in -c interpose k before suffixes which otherwise would 118 * indicate a soft c, and thus the verb and adjective forms of 'panic' are 119 * spelled "panicked", "panicking", and "panicky" respectively. Use of 120 * the ill-conceived "panicing" and "panic'd" is discouraged. 121 */ 122 123 #include <sys/types.h> 124 #include <sys/varargs.h> 125 #include <sys/sysmacros.h> 126 #include <sys/cmn_err.h> 127 #include <sys/cpuvar.h> 128 #include <sys/thread.h> 129 #include <sys/t_lock.h> 130 #include <sys/cred.h> 131 #include <sys/systm.h> 132 #include <sys/archsystm.h> 133 #include <sys/uadmin.h> 134 #include <sys/callb.h> 135 #include <sys/vfs.h> 136 #include <sys/log.h> 137 #include <sys/disp.h> 138 #include <sys/param.h> 139 #include <sys/dumphdr.h> 140 #include <sys/ftrace.h> 141 #include <sys/reboot.h> 142 #include <sys/debug.h> 143 #include <sys/stack.h> 144 #include <sys/spl.h> 145 #include <sys/errorq.h> 146 #include <sys/panic.h> 147 #include <sys/fm/util.h> 148 #include <sys/clock_impl.h> 149 150 /* 151 * Panic variables which are set once during the QUIESCE state by the 152 * first thread to initiate a panic. These are examined by post-mortem 153 * debugging tools; the inconsistent use of 'panic' versus 'panic_' in 154 * the variable naming is historical and allows legacy tools to work. 155 */ 156 #pragma align STACK_ALIGN(panic_stack) 157 char panic_stack[PANICSTKSIZE]; /* reserved stack for panic_thread */ 158 kthread_t *panic_thread; /* first thread to call panicsys() */ 159 cpu_t panic_cpu; /* cpu from first call to panicsys() */ 160 label_t panic_regs; /* setjmp label from panic_thread */ 161 label_t panic_pcb; /* t_pcb at time of panic */ 162 struct regs *panic_reg; /* regs struct from first panicsys() */ 163 char *volatile panicstr; /* format string to first panicsys() */ 164 va_list panicargs; /* arguments to first panicsys() */ 165 clock_t panic_lbolt; /* lbolt at time of panic */ 166 int64_t panic_lbolt64; /* lbolt64 at time of panic */ 167 hrtime_t panic_hrtime; /* hrtime at time of panic */ 168 timespec_t panic_hrestime; /* hrestime at time of panic */ 169 int panic_ipl; /* ipl on panic_cpu at time of panic */ 170 ushort_t panic_schedflag; /* t_schedflag for panic_thread */ 171 cpu_t *panic_bound_cpu; /* t_bound_cpu for panic_thread */ 172 char panic_preempt; /* t_preempt for panic_thread */ 173 174 /* 175 * Panic variables which can be set via /etc/system or patched while 176 * the system is in operation. Again, the stupid names are historic. 177 */ 178 char *panic_bootstr = NULL; /* mdboot string to use after panic */ 179 int panic_bootfcn = AD_BOOT; /* mdboot function to use after panic */ 180 int halt_on_panic = 0; /* halt after dump instead of reboot? */ 181 int nopanicdebug = 0; /* reboot instead of call debugger? */ 182 int in_sync = 0; /* skip vfs_syncall() and just dump? */ 183 184 /* 185 * The do_polled_io flag is set by the panic code to inform the SCSI subsystem 186 * to use polled mode instead of interrupt-driven i/o. 187 */ 188 int do_polled_io = 0; 189 190 /* 191 * The panic_forced flag is set by the uadmin A_DUMP code to inform the 192 * panic subsystem that it should not attempt an initial debug_enter. 193 */ 194 int panic_forced = 0; 195 196 /* 197 * Triggers for panic state transitions: 198 */ 199 int panic_quiesce; /* trigger for CALM -> QUIESCE */ 200 int panic_sync; /* trigger for QUIESCE -> SYNC */ 201 int panic_dump; /* trigger for SYNC -> DUMP */ 202 203 /* 204 * Variable signifying quiesce(9E) is in progress. 205 */ 206 volatile int quiesce_active = 0; 207 208 void 209 panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack) 210 { 211 int s = spl8(); 212 kthread_t *t = curthread; 213 cpu_t *cp = CPU; 214 215 caddr_t intr_stack = NULL; 216 uint_t intr_actv; 217 218 ushort_t schedflag = t->t_schedflag; 219 cpu_t *bound_cpu = t->t_bound_cpu; 220 char preempt = t->t_preempt; 221 label_t pcb = t->t_pcb; 222 223 (void) setjmp(&t->t_pcb); 224 t->t_flag |= T_PANIC; 225 226 t->t_schedflag |= TS_DONT_SWAP; 227 t->t_bound_cpu = cp; 228 t->t_preempt++; 229 230 panic_enter_hw(s); 231 232 /* 233 * If we're on the interrupt stack and an interrupt thread is available 234 * in this CPU's pool, preserve the interrupt stack by detaching an 235 * interrupt thread and making its stack the intr_stack. 236 */ 237 if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) { 238 kthread_t *it = cp->cpu_intr_thread; 239 240 intr_stack = cp->cpu_intr_stack; 241 intr_actv = cp->cpu_intr_actv; 242 243 cp->cpu_intr_stack = thread_stk_init(it->t_stk); 244 cp->cpu_intr_thread = it->t_link; 245 246 /* 247 * Clear only the high level bits of cpu_intr_actv. 248 * We want to indicate that high-level interrupts are 249 * not active without destroying the low-level interrupt 250 * information stored there. 251 */ 252 cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1); 253 } 254 255 /* 256 * Record one-time panic information and quiesce the other CPUs. 257 * Then print out the panic message and stack trace. 258 */ 259 if (on_panic_stack) { 260 panic_data_t *pdp = (panic_data_t *)panicbuf; 261 262 pdp->pd_version = PANICBUFVERS; 263 pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t); 264 265 (void) strncpy(pdp->pd_uuid, dump_get_uuid(), 266 sizeof (pdp->pd_uuid)); 267 268 if (t->t_panic_trap != NULL) 269 panic_savetrap(pdp, t->t_panic_trap); 270 else 271 panic_saveregs(pdp, rp); 272 273 (void) vsnprintf(&panicbuf[pdp->pd_msgoff], 274 PANICBUFSIZE - pdp->pd_msgoff, format, alist); 275 276 /* 277 * Call into the platform code to stop the other CPUs. 278 * We currently have all interrupts blocked, and expect that 279 * the platform code will lower ipl only as far as needed to 280 * perform cross-calls, and will acquire as *few* locks as is 281 * possible -- panicstr is not set so we can still deadlock. 282 */ 283 panic_stopcpus(cp, t, s); 284 285 panicstr = (char *)format; 286 va_copy(panicargs, alist); 287 panic_lbolt = LBOLT_NO_ACCOUNT; 288 panic_lbolt64 = LBOLT_NO_ACCOUNT64; 289 panic_hrestime = hrestime; 290 panic_hrtime = gethrtime_waitfree(); 291 panic_thread = t; 292 panic_regs = t->t_pcb; 293 panic_reg = rp; 294 panic_cpu = *cp; 295 panic_ipl = spltoipl(s); 296 panic_schedflag = schedflag; 297 panic_bound_cpu = bound_cpu; 298 panic_preempt = preempt; 299 panic_pcb = pcb; 300 301 if (intr_stack != NULL) { 302 panic_cpu.cpu_intr_stack = intr_stack; 303 panic_cpu.cpu_intr_actv = intr_actv; 304 } 305 306 /* 307 * Lower ipl to 10 to keep clock() from running, but allow 308 * keyboard interrupts to enter the debugger. These callbacks 309 * are executed with panicstr set so they can bypass locks. 310 */ 311 splx(ipltospl(CLOCK_LEVEL)); 312 panic_quiesce_hw(pdp); 313 (void) FTRACE_STOP(); 314 (void) callb_execute_class(CB_CL_PANIC, NULL); 315 316 if (log_intrq != NULL) 317 log_flushq(log_intrq); 318 319 /* 320 * If log_consq has been initialized and syslogd has started, 321 * print any messages in log_consq that haven't been consumed. 322 */ 323 if (log_consq != NULL && log_consq != log_backlogq) 324 log_printq(log_consq); 325 326 fm_banner(); 327 328 #if defined(__x86) 329 /* 330 * A hypervisor panic originates outside of Solaris, so we 331 * don't want to prepend the panic message with misleading 332 * pointers from within Solaris. 333 */ 334 if (!IN_XPV_PANIC()) 335 #endif 336 printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, 337 (void *)t); 338 vprintf(format, alist); 339 printf("\n\n"); 340 341 if (t->t_panic_trap != NULL) { 342 panic_showtrap(t->t_panic_trap); 343 printf("\n"); 344 } 345 346 traceregs(rp); 347 printf("\n"); 348 349 if (((boothowto & RB_DEBUG) || obpdebug) && 350 !nopanicdebug && !panic_forced) { 351 if (dumpvp != NULL) { 352 debug_enter("panic: entering debugger " 353 "(continue to save dump)"); 354 } else { 355 debug_enter("panic: entering debugger " 356 "(no dump device, continue to reboot)"); 357 } 358 } 359 360 } else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) { 361 printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t); 362 vprintf(format, alist); 363 printf("\n"); 364 } else 365 goto spin; 366 367 /* 368 * Prior to performing sync or dump, we make sure that do_polled_io is 369 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic, 370 * will re-enter panic if we are not making progress with sync or dump. 371 */ 372 373 /* 374 * Sync the filesystems. Reset t_cred if not set because much of 375 * the filesystem code depends on CRED() being valid. 376 */ 377 if (!in_sync && panic_trigger(&panic_sync)) { 378 if (t->t_cred == NULL) 379 t->t_cred = kcred; 380 splx(ipltospl(CLOCK_LEVEL)); 381 do_polled_io = 1; 382 vfs_syncall(); 383 } 384 385 /* 386 * Take the crash dump. If the dump trigger is already set, try to 387 * enter the debugger again before rebooting the system. 388 */ 389 if (panic_trigger(&panic_dump)) { 390 panic_dump_hw(s); 391 splx(ipltospl(CLOCK_LEVEL)); 392 errorq_panic(); 393 do_polled_io = 1; 394 dumpsys(); 395 } else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) { 396 debug_enter("panic: entering debugger (continue to reboot)"); 397 } else 398 printf("dump aborted: please record the above information!\n"); 399 400 if (halt_on_panic) 401 mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE); 402 else 403 mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE); 404 spin: 405 /* 406 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning 407 * and unable to jump into the debugger. 408 */ 409 splx(MIN(s, ipltospl(CLOCK_LEVEL))); 410 for (;;) 411 ; 412 } 413 414 void 415 panic(const char *format, ...) 416 { 417 va_list alist; 418 419 va_start(alist, format); 420 vpanic(format, alist); 421 va_end(alist); 422 } 423