/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/reboot.h>
#include <sys/autoconf.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/membar.h>
#include <vm/hat_sfmmu.h>
#include <sys/cpu_module.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/intreg.h>
#include <sys/consdev.h>
#include <sys/kdi_impl.h>
#include <sys/callb.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
u_longlong_t panic_tick;
#endif /* TRAPTRACE */

extern u_longlong_t gettick();
static void reboot_machine(char *);
extern int disable_watchdog_on_exit;

/*
 * Machine dependent code to reboot.
 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
 * to a string to be used as the argument string when rebooting.
 *
 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
 * we are in a normal shutdown sequence (interrupts are not blocked, the
 * system is not panic'ing or being suspended).
 */
/*ARGSUSED*/
void
mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb)
{
	extern void pm_cfb_check_and_powerup(void);

	/*
	 * Disable the hw watchdog timer.
	 */
	if (disable_watchdog_on_exit && watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	/*
	 * XXX - rconsvp is set to NULL to ensure that output messages
	 * are sent to the underlying "hardware" device using the
	 * monitor's printf routine since we are in the process of
	 * either rebooting or halting the machine.
	 */
	rconsvp = NULL;

	/*
	 * At a high interrupt level we can't:
	 *	1) bring up the console
	 * or
	 *	2) wait for pending interrupts prior to redistribution
	 *	   to the current CPU
	 *
	 * so we do them now.
	 */
	pm_cfb_check_and_powerup();

	/* make sure there are no more changes to the device tree */
	devtree_freeze();

	if (invoke_cb)
		(void) callb_execute_class(CB_CL_MDBOOT, NULL);

	/*
	 * Clear any unresolved UEs from memory.
	 */
	if (memsegs != NULL)
		page_retire_hunt(page_retire_mdboot_cb);

	/*
	 * Stop other CPUs, which also raises our priority. Since there is
	 * only one active CPU after this, and our priority will be too high
	 * for us to be preempted, we're essentially single-threaded from
	 * here on out.
	 */
	stop_other_cpus();

	/*
	 * Try to reset leaf devices. reset_leaves() should only be
	 * called when there are no other threads that could be
	 * accessing devices.
	 */
	reset_leaves();

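	/*
	 * Dispatch on the uadmin(2) function code: AD_HALT and AD_POWEROFF
	 * stop the machine here, while the AD_*BOOT variants select a boot
	 * argument string ("-a" to prompt for boot parameters, "-s" for a
	 * single-user boot) when the caller did not supply one, and then
	 * hand it to reboot_machine().
	 */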
	if (fcn == AD_HALT) {
		halt((char *)NULL);
	} else if (fcn == AD_POWEROFF) {
		power_down(NULL);
	} else {
		if (bootstr == NULL) {
			switch (fcn) {

			case AD_BOOT:
				bootstr = "";
				break;

			case AD_IBOOT:
				bootstr = "-a";
				break;

			case AD_SBOOT:
				bootstr = "-s";
				break;

			case AD_SIBOOT:
				bootstr = "-sa";
				break;
			default:
				cmn_err(CE_WARN,
				    "mdboot: invalid function %d", fcn);
				bootstr = "";
				break;
			}
		}
		reboot_machine(bootstr);
	}
	/* MAYBE REACHED */
}

/* mdpreboot - may be called prior to mdboot while root fs still mounted */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
}

/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 */
static void
reboot_machine(char *bootstr)
{
	flush_windows();
	stop_other_cpus();		/* send stop signal to other CPUs */
	prom_printf("rebooting...\n");
	/*
	 * For platforms that use CPU signatures, we
	 * need to set the signature block to OS and
	 * the state to exiting for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
	prom_reboot(bootstr);
	/*NOTREACHED*/
}

/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 */
static void
panic_idle(void)
{
	cpu_async_panic_callb();	/* check for async errors */

	(void) spl7();

	debug_flush_windows();
	(void) setjmp(&curthread->t_pcb);

	CPU->cpu_m.in_prom = 1;
	membar_stld();

	for (;;);
}

/*
 * Force the other CPUs to trap into panic_idle(), and then remove them
 * from the cpu_ready_set so they will no longer receive cross-calls.
 */
/*ARGSUSED*/
void
panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
{
	cpuset_t cps;
	int i;

	(void) splzs();
	CPUSET_ALL_BUT(cps, cp->cpu_id);
	xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);

	for (i = 0; i < NCPU; i++) {
		if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
			int ntries = 0x10000;

			while (!cpu[i]->cpu_m.in_prom && ntries) {
				DELAY(50);
				ntries--;
			}

			if (!cpu[i]->cpu_m.in_prom)
				printf("panic: failed to stop cpu%d\n", i);

			cpu[i]->cpu_flags &= ~CPU_READY;
			cpu[i]->cpu_flags |= CPU_QUIESCED;
			CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
		}
	}
}

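/*
 * The three routines that follow are the platform hooks the common panic
 * code invokes in sequence: panic_enter_hw() on each entry to panicsys(),
 * panic_quiesce_hw() once panicstr has been set, and panic_dump_hw() just
 * before the crash dump is written.
 */
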
/*
 * Platform callback following each entry to panicsys(). If we've panicked at
 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so,
 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic
 * was made and so we re-enqueue an interrupt request structure to allow
 * further level 14 interrupts to be processed once we lower PIL. This allows
 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 */
void
panic_enter_hw(int spl)
{
	if (spl == ipltospl(PIL_14)) {
		uint_t opstate = disable_vec_intr();

		if (curthread->t_panic_trap != NULL) {
			tickcmpr_disable();
			intr_dequeue_req(PIL_14, cbe_level14_inum);
		} else {
			if (!tickcmpr_disabled())
				intr_enqueue_req(PIL_14, cbe_level14_inum);
			/*
			 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
			 * and SOFTINT<16> (STICK_INT) to indicate
			 * that the current level 14 has been serviced.
			 */
			wr_clr_softint((1 << PIL_14) |
			    TICK_INT_MASK | STICK_INT_MASK);
		}

		enable_vec_intr(opstate);
	}
}

/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code: we also print and record PTL1 panic information here.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
	extern uint_t getpstate(void);
	extern void setpstate(uint_t);

#ifdef TRAPTRACE
	/*
	 * Turn off TRAPTRACE and save the current %tick value in panic_tick.
	 */
	if (!panic_tick)
		panic_tick = gettick();
	TRAPTRACE_FREEZE;
#endif
	/*
	 * For platforms that use CPU signatures, we
	 * need to set the signature block to OS, the state to
	 * exiting, and the substate to panic for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

	/*
	 * De-activate ECC functions and disable the watchdog timer now that
	 * we've made it through the critical part of the panic code.
	 */
	if (watchdog_enable)
		(void) tod_ops.tod_clear_watchdog_timer();

	/*
	 * Disable further ECC errors from the CPU module and the bus nexus.
	 */
	cpu_disable_errors();
	(void) bus_func_invoke(BF_TYPE_ERRDIS);

	/*
	 * Redirect all interrupts to the current CPU.
	 */
	intr_redist_all_cpus_shutdown();

	/*
	 * This call exists solely to support dumps to network
	 * devices after sync from OBP.
	 *
	 * If we came here via the sync callback, then on some
	 * platforms, interrupts may have arrived while we were
	 * stopped in OBP. OBP will arrange for those interrupts to
	 * be redelivered if you say "go", but not if you invoke a
	 * client callback like 'sync'. For some dump devices
	 * (network swap devices), we need interrupts to be
	 * delivered in order to dump, so we have to call the bus
	 * nexus driver to reset the interrupt state machines.
	 */
	(void) bus_func_invoke(BF_TYPE_RESINTR);

	setpstate(getpstate() | PSTATE_IE);
}

/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to
 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to
 * reboot the machine if the dump never completes.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}

/*
 * for ptl1_panic
 */
void
ptl1_init_cpu(struct cpu *cpu)
{
	ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;

	/*CONSTCOND*/
	if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
		panic("ptl1_init_cpu: not enough space left for ptl1_panic "
		    "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu));
	}

	pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
	cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
}

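/*
 * Entered from the ptl1_panic trap-level-1 code after the failing CPU's
 * register state has been captured in its ptl1_state structure: translate
 * the reason code (saved from %g1) into a message and panic.
 */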
void
ptl1_panic_handler(ptl1_state_t *pstate)
{
	static const char *ptl1_reasons[] = {
#ifdef PTL1_PANIC_DEBUG
		"trap for debug purpose",	/* PTL1_BAD_DEBUG */
#else
		"unknown trap",			/* PTL1_BAD_DEBUG */
#endif
		"register window trap",		/* PTL1_BAD_WTRAP */
		"kernel MMU miss",		/* PTL1_BAD_KMISS */
		"kernel protection fault",	/* PTL1_BAD_KPROT_FAULT */
		"ISM MMU miss",			/* PTL1_BAD_ISM */
		"kernel MMU trap",		/* PTL1_BAD_MMUTRAP */
		"kernel trap handler state",	/* PTL1_BAD_TRAP */
		"floating point trap",		/* PTL1_BAD_FPTRAP */
#ifdef DEBUG
		"pointer to intr_req",		/* PTL1_BAD_INTR_REQ */
#else
		"unknown trap",			/* PTL1_BAD_INTR_REQ */
#endif
#ifdef TRAPTRACE
		"TRACE_PTR state",		/* PTL1_BAD_TRACE_PTR */
#else
		"unknown trap",			/* PTL1_BAD_TRACE_PTR */
#endif
		"stack overflow",		/* PTL1_BAD_STACK */
		"DTrace flags",			/* PTL1_BAD_DTRACE_FLAGS */
		"attempt to steal locked ctx",	/* PTL1_BAD_CTX_STEAL */
		"CPU ECC error loop",		/* PTL1_BAD_ECC */
		"non-kernel context in sys/priv_trap() below or",
						/* PTL1_BAD_CTX */
	};

	uint_t reason = pstate->ptl1_regs.ptl1_g1;
	uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
	struct trap_info ti = { 0 };

	/*
	 * Use trap_info for a place holder to call panic_savetrap() and
	 * panic_showtrap() to save and print out ptl1_panic information.
	 */
	if (curthread->t_panic_trap == NULL)
		curthread->t_panic_trap = &ti;

	if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
		panic("bad %s at TL %u", ptl1_reasons[reason], tl);
	else
		panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
}

void
clear_watchdog_on_exit()
{
	/*
	 * Only shut down an active hardware watchdog timer if the platform
	 * has expressed an interest in doing so.
	 */
	if (disable_watchdog_on_exit && watchdog_activated) {
		prom_printf("Debugging requested; hardware watchdog "
		    "disabled; reboot to re-enable.\n");
		cmn_err(CE_WARN, "!Debugging requested; hardware watchdog "
		    "disabled; reboot to re-enable.");
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}
}

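/*
 * Hardware watchdog control on behalf of the kernel debugger (see
 * sys/kdi_impl.h): kdi_watchdog_disable() shuts off an active watchdog
 * while the debugger has control and reports whether it was running, and
 * kdi_watchdog_restore() re-arms it (if the watchdog is enabled) when the
 * system is resumed.
 */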
int
kdi_watchdog_disable(void)
{
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	return (watchdog_activated);
}

void
kdi_watchdog_restore(void)
{
	if (watchdog_enable) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
		mutex_exit(&tod_lock);
	}
}

/*ARGSUSED*/
void
mach_dump_buffer_init(void)
{
	/*
	 * setup dump buffer to store extra crash information
	 * not applicable to sun4u
	 */
}

/*
 * xt_sync - wait for previous x-traps to finish
 */
void
xt_sync(cpuset_t cpuset)
{
	kpreempt_disable();
	CPUSET_DEL(cpuset, CPU->cpu_id);
	CPUSET_AND(cpuset, cpu_ready_set);
	xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0);
	kpreempt_enable();
}