1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/uadmin.h> 31 #include <sys/panic.h> 32 #include <sys/reboot.h> 33 #include <sys/autoconf.h> 34 #include <sys/machsystm.h> 35 #include <sys/promif.h> 36 #include <sys/membar.h> 37 #include <vm/hat_sfmmu.h> 38 #include <sys/cpu_module.h> 39 #include <sys/cpu_sgnblk_defs.h> 40 #include <sys/intreg.h> 41 #include <sys/consdev.h> 42 #include <sys/kdi_impl.h> 43 #include <sys/callb.h> 44 45 #ifdef TRAPTRACE 46 #include <sys/traptrace.h> 47 u_longlong_t panic_tick; 48 #endif /* TRAPTRACE */ 49 50 extern u_longlong_t gettick(); 51 static void reboot_machine(char *); 52 int disable_watchdog_on_exit = 0; 53 54 /* 55 * Machine dependent code to reboot. 56 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer 57 * to a string to be used as the argument string when rebooting. 58 * 59 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely 60 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when 61 * we are in a normal shutdown sequence (interrupts are not blocked, the 62 * system is not panic'ing or being suspended). 63 */ 64 /*ARGSUSED*/ 65 void 66 mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb) 67 { 68 extern void pm_cfb_check_and_powerup(void); 69 70 /* 71 * Disable the hw watchdog timer. 72 */ 73 if (disable_watchdog_on_exit && watchdog_activated) { 74 mutex_enter(&tod_lock); 75 (void) tod_ops.tod_clear_watchdog_timer(); 76 mutex_exit(&tod_lock); 77 } 78 79 /* 80 * XXX - rconsvp is set to NULL to ensure that output messages 81 * are sent to the underlying "hardware" device using the 82 * monitor's printf routine since we are in the process of 83 * either rebooting or halting the machine. 84 */ 85 rconsvp = NULL; 86 87 /* 88 * At a high interrupt level we can't: 89 * 1) bring up the console 90 * or 91 * 2) wait for pending interrupts prior to redistribution 92 * to the current CPU 93 * 94 * so we do them now. 95 */ 96 pm_cfb_check_and_powerup(); 97 98 /* make sure there are no more changes to the device tree */ 99 devtree_freeze(); 100 101 if (invoke_cb) 102 (void) callb_execute_class(CB_CL_MDBOOT, NULL); 103 104 /* 105 * Clear any unresolved UEs from memory. 106 */ 107 page_retire_mdboot(); 108 109 /* 110 * stop other cpus which also raise our priority. since there is only 111 * one active cpu after this, and our priority will be too high 112 * for us to be preempted, we're essentially single threaded 113 * from here on out. 114 */ 115 stop_other_cpus(); 116 117 /* 118 * try and reset leaf devices. reset_leaves() should only 119 * be called when there are no other threads that could be 120 * accessing devices 121 */ 122 reset_leaves(); 123 124 if (fcn == AD_HALT) { 125 halt((char *)NULL); 126 } else if (fcn == AD_POWEROFF) { 127 power_down(NULL); 128 } else { 129 if (bootstr == NULL) { 130 switch (fcn) { 131 132 case AD_BOOT: 133 bootstr = ""; 134 break; 135 136 case AD_IBOOT: 137 bootstr = "-a"; 138 break; 139 140 case AD_SBOOT: 141 bootstr = "-s"; 142 break; 143 144 case AD_SIBOOT: 145 bootstr = "-sa"; 146 break; 147 default: 148 cmn_err(CE_WARN, 149 "mdboot: invalid function %d", fcn); 150 bootstr = ""; 151 break; 152 } 153 } 154 reboot_machine(bootstr); 155 } 156 /* MAYBE REACHED */ 157 } 158 159 /* mdpreboot - may be called prior to mdboot while root fs still mounted */ 160 /*ARGSUSED*/ 161 void 162 mdpreboot(int cmd, int fcn, char *bootstr) 163 { 164 } 165 166 /* 167 * Halt the machine and then reboot with the device 168 * and arguments specified in bootstr. 169 */ 170 static void 171 reboot_machine(char *bootstr) 172 { 173 flush_windows(); 174 stop_other_cpus(); /* send stop signal to other CPUs */ 175 prom_printf("rebooting...\n"); 176 /* 177 * For platforms that use CPU signatures, we 178 * need to set the signature block to OS and 179 * the state to exiting for all the processors. 180 */ 181 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1); 182 prom_reboot(bootstr); 183 /*NOTREACHED*/ 184 } 185 186 /* 187 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs. 188 * Once in panic_idle() they raise spl, record their location, and spin. 189 */ 190 static void 191 panic_idle(void) 192 { 193 cpu_async_panic_callb(); /* check for async errors */ 194 195 (void) spl7(); 196 197 debug_flush_windows(); 198 (void) setjmp(&curthread->t_pcb); 199 200 CPU->cpu_m.in_prom = 1; 201 membar_stld(); 202 203 for (;;); 204 } 205 206 /* 207 * Force the other CPUs to trap into panic_idle(), and then remove them 208 * from the cpu_ready_set so they will no longer receive cross-calls. 209 */ 210 /*ARGSUSED*/ 211 void 212 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl) 213 { 214 cpuset_t cps; 215 int i; 216 217 (void) splzs(); 218 CPUSET_ALL_BUT(cps, cp->cpu_id); 219 xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL); 220 221 for (i = 0; i < NCPU; i++) { 222 if (i != cp->cpu_id && CPU_XCALL_READY(i)) { 223 int ntries = 0x10000; 224 225 while (!cpu[i]->cpu_m.in_prom && ntries) { 226 DELAY(50); 227 ntries--; 228 } 229 230 if (!cpu[i]->cpu_m.in_prom) 231 printf("panic: failed to stop cpu%d\n", i); 232 233 cpu[i]->cpu_flags &= ~CPU_READY; 234 cpu[i]->cpu_flags |= CPU_QUIESCED; 235 CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id); 236 } 237 } 238 } 239 240 /* 241 * Platform callback following each entry to panicsys(). If we've panicked at 242 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so, 243 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic 244 * was made and so we re-enqueue an interrupt request structure to allow 245 * further level 14 interrupts to be processed once we lower PIL. This allows 246 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic. 247 */ 248 void 249 panic_enter_hw(int spl) 250 { 251 if (spl == ipltospl(PIL_14)) { 252 uint_t opstate = disable_vec_intr(); 253 254 if (curthread->t_panic_trap != NULL) { 255 tickcmpr_disable(); 256 intr_dequeue_req(PIL_14, cbe_level14_inum); 257 } else { 258 if (!tickcmpr_disabled()) 259 intr_enqueue_req(PIL_14, cbe_level14_inum); 260 /* 261 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT) 262 * and SOFTINT<16> (STICK_INT) to indicate 263 * that the current level 14 has been serviced. 264 */ 265 wr_clr_softint((1 << PIL_14) | 266 TICK_INT_MASK | STICK_INT_MASK); 267 } 268 269 enable_vec_intr(opstate); 270 } 271 } 272 273 /* 274 * Miscellaneous hardware-specific code to execute after panicstr is set 275 * by the panic code: we also print and record PTL1 panic information here. 276 */ 277 /*ARGSUSED*/ 278 void 279 panic_quiesce_hw(panic_data_t *pdp) 280 { 281 extern uint_t getpstate(void); 282 extern void setpstate(uint_t); 283 284 #ifdef TRAPTRACE 285 /* 286 * Turn off TRAPTRACE and save the current %tick value in panic_tick. 287 */ 288 if (!panic_tick) 289 panic_tick = gettick(); 290 TRAPTRACE_FREEZE; 291 #endif 292 /* 293 * For Platforms that use CPU signatures, we 294 * need to set the signature block to OS, the state to 295 * exiting, and the substate to panic for all the processors. 296 */ 297 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1); 298 299 /* 300 * De-activate ECC functions and disable the watchdog timer now that 301 * we've made it through the critical part of the panic code. 302 */ 303 if (watchdog_enable) 304 (void) tod_ops.tod_clear_watchdog_timer(); 305 306 /* 307 * Disable further ECC errors from the CPU module and the bus nexus. 308 */ 309 cpu_disable_errors(); 310 (void) bus_func_invoke(BF_TYPE_ERRDIS); 311 312 /* 313 * Redirect all interrupts to the current CPU. 314 */ 315 intr_redist_all_cpus_shutdown(); 316 317 /* 318 * This call exists solely to support dumps to network 319 * devices after sync from OBP. 320 * 321 * If we came here via the sync callback, then on some 322 * platforms, interrupts may have arrived while we were 323 * stopped in OBP. OBP will arrange for those interrupts to 324 * be redelivered if you say "go", but not if you invoke a 325 * client callback like 'sync'. For some dump devices 326 * (network swap devices), we need interrupts to be 327 * delivered in order to dump, so we have to call the bus 328 * nexus driver to reset the interrupt state machines. 329 */ 330 (void) bus_func_invoke(BF_TYPE_RESINTR); 331 332 setpstate(getpstate() | PSTATE_IE); 333 } 334 335 /* 336 * Platforms that use CPU signatures need to set the signature block to OS and 337 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to 338 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to 339 * reboot the machine if the dump never completes. 340 */ 341 /*ARGSUSED*/ 342 void 343 panic_dump_hw(int spl) 344 { 345 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1); 346 } 347 348 /* 349 * for ptl1_panic 350 */ 351 void 352 ptl1_init_cpu(struct cpu *cpu) 353 { 354 ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state; 355 356 /*CONSTCOND*/ 357 if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) { 358 panic("ptl1_init_cpu: not enough space left for ptl1_panic " 359 "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu)); 360 } 361 362 pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE; 363 cpu_pa[cpu->cpu_id] = va_to_pa(cpu); 364 } 365 366 void 367 ptl1_panic_handler(ptl1_state_t *pstate) 368 { 369 static const char *ptl1_reasons[] = { 370 #ifdef PTL1_PANIC_DEBUG 371 "trap for debug purpose", /* PTL1_BAD_DEBUG */ 372 #else 373 "unknown trap", /* PTL1_BAD_DEBUG */ 374 #endif 375 "register window trap", /* PTL1_BAD_WTRAP */ 376 "kernel MMU miss", /* PTL1_BAD_KMISS */ 377 "kernel protection fault", /* PTL1_BAD_KPROT_FAULT */ 378 "ISM MMU miss", /* PTL1_BAD_ISM */ 379 "kernel MMU trap", /* PTL1_BAD_MMUTRAP */ 380 "kernel trap handler state", /* PTL1_BAD_TRAP */ 381 "floating point trap", /* PTL1_BAD_FPTRAP */ 382 #ifdef DEBUG 383 "pointer to intr_vec", /* PTL1_BAD_INTR_VEC */ 384 #else 385 "unknown trap", /* PTL1_BAD_INTR_VEC */ 386 #endif 387 #ifdef TRAPTRACE 388 "TRACE_PTR state", /* PTL1_BAD_TRACE_PTR */ 389 #else 390 "unknown trap", /* PTL1_BAD_TRACE_PTR */ 391 #endif 392 "stack overflow", /* PTL1_BAD_STACK */ 393 "DTrace flags", /* PTL1_BAD_DTRACE_FLAGS */ 394 "attempt to steal locked ctx", /* PTL1_BAD_CTX_STEAL */ 395 "CPU ECC error loop", /* PTL1_BAD_ECC */ 396 "non-kernel context in sys/priv_trap() below or", 397 /* PTL1_BAD_CTX */ 398 }; 399 400 uint_t reason = pstate->ptl1_regs.ptl1_g1; 401 uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl; 402 struct trap_info ti = { 0 }; 403 404 /* 405 * Use trap_info for a place holder to call panic_savetrap() and 406 * panic_showtrap() to save and print out ptl1_panic information. 407 */ 408 if (curthread->t_panic_trap == NULL) 409 curthread->t_panic_trap = &ti; 410 411 if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0])) 412 panic("bad %s at TL %u", ptl1_reasons[reason], tl); 413 else 414 panic("ptl1_panic reason 0x%x at TL %u", reason, tl); 415 } 416 417 void 418 clear_watchdog_on_exit() 419 { 420 /* 421 * Only shut down an active hardware watchdog timer if the platform 422 * has expressed an interest to. 423 */ 424 if (disable_watchdog_on_exit && watchdog_activated) { 425 prom_printf("Debugging requested; hardware watchdog " 426 "disabled; reboot to re-enable.\n"); 427 cmn_err(CE_WARN, "!Debugging requested; hardware watchdog " 428 "disabled; reboot to re-enable."); 429 mutex_enter(&tod_lock); 430 (void) tod_ops.tod_clear_watchdog_timer(); 431 mutex_exit(&tod_lock); 432 } 433 } 434 435 /* 436 * This null routine is only used by sun4v watchdog timer support. 437 */ 438 void 439 restore_watchdog_on_entry(void) 440 { 441 } 442 443 int 444 kdi_watchdog_disable(void) 445 { 446 if (watchdog_activated) { 447 mutex_enter(&tod_lock); 448 (void) tod_ops.tod_clear_watchdog_timer(); 449 mutex_exit(&tod_lock); 450 } 451 452 return (watchdog_activated); 453 } 454 455 void 456 kdi_watchdog_restore(void) 457 { 458 if (watchdog_enable) { 459 mutex_enter(&tod_lock); 460 (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds); 461 mutex_exit(&tod_lock); 462 } 463 } 464 465 /* 466 * This null routine is only used by sun4v watchdog timer support. 467 */ 468 void 469 watchdog_init(void) 470 { 471 } 472 473 /* 474 * This null routine is only used by sun4v watchdog timer support. 475 */ 476 void 477 watchdog_pat(void) 478 { 479 } 480 481 /* 482 * This null routine is only used by sun4v watchdog timer support. 483 */ 484 void 485 watchdog_suspend(void) 486 { 487 } 488 489 /* 490 * This null routine is only used by sun4v watchdog timer support. 491 */ 492 void 493 watchdog_resume(void) 494 { 495 } 496 497 /* 498 * This null routine is only used by sun4v watchdog timer support. 499 */ 500 void 501 watchdog_clear(void) 502 { 503 } 504 505 /*ARGSUSED*/ 506 void 507 mach_dump_buffer_init(void) 508 { 509 /* 510 * setup dump buffer to store extra crash information 511 * not applicable to sun4u 512 */ 513 } 514 515 /* 516 * xt_sync - wait for previous x-traps to finish 517 */ 518 void 519 xt_sync(cpuset_t cpuset) 520 { 521 kpreempt_disable(); 522 CPUSET_DEL(cpuset, CPU->cpu_id); 523 CPUSET_AND(cpuset, cpu_ready_set); 524 xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0); 525 kpreempt_enable(); 526 } 527