1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/uadmin.h> 31 #include <sys/panic.h> 32 #include <sys/reboot.h> 33 #include <sys/autoconf.h> 34 #include <sys/machsystm.h> 35 #include <sys/promif.h> 36 #include <sys/membar.h> 37 #include <vm/hat_sfmmu.h> 38 #include <sys/cpu_module.h> 39 #include <sys/cpu_sgnblk_defs.h> 40 #include <sys/intreg.h> 41 #include <sys/consdev.h> 42 #include <sys/kdi_impl.h> 43 #include <sys/callb.h> 44 45 #ifdef TRAPTRACE 46 #include <sys/traptrace.h> 47 u_longlong_t panic_tick; 48 #endif /* TRAPTRACE */ 49 50 extern u_longlong_t gettick(); 51 static void reboot_machine(char *); 52 int disable_watchdog_on_exit = 0; 53 54 /* 55 * Machine dependent code to reboot. 56 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer 57 * to a string to be used as the argument string when rebooting. 58 * 59 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely 60 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when 61 * we are in a normal shutdown sequence (interrupts are not blocked, the 62 * system is not panic'ing or being suspended). 63 */ 64 /*ARGSUSED*/ 65 void 66 mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb) 67 { 68 extern void pm_cfb_check_and_powerup(void); 69 70 /* 71 * Disable the hw watchdog timer. 72 */ 73 if (disable_watchdog_on_exit && watchdog_activated) { 74 mutex_enter(&tod_lock); 75 (void) tod_ops.tod_clear_watchdog_timer(); 76 mutex_exit(&tod_lock); 77 } 78 79 /* 80 * XXX - rconsvp is set to NULL to ensure that output messages 81 * are sent to the underlying "hardware" device using the 82 * monitor's printf routine since we are in the process of 83 * either rebooting or halting the machine. 84 */ 85 rconsvp = NULL; 86 87 /* 88 * At a high interrupt level we can't: 89 * 1) bring up the console 90 * or 91 * 2) wait for pending interrupts prior to redistribution 92 * to the current CPU 93 * 94 * so we do them now. 95 */ 96 pm_cfb_check_and_powerup(); 97 98 /* make sure there are no more changes to the device tree */ 99 devtree_freeze(); 100 101 if (invoke_cb) 102 (void) callb_execute_class(CB_CL_MDBOOT, NULL); 103 104 /* 105 * Clear any unresolved UEs from memory. 106 */ 107 if (memsegs != NULL) 108 page_retire_hunt(page_retire_mdboot_cb); 109 110 /* 111 * stop other cpus which also raise our priority. since there is only 112 * one active cpu after this, and our priority will be too high 113 * for us to be preempted, we're essentially single threaded 114 * from here on out. 115 */ 116 stop_other_cpus(); 117 118 /* 119 * try and reset leaf devices. reset_leaves() should only 120 * be called when there are no other threads that could be 121 * accessing devices 122 */ 123 reset_leaves(); 124 125 if (fcn == AD_HALT) { 126 halt((char *)NULL); 127 } else if (fcn == AD_POWEROFF) { 128 power_down(NULL); 129 } else { 130 if (bootstr == NULL) { 131 switch (fcn) { 132 133 case AD_BOOT: 134 bootstr = ""; 135 break; 136 137 case AD_IBOOT: 138 bootstr = "-a"; 139 break; 140 141 case AD_SBOOT: 142 bootstr = "-s"; 143 break; 144 145 case AD_SIBOOT: 146 bootstr = "-sa"; 147 break; 148 default: 149 cmn_err(CE_WARN, 150 "mdboot: invalid function %d", fcn); 151 bootstr = ""; 152 break; 153 } 154 } 155 reboot_machine(bootstr); 156 } 157 /* MAYBE REACHED */ 158 } 159 160 /* mdpreboot - may be called prior to mdboot while root fs still mounted */ 161 /*ARGSUSED*/ 162 void 163 mdpreboot(int cmd, int fcn, char *bootstr) 164 { 165 } 166 167 /* 168 * Halt the machine and then reboot with the device 169 * and arguments specified in bootstr. 170 */ 171 static void 172 reboot_machine(char *bootstr) 173 { 174 flush_windows(); 175 stop_other_cpus(); /* send stop signal to other CPUs */ 176 prom_printf("rebooting...\n"); 177 /* 178 * For platforms that use CPU signatures, we 179 * need to set the signature block to OS and 180 * the state to exiting for all the processors. 181 */ 182 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1); 183 prom_reboot(bootstr); 184 /*NOTREACHED*/ 185 } 186 187 /* 188 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs. 189 * Once in panic_idle() they raise spl, record their location, and spin. 190 */ 191 static void 192 panic_idle(void) 193 { 194 cpu_async_panic_callb(); /* check for async errors */ 195 196 (void) spl7(); 197 198 debug_flush_windows(); 199 (void) setjmp(&curthread->t_pcb); 200 201 CPU->cpu_m.in_prom = 1; 202 membar_stld(); 203 204 for (;;); 205 } 206 207 /* 208 * Force the other CPUs to trap into panic_idle(), and then remove them 209 * from the cpu_ready_set so they will no longer receive cross-calls. 210 */ 211 /*ARGSUSED*/ 212 void 213 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl) 214 { 215 cpuset_t cps; 216 int i; 217 218 (void) splzs(); 219 CPUSET_ALL_BUT(cps, cp->cpu_id); 220 xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL); 221 222 for (i = 0; i < NCPU; i++) { 223 if (i != cp->cpu_id && CPU_XCALL_READY(i)) { 224 int ntries = 0x10000; 225 226 while (!cpu[i]->cpu_m.in_prom && ntries) { 227 DELAY(50); 228 ntries--; 229 } 230 231 if (!cpu[i]->cpu_m.in_prom) 232 printf("panic: failed to stop cpu%d\n", i); 233 234 cpu[i]->cpu_flags &= ~CPU_READY; 235 cpu[i]->cpu_flags |= CPU_QUIESCED; 236 CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id); 237 } 238 } 239 } 240 241 /* 242 * Platform callback following each entry to panicsys(). If we've panicked at 243 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so, 244 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic 245 * was made and so we re-enqueue an interrupt request structure to allow 246 * further level 14 interrupts to be processed once we lower PIL. This allows 247 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic. 248 */ 249 void 250 panic_enter_hw(int spl) 251 { 252 if (spl == ipltospl(PIL_14)) { 253 uint_t opstate = disable_vec_intr(); 254 255 if (curthread->t_panic_trap != NULL) { 256 tickcmpr_disable(); 257 intr_dequeue_req(PIL_14, cbe_level14_inum); 258 } else { 259 if (!tickcmpr_disabled()) 260 intr_enqueue_req(PIL_14, cbe_level14_inum); 261 /* 262 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT) 263 * and SOFTINT<16> (STICK_INT) to indicate 264 * that the current level 14 has been serviced. 265 */ 266 wr_clr_softint((1 << PIL_14) | 267 TICK_INT_MASK | STICK_INT_MASK); 268 } 269 270 enable_vec_intr(opstate); 271 } 272 } 273 274 /* 275 * Miscellaneous hardware-specific code to execute after panicstr is set 276 * by the panic code: we also print and record PTL1 panic information here. 277 */ 278 /*ARGSUSED*/ 279 void 280 panic_quiesce_hw(panic_data_t *pdp) 281 { 282 extern uint_t getpstate(void); 283 extern void setpstate(uint_t); 284 285 #ifdef TRAPTRACE 286 /* 287 * Turn off TRAPTRACE and save the current %tick value in panic_tick. 288 */ 289 if (!panic_tick) 290 panic_tick = gettick(); 291 TRAPTRACE_FREEZE; 292 #endif 293 /* 294 * For Platforms that use CPU signatures, we 295 * need to set the signature block to OS, the state to 296 * exiting, and the substate to panic for all the processors. 297 */ 298 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1); 299 300 /* 301 * De-activate ECC functions and disable the watchdog timer now that 302 * we've made it through the critical part of the panic code. 303 */ 304 if (watchdog_enable) 305 (void) tod_ops.tod_clear_watchdog_timer(); 306 307 /* 308 * Disable further ECC errors from the CPU module and the bus nexus. 309 */ 310 cpu_disable_errors(); 311 (void) bus_func_invoke(BF_TYPE_ERRDIS); 312 313 /* 314 * Redirect all interrupts to the current CPU. 315 */ 316 intr_redist_all_cpus_shutdown(); 317 318 /* 319 * This call exists solely to support dumps to network 320 * devices after sync from OBP. 321 * 322 * If we came here via the sync callback, then on some 323 * platforms, interrupts may have arrived while we were 324 * stopped in OBP. OBP will arrange for those interrupts to 325 * be redelivered if you say "go", but not if you invoke a 326 * client callback like 'sync'. For some dump devices 327 * (network swap devices), we need interrupts to be 328 * delivered in order to dump, so we have to call the bus 329 * nexus driver to reset the interrupt state machines. 330 */ 331 (void) bus_func_invoke(BF_TYPE_RESINTR); 332 333 setpstate(getpstate() | PSTATE_IE); 334 } 335 336 /* 337 * Platforms that use CPU signatures need to set the signature block to OS and 338 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to 339 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to 340 * reboot the machine if the dump never completes. 341 */ 342 /*ARGSUSED*/ 343 void 344 panic_dump_hw(int spl) 345 { 346 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1); 347 } 348 349 /* 350 * for ptl1_panic 351 */ 352 void 353 ptl1_init_cpu(struct cpu *cpu) 354 { 355 ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state; 356 357 /*CONSTCOND*/ 358 if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) { 359 panic("ptl1_init_cpu: not enough space left for ptl1_panic " 360 "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu)); 361 } 362 363 pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE; 364 cpu_pa[cpu->cpu_id] = va_to_pa(cpu); 365 } 366 367 void 368 ptl1_panic_handler(ptl1_state_t *pstate) 369 { 370 static const char *ptl1_reasons[] = { 371 #ifdef PTL1_PANIC_DEBUG 372 "trap for debug purpose", /* PTL1_BAD_DEBUG */ 373 #else 374 "unknown trap", /* PTL1_BAD_DEBUG */ 375 #endif 376 "register window trap", /* PTL1_BAD_WTRAP */ 377 "kernel MMU miss", /* PTL1_BAD_KMISS */ 378 "kernel protection fault", /* PTL1_BAD_KPROT_FAULT */ 379 "ISM MMU miss", /* PTL1_BAD_ISM */ 380 "kernel MMU trap", /* PTL1_BAD_MMUTRAP */ 381 "kernel trap handler state", /* PTL1_BAD_TRAP */ 382 "floating point trap", /* PTL1_BAD_FPTRAP */ 383 #ifdef DEBUG 384 "pointer to intr_req", /* PTL1_BAD_INTR_REQ */ 385 #else 386 "unknown trap", /* PTL1_BAD_INTR_REQ */ 387 #endif 388 #ifdef TRAPTRACE 389 "TRACE_PTR state", /* PTL1_BAD_TRACE_PTR */ 390 #else 391 "unknown trap", /* PTL1_BAD_TRACE_PTR */ 392 #endif 393 "stack overflow", /* PTL1_BAD_STACK */ 394 "DTrace flags", /* PTL1_BAD_DTRACE_FLAGS */ 395 "attempt to steal locked ctx", /* PTL1_BAD_CTX_STEAL */ 396 "CPU ECC error loop", /* PTL1_BAD_ECC */ 397 "non-kernel context in sys/priv_trap() below or", 398 /* PTL1_BAD_CTX */ 399 }; 400 401 uint_t reason = pstate->ptl1_regs.ptl1_g1; 402 uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl; 403 struct trap_info ti = { 0 }; 404 405 /* 406 * Use trap_info for a place holder to call panic_savetrap() and 407 * panic_showtrap() to save and print out ptl1_panic information. 408 */ 409 if (curthread->t_panic_trap == NULL) 410 curthread->t_panic_trap = &ti; 411 412 if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0])) 413 panic("bad %s at TL %u", ptl1_reasons[reason], tl); 414 else 415 panic("ptl1_panic reason 0x%x at TL %u", reason, tl); 416 } 417 418 void 419 clear_watchdog_on_exit() 420 { 421 /* 422 * Only shut down an active hardware watchdog timer if the platform 423 * has expressed an interest to. 424 */ 425 if (disable_watchdog_on_exit && watchdog_activated) { 426 prom_printf("Debugging requested; hardware watchdog " 427 "disabled; reboot to re-enable.\n"); 428 cmn_err(CE_WARN, "!Debugging requested; hardware watchdog " 429 "disabled; reboot to re-enable."); 430 mutex_enter(&tod_lock); 431 (void) tod_ops.tod_clear_watchdog_timer(); 432 mutex_exit(&tod_lock); 433 } 434 } 435 436 /* 437 * This null routine is only used by sun4v watchdog timer support. 438 */ 439 void 440 restore_watchdog_on_entry(void) 441 { 442 } 443 444 int 445 kdi_watchdog_disable(void) 446 { 447 if (watchdog_activated) { 448 mutex_enter(&tod_lock); 449 (void) tod_ops.tod_clear_watchdog_timer(); 450 mutex_exit(&tod_lock); 451 } 452 453 return (watchdog_activated); 454 } 455 456 void 457 kdi_watchdog_restore(void) 458 { 459 if (watchdog_enable) { 460 mutex_enter(&tod_lock); 461 (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds); 462 mutex_exit(&tod_lock); 463 } 464 } 465 466 /* 467 * This null routine is only used by sun4v watchdog timer support. 468 */ 469 void 470 watchdog_init(void) 471 { 472 } 473 474 /* 475 * This null routine is only used by sun4v watchdog timer support. 476 */ 477 void 478 watchdog_pat(void) 479 { 480 } 481 482 /* 483 * This null routine is only used by sun4v watchdog timer support. 484 */ 485 void 486 watchdog_suspend(void) 487 { 488 } 489 490 /* 491 * This null routine is only used by sun4v watchdog timer support. 492 */ 493 void 494 watchdog_resume(void) 495 { 496 } 497 498 /* 499 * This null routine is only used by sun4v watchdog timer support. 500 */ 501 void 502 watchdog_clear(void) 503 { 504 } 505 506 /*ARGSUSED*/ 507 void 508 mach_dump_buffer_init(void) 509 { 510 /* 511 * setup dump buffer to store extra crash information 512 * not applicable to sun4u 513 */ 514 } 515 516 /* 517 * xt_sync - wait for previous x-traps to finish 518 */ 519 void 520 xt_sync(cpuset_t cpuset) 521 { 522 kpreempt_disable(); 523 CPUSET_DEL(cpuset, CPU->cpu_id); 524 CPUSET_AND(cpuset, cpu_ready_set); 525 xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0); 526 kpreempt_enable(); 527 } 528