1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/uadmin.h> 31 #include <sys/panic.h> 32 #include <sys/reboot.h> 33 #include <sys/autoconf.h> 34 #include <sys/machsystm.h> 35 #include <sys/promif.h> 36 #include <sys/membar.h> 37 #include <vm/hat_sfmmu.h> 38 #include <sys/cpu_module.h> 39 #include <sys/cpu_sgnblk_defs.h> 40 #include <sys/intreg.h> 41 #include <sys/consdev.h> 42 #include <sys/kdi_impl.h> 43 #include <sys/callb.h> 44 45 #ifdef TRAPTRACE 46 #include <sys/traptrace.h> 47 u_longlong_t panic_tick; 48 #endif /* TRAPTRACE */ 49 50 extern u_longlong_t gettick(); 51 static void reboot_machine(char *); 52 int disable_watchdog_on_exit = 0; 53 54 extern void consconfig_teardown(); 55 56 /* 57 * Machine dependent code to reboot. 58 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer 59 * to a string to be used as the argument string when rebooting. 60 * 61 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely 62 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when 63 * we are in a normal shutdown sequence (interrupts are not blocked, the 64 * system is not panic'ing or being suspended). 65 */ 66 /*ARGSUSED*/ 67 void 68 mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb) 69 { 70 extern void pm_cfb_check_and_powerup(void); 71 72 /* 73 * Disable the hw watchdog timer. 74 */ 75 if (disable_watchdog_on_exit && watchdog_activated) { 76 mutex_enter(&tod_lock); 77 (void) tod_ops.tod_clear_watchdog_timer(); 78 mutex_exit(&tod_lock); 79 } 80 81 /* 82 * At a high interrupt level we can't: 83 * 1) bring up the console 84 * or 85 * 2) wait for pending interrupts prior to redistribution 86 * to the current CPU 87 * 88 * so we do them now. 89 */ 90 pm_cfb_check_and_powerup(); 91 92 /* make sure there are no more changes to the device tree */ 93 devtree_freeze(); 94 95 if (invoke_cb) 96 (void) callb_execute_class(CB_CL_MDBOOT, NULL); 97 98 /* 99 * Clear any unresolved UEs from memory. 100 */ 101 page_retire_mdboot(); 102 103 /* 104 * stop other cpus which also raise our priority. since there is only 105 * one active cpu after this, and our priority will be too high 106 * for us to be preempted, we're essentially single threaded 107 * from here on out. 108 */ 109 stop_other_cpus(); 110 111 consconfig_teardown(); 112 113 /* 114 * try and reset leaf devices. reset_leaves() should only 115 * be called when there are no other threads that could be 116 * accessing devices 117 */ 118 reset_leaves(); 119 120 if (fcn == AD_HALT) { 121 halt((char *)NULL); 122 } else if (fcn == AD_POWEROFF) { 123 power_down(NULL); 124 } else { 125 if (bootstr == NULL) { 126 switch (fcn) { 127 128 case AD_BOOT: 129 bootstr = ""; 130 break; 131 132 case AD_IBOOT: 133 bootstr = "-a"; 134 break; 135 136 case AD_SBOOT: 137 bootstr = "-s"; 138 break; 139 140 case AD_SIBOOT: 141 bootstr = "-sa"; 142 break; 143 default: 144 cmn_err(CE_WARN, 145 "mdboot: invalid function %d", fcn); 146 bootstr = ""; 147 break; 148 } 149 } 150 reboot_machine(bootstr); 151 } 152 /* MAYBE REACHED */ 153 } 154 155 /* mdpreboot - may be called prior to mdboot while root fs still mounted */ 156 /*ARGSUSED*/ 157 void 158 mdpreboot(int cmd, int fcn, char *bootstr) 159 { 160 } 161 162 /* 163 * Halt the machine and then reboot with the device 164 * and arguments specified in bootstr. 165 */ 166 static void 167 reboot_machine(char *bootstr) 168 { 169 flush_windows(); 170 stop_other_cpus(); /* send stop signal to other CPUs */ 171 prom_printf("rebooting...\n"); 172 /* 173 * For platforms that use CPU signatures, we 174 * need to set the signature block to OS and 175 * the state to exiting for all the processors. 176 */ 177 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1); 178 prom_reboot(bootstr); 179 /*NOTREACHED*/ 180 } 181 182 /* 183 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs. 184 * Once in panic_idle() they raise spl, record their location, and spin. 185 */ 186 static void 187 panic_idle(void) 188 { 189 cpu_async_panic_callb(); /* check for async errors */ 190 191 (void) spl7(); 192 193 debug_flush_windows(); 194 (void) setjmp(&curthread->t_pcb); 195 196 CPU->cpu_m.in_prom = 1; 197 membar_stld(); 198 199 for (;;) 200 continue; 201 } 202 203 /* 204 * Force the other CPUs to trap into panic_idle(), and then remove them 205 * from the cpu_ready_set so they will no longer receive cross-calls. 206 */ 207 /*ARGSUSED*/ 208 void 209 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl) 210 { 211 cpuset_t cps; 212 int i; 213 214 (void) splzs(); 215 CPUSET_ALL_BUT(cps, cp->cpu_id); 216 xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL); 217 218 for (i = 0; i < NCPU; i++) { 219 if (i != cp->cpu_id && CPU_XCALL_READY(i)) { 220 int ntries = 0x10000; 221 222 while (!cpu[i]->cpu_m.in_prom && ntries) { 223 DELAY(50); 224 ntries--; 225 } 226 227 if (!cpu[i]->cpu_m.in_prom) 228 printf("panic: failed to stop cpu%d\n", i); 229 230 cpu[i]->cpu_flags &= ~CPU_READY; 231 cpu[i]->cpu_flags |= CPU_QUIESCED; 232 CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id); 233 } 234 } 235 } 236 237 /* 238 * Platform callback following each entry to panicsys(). If we've panicked at 239 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so, 240 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic 241 * was made and so we re-enqueue an interrupt request structure to allow 242 * further level 14 interrupts to be processed once we lower PIL. This allows 243 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic. 244 */ 245 void 246 panic_enter_hw(int spl) 247 { 248 if (spl == ipltospl(PIL_14)) { 249 uint_t opstate = disable_vec_intr(); 250 251 if (curthread->t_panic_trap != NULL) { 252 tickcmpr_disable(); 253 intr_dequeue_req(PIL_14, cbe_level14_inum); 254 } else { 255 if (!tickcmpr_disabled()) 256 intr_enqueue_req(PIL_14, cbe_level14_inum); 257 /* 258 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT) 259 * and SOFTINT<16> (STICK_INT) to indicate 260 * that the current level 14 has been serviced. 261 */ 262 wr_clr_softint((1 << PIL_14) | 263 TICK_INT_MASK | STICK_INT_MASK); 264 } 265 266 enable_vec_intr(opstate); 267 } 268 } 269 270 /* 271 * Miscellaneous hardware-specific code to execute after panicstr is set 272 * by the panic code: we also print and record PTL1 panic information here. 273 */ 274 /*ARGSUSED*/ 275 void 276 panic_quiesce_hw(panic_data_t *pdp) 277 { 278 extern uint_t getpstate(void); 279 extern void setpstate(uint_t); 280 281 #ifdef TRAPTRACE 282 /* 283 * Turn off TRAPTRACE and save the current %tick value in panic_tick. 284 */ 285 if (!panic_tick) 286 panic_tick = gettick(); 287 TRAPTRACE_FREEZE; 288 #endif 289 /* 290 * For Platforms that use CPU signatures, we 291 * need to set the signature block to OS, the state to 292 * exiting, and the substate to panic for all the processors. 293 */ 294 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1); 295 296 /* 297 * De-activate ECC functions and disable the watchdog timer now that 298 * we've made it through the critical part of the panic code. 299 */ 300 if (watchdog_enable) 301 (void) tod_ops.tod_clear_watchdog_timer(); 302 303 /* 304 * Disable further ECC errors from the CPU module and the bus nexus. 305 */ 306 cpu_disable_errors(); 307 (void) bus_func_invoke(BF_TYPE_ERRDIS); 308 309 /* 310 * Redirect all interrupts to the current CPU. 311 */ 312 intr_redist_all_cpus_shutdown(); 313 314 /* 315 * This call exists solely to support dumps to network 316 * devices after sync from OBP. 317 * 318 * If we came here via the sync callback, then on some 319 * platforms, interrupts may have arrived while we were 320 * stopped in OBP. OBP will arrange for those interrupts to 321 * be redelivered if you say "go", but not if you invoke a 322 * client callback like 'sync'. For some dump devices 323 * (network swap devices), we need interrupts to be 324 * delivered in order to dump, so we have to call the bus 325 * nexus driver to reset the interrupt state machines. 326 */ 327 (void) bus_func_invoke(BF_TYPE_RESINTR); 328 329 setpstate(getpstate() | PSTATE_IE); 330 } 331 332 /* 333 * Platforms that use CPU signatures need to set the signature block to OS and 334 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to 335 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to 336 * reboot the machine if the dump never completes. 337 */ 338 /*ARGSUSED*/ 339 void 340 panic_dump_hw(int spl) 341 { 342 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1); 343 } 344 345 /* 346 * for ptl1_panic 347 */ 348 void 349 ptl1_init_cpu(struct cpu *cpu) 350 { 351 ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state; 352 353 /*CONSTCOND*/ 354 if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) { 355 panic("ptl1_init_cpu: not enough space left for ptl1_panic " 356 "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu)); 357 } 358 359 pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE; 360 cpu_pa[cpu->cpu_id] = va_to_pa(cpu); 361 } 362 363 void 364 ptl1_panic_handler(ptl1_state_t *pstate) 365 { 366 static const char *ptl1_reasons[] = { 367 #ifdef PTL1_PANIC_DEBUG 368 "trap for debug purpose", /* PTL1_BAD_DEBUG */ 369 #else 370 "unknown trap", /* PTL1_BAD_DEBUG */ 371 #endif 372 "register window trap", /* PTL1_BAD_WTRAP */ 373 "kernel MMU miss", /* PTL1_BAD_KMISS */ 374 "kernel protection fault", /* PTL1_BAD_KPROT_FAULT */ 375 "ISM MMU miss", /* PTL1_BAD_ISM */ 376 "kernel MMU trap", /* PTL1_BAD_MMUTRAP */ 377 "kernel trap handler state", /* PTL1_BAD_TRAP */ 378 "floating point trap", /* PTL1_BAD_FPTRAP */ 379 #ifdef DEBUG 380 "pointer to intr_vec", /* PTL1_BAD_INTR_VEC */ 381 #else 382 "unknown trap", /* PTL1_BAD_INTR_VEC */ 383 #endif 384 #ifdef TRAPTRACE 385 "TRACE_PTR state", /* PTL1_BAD_TRACE_PTR */ 386 #else 387 "unknown trap", /* PTL1_BAD_TRACE_PTR */ 388 #endif 389 "stack overflow", /* PTL1_BAD_STACK */ 390 "DTrace flags", /* PTL1_BAD_DTRACE_FLAGS */ 391 "attempt to steal locked ctx", /* PTL1_BAD_CTX_STEAL */ 392 "CPU ECC error loop", /* PTL1_BAD_ECC */ 393 "non-kernel context in sys/priv_trap() below or", 394 /* PTL1_BAD_CTX */ 395 }; 396 397 uint_t reason = pstate->ptl1_regs.ptl1_g1; 398 uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl; 399 struct panic_trap_info ti = { 0 }; 400 401 /* 402 * Use trap_info for a place holder to call panic_savetrap() and 403 * panic_showtrap() to save and print out ptl1_panic information. 404 */ 405 if (curthread->t_panic_trap == NULL) 406 curthread->t_panic_trap = &ti; 407 408 if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0])) 409 panic("bad %s at TL %u", ptl1_reasons[reason], tl); 410 else 411 panic("ptl1_panic reason 0x%x at TL %u", reason, tl); 412 } 413 414 void 415 clear_watchdog_on_exit() 416 { 417 /* 418 * Only shut down an active hardware watchdog timer if the platform 419 * has expressed an interest to. 420 */ 421 if (disable_watchdog_on_exit && watchdog_activated) { 422 prom_printf("Debugging requested; hardware watchdog " 423 "disabled; reboot to re-enable.\n"); 424 cmn_err(CE_WARN, "!Debugging requested; hardware watchdog " 425 "disabled; reboot to re-enable."); 426 mutex_enter(&tod_lock); 427 (void) tod_ops.tod_clear_watchdog_timer(); 428 mutex_exit(&tod_lock); 429 } 430 } 431 432 /* 433 * This null routine is only used by sun4v watchdog timer support. 434 */ 435 void 436 restore_watchdog_on_entry(void) 437 { 438 } 439 440 int 441 kdi_watchdog_disable(void) 442 { 443 if (watchdog_activated) { 444 mutex_enter(&tod_lock); 445 (void) tod_ops.tod_clear_watchdog_timer(); 446 mutex_exit(&tod_lock); 447 } 448 449 return (watchdog_activated); 450 } 451 452 void 453 kdi_watchdog_restore(void) 454 { 455 if (watchdog_enable) { 456 mutex_enter(&tod_lock); 457 (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds); 458 mutex_exit(&tod_lock); 459 } 460 } 461 462 /*ARGSUSED*/ 463 void 464 mach_dump_buffer_init(void) 465 { 466 /* 467 * setup dump buffer to store extra crash information 468 * not applicable to sun4u 469 */ 470 } 471 472 /* 473 * xt_sync - wait for previous x-traps to finish 474 */ 475 void 476 xt_sync(cpuset_t cpuset) 477 { 478 kpreempt_disable(); 479 CPUSET_DEL(cpuset, CPU->cpu_id); 480 CPUSET_AND(cpuset, cpu_ready_set); 481 xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0); 482 kpreempt_enable(); 483 } 484 485 /* 486 * mach_soft_state_init() - dummy routine for sun4v soft state 487 */ 488 void 489 mach_soft_state_init(void) 490 {} 491