1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/uadmin.h> 31 #include <sys/panic.h> 32 #include <sys/reboot.h> 33 #include <sys/autoconf.h> 34 #include <sys/machsystm.h> 35 #include <sys/promif.h> 36 #include <sys/membar.h> 37 #include <vm/hat_sfmmu.h> 38 #include <sys/cpu_module.h> 39 #include <sys/cpu_sgnblk_defs.h> 40 #include <sys/intreg.h> 41 #include <sys/consdev.h> 42 #include <sys/kdi_impl.h> 43 #include <sys/callb.h> 44 45 #ifdef TRAPTRACE 46 #include <sys/traptrace.h> 47 u_longlong_t panic_tick; 48 #endif /* TRAPTRACE */ 49 50 extern u_longlong_t gettick(); 51 static void reboot_machine(char *); 52 int disable_watchdog_on_exit = 0; 53 54 /* 55 * Machine dependent code to reboot. 56 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer 57 * to a string to be used as the argument string when rebooting. 58 * 59 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely 60 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when 61 * we are in a normal shutdown sequence (interrupts are not blocked, the 62 * system is not panic'ing or being suspended). 63 */ 64 /*ARGSUSED*/ 65 void 66 mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb) 67 { 68 extern void pm_cfb_check_and_powerup(void); 69 70 /* 71 * Disable the hw watchdog timer. 72 */ 73 if (disable_watchdog_on_exit && watchdog_activated) { 74 mutex_enter(&tod_lock); 75 (void) tod_ops.tod_clear_watchdog_timer(); 76 mutex_exit(&tod_lock); 77 } 78 79 /* 80 * XXX - rconsvp is set to NULL to ensure that output messages 81 * are sent to the underlying "hardware" device using the 82 * monitor's printf routine since we are in the process of 83 * either rebooting or halting the machine. 84 */ 85 rconsvp = NULL; 86 87 /* 88 * At a high interrupt level we can't: 89 * 1) bring up the console 90 * or 91 * 2) wait for pending interrupts prior to redistribution 92 * to the current CPU 93 * 94 * so we do them now. 95 */ 96 pm_cfb_check_and_powerup(); 97 98 /* make sure there are no more changes to the device tree */ 99 devtree_freeze(); 100 101 if (invoke_cb) 102 (void) callb_execute_class(CB_CL_MDBOOT, NULL); 103 104 /* 105 * Clear any unresolved UEs from memory. 106 */ 107 page_retire_mdboot(); 108 109 /* 110 * stop other cpus which also raise our priority. since there is only 111 * one active cpu after this, and our priority will be too high 112 * for us to be preempted, we're essentially single threaded 113 * from here on out. 114 */ 115 stop_other_cpus(); 116 117 /* 118 * try and reset leaf devices. reset_leaves() should only 119 * be called when there are no other threads that could be 120 * accessing devices 121 */ 122 reset_leaves(); 123 124 if (fcn == AD_HALT) { 125 halt((char *)NULL); 126 } else if (fcn == AD_POWEROFF) { 127 power_down(NULL); 128 } else { 129 if (bootstr == NULL) { 130 switch (fcn) { 131 132 case AD_BOOT: 133 bootstr = ""; 134 break; 135 136 case AD_IBOOT: 137 bootstr = "-a"; 138 break; 139 140 case AD_SBOOT: 141 bootstr = "-s"; 142 break; 143 144 case AD_SIBOOT: 145 bootstr = "-sa"; 146 break; 147 default: 148 cmn_err(CE_WARN, 149 "mdboot: invalid function %d", fcn); 150 bootstr = ""; 151 break; 152 } 153 } 154 reboot_machine(bootstr); 155 } 156 /* MAYBE REACHED */ 157 } 158 159 /* mdpreboot - may be called prior to mdboot while root fs still mounted */ 160 /*ARGSUSED*/ 161 void 162 mdpreboot(int cmd, int fcn, char *bootstr) 163 { 164 } 165 166 /* 167 * Halt the machine and then reboot with the device 168 * and arguments specified in bootstr. 169 */ 170 static void 171 reboot_machine(char *bootstr) 172 { 173 flush_windows(); 174 stop_other_cpus(); /* send stop signal to other CPUs */ 175 prom_printf("rebooting...\n"); 176 /* 177 * For platforms that use CPU signatures, we 178 * need to set the signature block to OS and 179 * the state to exiting for all the processors. 180 */ 181 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1); 182 prom_reboot(bootstr); 183 /*NOTREACHED*/ 184 } 185 186 /* 187 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs. 188 * Once in panic_idle() they raise spl, record their location, and spin. 189 */ 190 static void 191 panic_idle(void) 192 { 193 cpu_async_panic_callb(); /* check for async errors */ 194 195 (void) spl7(); 196 197 debug_flush_windows(); 198 (void) setjmp(&curthread->t_pcb); 199 200 CPU->cpu_m.in_prom = 1; 201 membar_stld(); 202 203 for (;;) 204 continue; 205 } 206 207 /* 208 * Force the other CPUs to trap into panic_idle(), and then remove them 209 * from the cpu_ready_set so they will no longer receive cross-calls. 210 */ 211 /*ARGSUSED*/ 212 void 213 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl) 214 { 215 cpuset_t cps; 216 int i; 217 218 (void) splzs(); 219 CPUSET_ALL_BUT(cps, cp->cpu_id); 220 xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL); 221 222 for (i = 0; i < NCPU; i++) { 223 if (i != cp->cpu_id && CPU_XCALL_READY(i)) { 224 int ntries = 0x10000; 225 226 while (!cpu[i]->cpu_m.in_prom && ntries) { 227 DELAY(50); 228 ntries--; 229 } 230 231 if (!cpu[i]->cpu_m.in_prom) 232 printf("panic: failed to stop cpu%d\n", i); 233 234 cpu[i]->cpu_flags &= ~CPU_READY; 235 cpu[i]->cpu_flags |= CPU_QUIESCED; 236 CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id); 237 } 238 } 239 } 240 241 /* 242 * Platform callback following each entry to panicsys(). If we've panicked at 243 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so, 244 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic 245 * was made and so we re-enqueue an interrupt request structure to allow 246 * further level 14 interrupts to be processed once we lower PIL. This allows 247 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic. 248 */ 249 void 250 panic_enter_hw(int spl) 251 { 252 if (spl == ipltospl(PIL_14)) { 253 uint_t opstate = disable_vec_intr(); 254 255 if (curthread->t_panic_trap != NULL) { 256 tickcmpr_disable(); 257 intr_dequeue_req(PIL_14, cbe_level14_inum); 258 } else { 259 if (!tickcmpr_disabled()) 260 intr_enqueue_req(PIL_14, cbe_level14_inum); 261 /* 262 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT) 263 * and SOFTINT<16> (STICK_INT) to indicate 264 * that the current level 14 has been serviced. 265 */ 266 wr_clr_softint((1 << PIL_14) | 267 TICK_INT_MASK | STICK_INT_MASK); 268 } 269 270 enable_vec_intr(opstate); 271 } 272 } 273 274 /* 275 * Miscellaneous hardware-specific code to execute after panicstr is set 276 * by the panic code: we also print and record PTL1 panic information here. 277 */ 278 /*ARGSUSED*/ 279 void 280 panic_quiesce_hw(panic_data_t *pdp) 281 { 282 extern uint_t getpstate(void); 283 extern void setpstate(uint_t); 284 285 #ifdef TRAPTRACE 286 /* 287 * Turn off TRAPTRACE and save the current %tick value in panic_tick. 288 */ 289 if (!panic_tick) 290 panic_tick = gettick(); 291 TRAPTRACE_FREEZE; 292 #endif 293 /* 294 * For Platforms that use CPU signatures, we 295 * need to set the signature block to OS, the state to 296 * exiting, and the substate to panic for all the processors. 297 */ 298 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1); 299 300 /* 301 * De-activate ECC functions and disable the watchdog timer now that 302 * we've made it through the critical part of the panic code. 303 */ 304 if (watchdog_enable) 305 (void) tod_ops.tod_clear_watchdog_timer(); 306 307 /* 308 * Disable further ECC errors from the CPU module and the bus nexus. 309 */ 310 cpu_disable_errors(); 311 (void) bus_func_invoke(BF_TYPE_ERRDIS); 312 313 /* 314 * Redirect all interrupts to the current CPU. 315 */ 316 intr_redist_all_cpus_shutdown(); 317 318 /* 319 * This call exists solely to support dumps to network 320 * devices after sync from OBP. 321 * 322 * If we came here via the sync callback, then on some 323 * platforms, interrupts may have arrived while we were 324 * stopped in OBP. OBP will arrange for those interrupts to 325 * be redelivered if you say "go", but not if you invoke a 326 * client callback like 'sync'. For some dump devices 327 * (network swap devices), we need interrupts to be 328 * delivered in order to dump, so we have to call the bus 329 * nexus driver to reset the interrupt state machines. 330 */ 331 (void) bus_func_invoke(BF_TYPE_RESINTR); 332 333 setpstate(getpstate() | PSTATE_IE); 334 } 335 336 /* 337 * Platforms that use CPU signatures need to set the signature block to OS and 338 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to 339 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to 340 * reboot the machine if the dump never completes. 341 */ 342 /*ARGSUSED*/ 343 void 344 panic_dump_hw(int spl) 345 { 346 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1); 347 } 348 349 /* 350 * for ptl1_panic 351 */ 352 void 353 ptl1_init_cpu(struct cpu *cpu) 354 { 355 ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state; 356 357 /*CONSTCOND*/ 358 if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) { 359 panic("ptl1_init_cpu: not enough space left for ptl1_panic " 360 "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu)); 361 } 362 363 pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE; 364 cpu_pa[cpu->cpu_id] = va_to_pa(cpu); 365 } 366 367 void 368 ptl1_panic_handler(ptl1_state_t *pstate) 369 { 370 static const char *ptl1_reasons[] = { 371 #ifdef PTL1_PANIC_DEBUG 372 "trap for debug purpose", /* PTL1_BAD_DEBUG */ 373 #else 374 "unknown trap", /* PTL1_BAD_DEBUG */ 375 #endif 376 "register window trap", /* PTL1_BAD_WTRAP */ 377 "kernel MMU miss", /* PTL1_BAD_KMISS */ 378 "kernel protection fault", /* PTL1_BAD_KPROT_FAULT */ 379 "ISM MMU miss", /* PTL1_BAD_ISM */ 380 "kernel MMU trap", /* PTL1_BAD_MMUTRAP */ 381 "kernel trap handler state", /* PTL1_BAD_TRAP */ 382 "floating point trap", /* PTL1_BAD_FPTRAP */ 383 #ifdef DEBUG 384 "pointer to intr_vec", /* PTL1_BAD_INTR_VEC */ 385 #else 386 "unknown trap", /* PTL1_BAD_INTR_VEC */ 387 #endif 388 #ifdef TRAPTRACE 389 "TRACE_PTR state", /* PTL1_BAD_TRACE_PTR */ 390 #else 391 "unknown trap", /* PTL1_BAD_TRACE_PTR */ 392 #endif 393 "stack overflow", /* PTL1_BAD_STACK */ 394 "DTrace flags", /* PTL1_BAD_DTRACE_FLAGS */ 395 "attempt to steal locked ctx", /* PTL1_BAD_CTX_STEAL */ 396 "CPU ECC error loop", /* PTL1_BAD_ECC */ 397 "non-kernel context in sys/priv_trap() below or", 398 /* PTL1_BAD_CTX */ 399 "error raising a TSB exception", /* PTL1_BAD_RAISE_TSBEXCP */ 400 "missing shared TSB" /* PTL1_NO_SCDTSB8K */ 401 }; 402 403 uint_t reason = pstate->ptl1_regs.ptl1_g1; 404 uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl; 405 struct panic_trap_info ti = { 0 }; 406 407 /* 408 * Use trap_info for a place holder to call panic_savetrap() and 409 * panic_showtrap() to save and print out ptl1_panic information. 410 */ 411 if (curthread->t_panic_trap == NULL) 412 curthread->t_panic_trap = &ti; 413 414 if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0])) 415 panic("bad %s at TL %u", ptl1_reasons[reason], tl); 416 else 417 panic("ptl1_panic reason 0x%x at TL %u", reason, tl); 418 } 419 420 void 421 clear_watchdog_on_exit() 422 { 423 /* 424 * Only shut down an active hardware watchdog timer if the platform 425 * has expressed an interest to. 426 */ 427 if (disable_watchdog_on_exit && watchdog_activated) { 428 prom_printf("Debugging requested; hardware watchdog " 429 "disabled; reboot to re-enable.\n"); 430 cmn_err(CE_WARN, "!Debugging requested; hardware watchdog " 431 "disabled; reboot to re-enable."); 432 mutex_enter(&tod_lock); 433 (void) tod_ops.tod_clear_watchdog_timer(); 434 mutex_exit(&tod_lock); 435 } 436 } 437 438 /* 439 * This null routine is only used by sun4v watchdog timer support. 440 */ 441 void 442 restore_watchdog_on_entry(void) 443 { 444 } 445 446 int 447 kdi_watchdog_disable(void) 448 { 449 if (watchdog_activated) { 450 mutex_enter(&tod_lock); 451 (void) tod_ops.tod_clear_watchdog_timer(); 452 mutex_exit(&tod_lock); 453 } 454 455 return (watchdog_activated); 456 } 457 458 void 459 kdi_watchdog_restore(void) 460 { 461 if (watchdog_enable) { 462 mutex_enter(&tod_lock); 463 (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds); 464 mutex_exit(&tod_lock); 465 } 466 } 467 468 /*ARGSUSED*/ 469 void 470 mach_dump_buffer_init(void) 471 { 472 /* 473 * setup dump buffer to store extra crash information 474 * not applicable to sun4u 475 */ 476 } 477 478 /* 479 * xt_sync - wait for previous x-traps to finish 480 */ 481 void 482 xt_sync(cpuset_t cpuset) 483 { 484 kpreempt_disable(); 485 CPUSET_DEL(cpuset, CPU->cpu_id); 486 CPUSET_AND(cpuset, cpu_ready_set); 487 xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0); 488 kpreempt_enable(); 489 } 490 491 /* 492 * mach_soft_state_init() - dummy routine for sun4v soft state 493 */ 494 void 495 mach_soft_state_init(void) 496 {} 497