/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/reboot.h>
#include <sys/autoconf.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/membar.h>
#include <vm/hat_sfmmu.h>
#include <sys/cpu_module.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/intreg.h>
#include <sys/consdev.h>
#include <sys/kdi_impl.h>
#include <sys/callb.h>
#include <sys/dumphdr.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
u_longlong_t panic_tick;
#endif /* TRAPTRACE */

extern u_longlong_t gettick();
static void reboot_machine(char *);
int disable_watchdog_on_exit = 0;
extern uint64_t cpc_level15_inum;

/*
 * Machine dependent code to reboot.
 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
 * to a string to be used as the argument string when rebooting.
 *
 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
 * we are in a normal shutdown sequence (interrupts are not blocked, the
 * system is not panic'ing or being suspended).
 */
/*ARGSUSED*/
void
mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb)
{
        extern void pm_cfb_check_and_powerup(void);

        /*
         * Disable the hw watchdog timer.
         */
        if (disable_watchdog_on_exit && watchdog_activated) {
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_clear_watchdog_timer();
                mutex_exit(&tod_lock);
        }

        /*
         * XXX - rconsvp is set to NULL to ensure that output messages
         * are sent to the underlying "hardware" device using the
         * monitor's printf routine since we are in the process of
         * either rebooting or halting the machine.
         */
        rconsvp = NULL;

        /*
         * At a high interrupt level we can't:
         *      1) bring up the console
         * or
         *      2) wait for pending interrupts prior to redistribution
         *         to the current CPU
         *
         * so we do them now.
         */
        pm_cfb_check_and_powerup();

        /* make sure there are no more changes to the device tree */
        devtree_freeze();

        if (invoke_cb)
                (void) callb_execute_class(CB_CL_MDBOOT, NULL);

        /*
         * Clear any unresolved UEs from memory.
         */
        page_retire_mdboot();

        /*
         * stop other cpus which also raise our priority. since there is only
         * one active cpu after this, and our priority will be too high
         * for us to be preempted, we're essentially single threaded
         * from here on out.
         */
        stop_other_cpus();

        /*
         * try and reset leaf devices. reset_leaves() should only
         * be called when there are no other threads that could be
         * accessing devices
         */
        reset_leaves();

        if (fcn == AD_HALT) {
                halt((char *)NULL);
        } else if (fcn == AD_POWEROFF) {
                power_down(NULL);
        } else {
                if (bootstr == NULL) {
                        switch (fcn) {

                        case AD_FASTREBOOT:
                        case AD_BOOT:
                                bootstr = "";
                                break;

                        case AD_IBOOT:
                                bootstr = "-a";
                                break;

                        case AD_SBOOT:
                                bootstr = "-s";
                                break;

                        case AD_SIBOOT:
                                bootstr = "-sa";
                                break;
                        default:
                                cmn_err(CE_WARN,
                                    "mdboot: invalid function %d", fcn);
                                bootstr = "";
                                break;
                        }
                }
                if (fcn == AD_FASTREBOOT) {
                        pnode_t onode;
                        int dllen;
                        onode = prom_optionsnode();
                        if ((onode == OBP_NONODE) || (onode == OBP_BADNODE)) {
                                cmn_err(CE_WARN, "Unable to set diag level for"
                                    " quick reboot");
                        } else {
                                dllen = prom_getproplen(onode, "diag-level");
                                if (dllen != -1) {
                                        int newstrlen;
                                        char *newstr = kmem_alloc(strlen(
                                            bootstr) + dllen + 5, KM_SLEEP);
                                        (void) strcpy(newstr, bootstr);
                                        (void) strcat(newstr, " -f ");
                                        newstrlen = strlen(bootstr) + 4;
                                        (void) prom_getprop(onode, "diag-level",
                                            (caddr_t)&(newstr[newstrlen]));
                                        newstr[newstrlen + dllen] = '\0';
                                        bootstr = newstr;
                                }
                                (void) prom_setprop(onode, "diag-level",
                                    "off", 4);
                        }
                }
                reboot_machine(bootstr);
        }
        /* MAYBE REACHED */
}

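/*
 * Illustrative sketch (not part of the original source): a subsystem that
 * needs to run teardown work during a normal shutdown can register a
 * CB_CL_MDBOOT callback, which mdboot() above runs via
 * callb_execute_class() when invoke_cb is true.  This assumes the
 * callb_add() interface declared in <sys/callb.h>; the xx_* names are
 * hypothetical.
 */
#if 0
static boolean_t
xx_mdboot_callback(void *arg, int code)
{
        /* flush any volatile state before the machine is rebooted/halted */
        xx_flush_state(arg);
        return (B_TRUE);
}

static callb_id_t xx_cbid;

void
xx_attach_shutdown_hook(void *softstate)
{
        xx_cbid = callb_add(xx_mdboot_callback, softstate,
            CB_CL_MDBOOT, "xx_mdboot");
}
#endif
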
/* mdpreboot - may be called prior to mdboot while root fs still mounted */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
}

/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 */
static void
reboot_machine(char *bootstr)
{
        flush_windows();
        stop_other_cpus();              /* send stop signal to other CPUs */
        prom_printf("rebooting...\n");
        /*
         * For platforms that use CPU signatures, we
         * need to set the signature block to OS and
         * the state to exiting for all the processors.
         */
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
        prom_reboot(bootstr);
        /*NOTREACHED*/
}

/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 */
static void
panic_idle(void)
{
        cpu_async_panic_callb();        /* check for async errors */

        (void) spl7();

        debug_flush_windows();
        (void) setjmp(&curthread->t_pcb);

        CPU->cpu_m.in_prom = 1;
        membar_stld();

        dumpsys_helper();

        for (;;)
                continue;
}

/*
 * Force the other CPUs to trap into panic_idle(), and then remove them
 * from the cpu_ready_set so they will no longer receive cross-calls.
 */
/*ARGSUSED*/
void
panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
{
        cpuset_t cps;
        int i;

        (void) splzs();
        CPUSET_ALL_BUT(cps, cp->cpu_id);
        xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);

        for (i = 0; i < NCPU; i++) {
                if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
                        int ntries = 0x10000;

                        while (!cpu[i]->cpu_m.in_prom && ntries) {
                                DELAY(50);
                                ntries--;
                        }

                        if (!cpu[i]->cpu_m.in_prom)
                                printf("panic: failed to stop cpu%d\n", i);

                        cpu[i]->cpu_flags &= ~CPU_READY;
                        cpu[i]->cpu_flags |= CPU_QUIESCED;
                        CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
                }
        }
}

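/*
 * Illustrative note (not part of the original source): the polling loop in
 * panic_stopcpus() above gives each CPU 0x10000 iterations of DELAY(50),
 * i.e. roughly 65536 * 50us, or about 3.3 seconds, to take the x-trap and
 * set cpu_m.in_prom before it is reported as failed to stop.
 */
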
/*
 * Platform callback following each entry to panicsys(). If we've panicked at
 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so,
 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic
 * was made and so we re-enqueue an interrupt request structure to allow
 * further level 14 interrupts to be processed once we lower PIL. This allows
 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 *
 * In case we panic at level 15, ensure that the cpc handler has been
 * reinstalled otherwise we could run the risk of hitting a missing interrupt
 * handler when this thread drops PIL and the cpc counter overflows.
 */
void
panic_enter_hw(int spl)
{
        uint_t opstate;

        if (spl == ipltospl(PIL_14)) {
                opstate = disable_vec_intr();

                if (curthread->t_panic_trap != NULL) {
                        tickcmpr_disable();
                        intr_dequeue_req(PIL_14, cbe_level14_inum);
                } else {
                        if (!tickcmpr_disabled())
                                intr_enqueue_req(PIL_14, cbe_level14_inum);
                        /*
                         * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
                         * and SOFTINT<16> (STICK_INT) to indicate
                         * that the current level 14 has been serviced.
                         */
                        wr_clr_softint((1 << PIL_14) |
                            TICK_INT_MASK | STICK_INT_MASK);
                }

                enable_vec_intr(opstate);
        } else if (spl == ipltospl(PIL_15)) {
                opstate = disable_vec_intr();
                intr_enqueue_req(PIL_15, cpc_level15_inum);
                wr_clr_softint(1 << PIL_15);
                enable_vec_intr(opstate);
        }
}

/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code: we also print and record PTL1 panic information here.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
        extern uint_t getpstate(void);
        extern void setpstate(uint_t);

#ifdef TRAPTRACE
        /*
         * Turn off TRAPTRACE and save the current %tick value in panic_tick.
         */
        if (!panic_tick)
                panic_tick = gettick();
        TRAPTRACE_FREEZE;
#endif
        /*
         * For Platforms that use CPU signatures, we
         * need to set the signature block to OS, the state to
         * exiting, and the substate to panic for all the processors.
         */
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

        /*
         * De-activate ECC functions and disable the watchdog timer now that
         * we've made it through the critical part of the panic code.
         */
        if (watchdog_enable)
                (void) tod_ops.tod_clear_watchdog_timer();

        /*
         * Disable further ECC errors from the CPU module and the bus nexus.
         */
        cpu_disable_errors();
        (void) bus_func_invoke(BF_TYPE_ERRDIS);

        /*
         * Redirect all interrupts to the current CPU.
         */
        intr_redist_all_cpus_shutdown();

        /*
         * This call exists solely to support dumps to network
         * devices after sync from OBP.
         *
         * If we came here via the sync callback, then on some
         * platforms, interrupts may have arrived while we were
         * stopped in OBP. OBP will arrange for those interrupts to
         * be redelivered if you say "go", but not if you invoke a
         * client callback like 'sync'. For some dump devices
         * (network swap devices), we need interrupts to be
         * delivered in order to dump, so we have to call the bus
         * nexus driver to reset the interrupt state machines.
         */
        (void) bus_func_invoke(BF_TYPE_RESINTR);

        setpstate(getpstate() | PSTATE_IE);
}

/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to
 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to
 * reboot the machine if the dump never completes.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}

/*
 * for ptl1_panic
 */
void
ptl1_init_cpu(struct cpu *cpu)
{
        ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;

        /*CONSTCOND*/
        if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
                panic("ptl1_init_cpu: not enough space left for ptl1_panic "
                    "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu));
        }

        pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
        cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
}

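/*
 * Illustrative layout note (not part of the original source): the check in
 * ptl1_init_cpu() above encodes the assumption that each CPU's
 * CPU_ALLOC_SIZE allocation holds the struct cpu at its base and leaves at
 * least PTL1_SSIZE bytes free at the top for the ptl1_panic stack, which
 * grows downward from ptl1_stktop:
 *
 *      (uintptr_t)cpu                                  base of the allocation
 *      (uintptr_t)cpu + sizeof (struct cpu)            end of struct cpu
 *      (uintptr_t)cpu + CPU_ALLOC_SIZE - PTL1_SSIZE    lowest stack address
 *      (uintptr_t)cpu + CPU_ALLOC_SIZE                 ptl1_stktop
 */
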
void
ptl1_panic_handler(ptl1_state_t *pstate)
{
        static const char *ptl1_reasons[] = {
#ifdef PTL1_PANIC_DEBUG
                "trap for debug purpose",       /* PTL1_BAD_DEBUG */
#else
                "unknown trap",                 /* PTL1_BAD_DEBUG */
#endif
                "register window trap",         /* PTL1_BAD_WTRAP */
                "kernel MMU miss",              /* PTL1_BAD_KMISS */
                "kernel protection fault",      /* PTL1_BAD_KPROT_FAULT */
                "ISM MMU miss",                 /* PTL1_BAD_ISM */
                "kernel MMU trap",              /* PTL1_BAD_MMUTRAP */
                "kernel trap handler state",    /* PTL1_BAD_TRAP */
                "floating point trap",          /* PTL1_BAD_FPTRAP */
#ifdef DEBUG
                "pointer to intr_vec",          /* PTL1_BAD_INTR_VEC */
#else
                "unknown trap",                 /* PTL1_BAD_INTR_VEC */
#endif
#ifdef TRAPTRACE
                "TRACE_PTR state",              /* PTL1_BAD_TRACE_PTR */
#else
                "unknown trap",                 /* PTL1_BAD_TRACE_PTR */
#endif
                "stack overflow",               /* PTL1_BAD_STACK */
                "DTrace flags",                 /* PTL1_BAD_DTRACE_FLAGS */
                "attempt to steal locked ctx",  /* PTL1_BAD_CTX_STEAL */
                "CPU ECC error loop",           /* PTL1_BAD_ECC */
                "non-kernel context in sys/priv_trap() below or",
                                                /* PTL1_BAD_CTX */
                "error raising a TSB exception", /* PTL1_BAD_RAISE_TSBEXCP */
                "missing shared TSB"            /* PTL1_NO_SCDTSB8K */
        };

        uint_t reason = pstate->ptl1_regs.ptl1_g1;
        uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
        struct panic_trap_info ti = { 0 };

        /*
         * Use trap_info for a place holder to call panic_savetrap() and
         * panic_showtrap() to save and print out ptl1_panic information.
         */
        if (curthread->t_panic_trap == NULL)
                curthread->t_panic_trap = &ti;

        if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
                panic("bad %s at TL %u", ptl1_reasons[reason], tl);
        else
                panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
}

void
clear_watchdog_on_exit()
{
        /*
         * Only shut down an active hardware watchdog timer if the platform
         * has expressed an interest to.
         */
        if (disable_watchdog_on_exit && watchdog_activated) {
                prom_printf("Debugging requested; hardware watchdog "
                    "disabled; reboot to re-enable.\n");
                cmn_err(CE_WARN, "!Debugging requested; hardware watchdog "
                    "disabled; reboot to re-enable.");
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_clear_watchdog_timer();
                mutex_exit(&tod_lock);
        }
}

/*
 * This null routine is only used by sun4v watchdog timer support.
 */
void
restore_watchdog_on_entry(void)
{
}

int
kdi_watchdog_disable(void)
{
        if (watchdog_activated) {
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_clear_watchdog_timer();
                mutex_exit(&tod_lock);
        }

        return (watchdog_activated);
}

void
kdi_watchdog_restore(void)
{
        if (watchdog_enable) {
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
                mutex_exit(&tod_lock);
        }
}

/*ARGSUSED*/
void
mach_dump_buffer_init(void)
{
        /*
         * setup dump buffer to store extra crash information
         * not applicable to sun4u
         */
}

/*
 * xt_sync - wait for previous x-traps to finish
 */
void
xt_sync(cpuset_t cpuset)
{
        kpreempt_disable();
        CPUSET_DEL(cpuset, CPU->cpu_id);
        CPUSET_AND(cpuset, cpu_ready_set);
        xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0);
        kpreempt_enable();
}

/*
 * mach_soft_state_init() - dummy routine for sun4v soft state
 */
void
mach_soft_state_init(void)
{}

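/*
 * Illustrative usage sketch (not part of the original source): a caller that
 * posts cross-traps with xt_some() can use xt_sync() above to wait for the
 * previously posted x-traps to drain before proceeding.  The xx_* handler
 * and argument names are hypothetical.
 */
#if 0
static void
xx_crosstrap_and_wait(uint64_t xx_arg1, uint64_t xx_arg2)
{
        cpuset_t set;

        CPUSET_ALL_BUT(set, CPU->cpu_id);       /* every CPU but us */
        xt_some(set, (xcfunc_t *)xx_tl1_handler, xx_arg1, xx_arg2);
        xt_sync(set);                           /* wait for x-traps to drain */
}
#endif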