/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/reboot.h>
#include <sys/autoconf.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/membar.h>
#include <vm/hat_sfmmu.h>
#include <sys/cpu_module.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/intreg.h>
#include <sys/consdev.h>
#include <sys/kdi_impl.h>
#include <sys/callb.h>
#include <sys/dumphdr.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
u_longlong_t panic_tick;
#endif /* TRAPTRACE */

extern u_longlong_t gettick();
static void reboot_machine(char *);
int disable_watchdog_on_exit = 0;
extern uint64_t cpc_level15_inum;

/*
 * Machine dependent code to reboot.
 * "bootstr" is interpreted as a character pointer; if non-null, it is a
 * pointer to a string to be used as the argument string when rebooting.
 *
 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
 * we are in a normal shutdown sequence (interrupts are not blocked, the
 * system is not panicking or being suspended).
 */
/*ARGSUSED*/
void
mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb)
{
        extern void pm_cfb_check_and_powerup(void);

        /*
         * Disable the hw watchdog timer.
         */
        if (disable_watchdog_on_exit && watchdog_activated) {
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_clear_watchdog_timer();
                mutex_exit(&tod_lock);
        }

        /*
         * XXX - rconsvp is set to NULL to ensure that output messages
         * are sent to the underlying "hardware" device using the
         * monitor's printf routine since we are in the process of
         * either rebooting or halting the machine.
         */
        rconsvp = NULL;

        /*
         * At a high interrupt level we can't:
         *      1) bring up the console
         * or
         *      2) wait for pending interrupts prior to redistribution
         *         to the current CPU
         *
         * so we do them now.
         */
        pm_cfb_check_and_powerup();

        /* make sure there are no more changes to the device tree */
        devtree_freeze();

        if (invoke_cb)
                (void) callb_execute_class(CB_CL_MDBOOT, NULL);

        /*
         * Clear any unresolved UEs from memory.
         */
        page_retire_mdboot();

        /*
         * Stop other CPUs, which also raises our priority.  Since there is
         * only one active CPU after this and our priority will be too high
         * for us to be preempted, we are essentially single-threaded from
         * here on out.
         */
        stop_other_cpus();

        /*
         * Try to reset leaf devices.  reset_leaves() should only be
         * called when there are no other threads that could be accessing
         * devices.
         */
        reset_leaves();

        if (fcn == AD_HALT) {
                halt((char *)NULL);
        } else if (fcn == AD_POWEROFF) {
                power_down(NULL);
        } else {
                if (bootstr == NULL) {
                        switch (fcn) {

                        case AD_FASTREBOOT:
                        case AD_BOOT:
                                bootstr = "";
                                break;

                        case AD_IBOOT:
                                bootstr = "-a";
                                break;

                        case AD_SBOOT:
                                bootstr = "-s";
                                break;

                        case AD_SIBOOT:
                                bootstr = "-sa";
                                break;
                        default:
                                cmn_err(CE_WARN,
                                    "mdboot: invalid function %d", fcn);
                                bootstr = "";
                                break;
                        }
                }
                if (fcn == AD_FASTREBOOT) {
                        pnode_t onode;
                        int dllen;

                        onode = prom_optionsnode();
                        if ((onode == OBP_NONODE) || (onode == OBP_BADNODE)) {
                                cmn_err(CE_WARN, "Unable to set diag level for"
                                    " quick reboot");
                        } else {
                                dllen = prom_getproplen(onode, "diag-level");
                                if (dllen != -1) {
                                        char *newstr = kmem_alloc(strlen(
                                            bootstr) + dllen + 5, KM_NOSLEEP);
                                        if (newstr != NULL) {
                                                int newstrlen;
                                                (void) strcpy(newstr, bootstr);
                                                (void) strcat(newstr, " -f ");
                                                newstrlen = strlen(bootstr) + 4;
                                                (void) prom_getprop(onode,
                                                    "diag-level",
                                                    (caddr_t)
                                                    &(newstr[newstrlen]));
                                                newstr[newstrlen + dllen] =
                                                    '\0';
                                                bootstr = newstr;
                                                (void) prom_setprop(onode,
                                                    "diag-level",
                                                    "off", 4);
                                        }
                                }
                        }
                }
                reboot_machine(bootstr);
        }
        /* MAYBE REACHED */
}

/* mdpreboot - may be called prior to mdboot while root fs still mounted */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
}

/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 */
static void
reboot_machine(char *bootstr)
{
        flush_windows();
        stop_other_cpus();              /* send stop signal to other CPUs */
        prom_printf("rebooting...\n");
        /*
         * For platforms that use CPU signatures, we
         * need to set the signature block to OS and
         * the state to exiting for all the processors.
         */
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
        prom_reboot(bootstr);
        /*NOTREACHED*/
}

/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 */
static void
panic_idle(void)
{
        cpu_async_panic_callb();        /* check for async errors */

        (void) spl7();

        debug_flush_windows();
        (void) setjmp(&curthread->t_pcb);

        CPU->cpu_m.in_prom = 1;
        membar_stld();

        dumpsys_helper();

        for (;;)
                continue;
}

/*
 * Force the other CPUs to trap into panic_idle(), and then remove them
 * from the cpu_ready_set so they will no longer receive cross-calls.
 */
/*ARGSUSED*/
void
panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
{
        cpuset_t cps;
        int i;

        (void) splzs();
        CPUSET_ALL_BUT(cps, cp->cpu_id);
        xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);

        for (i = 0; i < NCPU; i++) {
                if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
                        int ntries = 0x10000;

                        while (!cpu[i]->cpu_m.in_prom && ntries) {
                                DELAY(50);
                                ntries--;
                        }

                        if (!cpu[i]->cpu_m.in_prom)
                                printf("panic: failed to stop cpu%d\n", i);

                        cpu[i]->cpu_flags &= ~CPU_READY;
                        cpu[i]->cpu_flags |= CPU_QUIESCED;
                        CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
                }
        }
}

/*
 * Platform callback following each entry to panicsys().  If we've panicked at
 * level 14, we examine t_panic_trap to see if a fatal trap occurred.  If so,
 * we disable further %tick_cmpr interrupts.  If not, an explicit call to panic
 * was made and so we re-enqueue an interrupt request structure to allow
 * further level 14 interrupts to be processed once we lower PIL.  This allows
 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 *
 * In case we panic at level 15, ensure that the cpc handler has been
 * reinstalled; otherwise we could run the risk of hitting a missing interrupt
 * handler when this thread drops PIL and the cpc counter overflows.
 */
void
panic_enter_hw(int spl)
{
        uint_t opstate;

        if (spl == ipltospl(PIL_14)) {
                opstate = disable_vec_intr();

                if (curthread->t_panic_trap != NULL) {
                        tickcmpr_disable();
                        intr_dequeue_req(PIL_14, cbe_level14_inum);
                } else {
                        if (!tickcmpr_disabled())
                                intr_enqueue_req(PIL_14, cbe_level14_inum);
                        /*
                         * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
                         * and SOFTINT<16> (STICK_INT) to indicate
                         * that the current level 14 has been serviced.
                         */
                        wr_clr_softint((1 << PIL_14) |
                            TICK_INT_MASK | STICK_INT_MASK);
                }

                enable_vec_intr(opstate);
        } else if (spl == ipltospl(PIL_15)) {
                opstate = disable_vec_intr();
                intr_enqueue_req(PIL_15, cpc_level15_inum);
                wr_clr_softint(1 << PIL_15);
                enable_vec_intr(opstate);
        }
}

/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code: we also print and record PTL1 panic information here.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
        extern uint_t getpstate(void);
        extern void setpstate(uint_t);

#ifdef TRAPTRACE
        /*
         * Turn off TRAPTRACE and save the current %tick value in panic_tick.
         */
        if (!panic_tick)
                panic_tick = gettick();
        TRAPTRACE_FREEZE;
#endif
        /*
         * For platforms that use CPU signatures, we
         * need to set the signature block to OS, the state to
         * exiting, and the substate to panic for all the processors.
         */
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

        /*
         * De-activate ECC functions and disable the watchdog timer now that
         * we've made it through the critical part of the panic code.
         */
        if (watchdog_enable)
                (void) tod_ops.tod_clear_watchdog_timer();

        /*
         * Disable further ECC errors from the CPU module and the bus nexus.
         */
        cpu_disable_errors();
        (void) bus_func_invoke(BF_TYPE_ERRDIS);

        /*
         * Redirect all interrupts to the current CPU.
         */
        intr_redist_all_cpus_shutdown();

        /*
         * This call exists solely to support dumps to network
         * devices after sync from OBP.
         *
         * If we came here via the sync callback, then on some
         * platforms, interrupts may have arrived while we were
         * stopped in OBP.  OBP will arrange for those interrupts to
         * be redelivered if you say "go", but not if you invoke a
         * client callback like 'sync'.  For some dump devices
         * (network swap devices), we need interrupts to be
         * delivered in order to dump, so we have to call the bus
         * nexus driver to reset the interrupt state machines.
         */
        (void) bus_func_invoke(BF_TYPE_RESINTR);

        setpstate(getpstate() | PSTATE_IE);
}

/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs.  PANIC_CONT indicates that we're about to
 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to
 * reboot the machine if the dump never completes.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}

/*
 * for ptl1_panic
 */
void
ptl1_init_cpu(struct cpu *cpu)
{
        ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;

        /*CONSTCOND*/
        if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
                panic("ptl1_init_cpu: not enough space left for ptl1_panic "
                    "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu));
        }

        pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
        cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
}

void
ptl1_panic_handler(ptl1_state_t *pstate)
{
        static const char *ptl1_reasons[] = {
#ifdef PTL1_PANIC_DEBUG
                "trap for debug purpose",       /* PTL1_BAD_DEBUG */
#else
                "unknown trap",                 /* PTL1_BAD_DEBUG */
#endif
                "register window trap",         /* PTL1_BAD_WTRAP */
                "kernel MMU miss",              /* PTL1_BAD_KMISS */
                "kernel protection fault",      /* PTL1_BAD_KPROT_FAULT */
                "ISM MMU miss",                 /* PTL1_BAD_ISM */
                "kernel MMU trap",              /* PTL1_BAD_MMUTRAP */
                "kernel trap handler state",    /* PTL1_BAD_TRAP */
                "floating point trap",          /* PTL1_BAD_FPTRAP */
#ifdef DEBUG
                "pointer to intr_vec",          /* PTL1_BAD_INTR_VEC */
#else
                "unknown trap",                 /* PTL1_BAD_INTR_VEC */
#endif
#ifdef TRAPTRACE
                "TRACE_PTR state",              /* PTL1_BAD_TRACE_PTR */
#else
                "unknown trap",                 /* PTL1_BAD_TRACE_PTR */
#endif
                "stack overflow",               /* PTL1_BAD_STACK */
                "DTrace flags",                 /* PTL1_BAD_DTRACE_FLAGS */
                "attempt to steal locked ctx",  /* PTL1_BAD_CTX_STEAL */
                "CPU ECC error loop",           /* PTL1_BAD_ECC */
                "non-kernel context in sys/priv_trap() below or",
                                                /* PTL1_BAD_CTX */
                "error raising a TSB exception", /* PTL1_BAD_RAISE_TSBEXCP */
                "missing shared TSB"            /* PTL1_NO_SCDTSB8K */
        };

        uint_t reason = pstate->ptl1_regs.ptl1_g1;
        uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
        struct panic_trap_info ti = { 0 };

        /*
         * Use trap_info for a place holder to call panic_savetrap() and
         * panic_showtrap() to save and print out ptl1_panic information.
         */
        if (curthread->t_panic_trap == NULL)
                curthread->t_panic_trap = &ti;

        if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
                panic("bad %s at TL %u", ptl1_reasons[reason], tl);
        else
                panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
}

void
clear_watchdog_on_exit(void)
{
        /*
         * Only shut down an active hardware watchdog timer if the platform
         * has expressed an interest in doing so.
         */
        if (disable_watchdog_on_exit && watchdog_activated) {
                prom_printf("Debugging requested; hardware watchdog "
                    "disabled; reboot to re-enable.\n");
                cmn_err(CE_WARN, "!Debugging requested; hardware watchdog "
                    "disabled; reboot to re-enable.");
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_clear_watchdog_timer();
                mutex_exit(&tod_lock);
        }
}

/*
 * This null routine is only used by sun4v watchdog timer support.
 */
void
restore_watchdog_on_entry(void)
{
}

/*
 * kdi support: shut down an active hardware watchdog timer while the
 * system is stopped in the kernel debugger.
 */
int
kdi_watchdog_disable(void)
{
        if (watchdog_activated) {
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_clear_watchdog_timer();
                mutex_exit(&tod_lock);
        }

        return (watchdog_activated);
}

/*
 * kdi support: re-arm the hardware watchdog timer, if enabled, when the
 * system resumes from the kernel debugger.
 */
void
kdi_watchdog_restore(void)
{
        if (watchdog_enable) {
                mutex_enter(&tod_lock);
                (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
                mutex_exit(&tod_lock);
        }
}

/*ARGSUSED*/
void
mach_dump_buffer_init(void)
{
        /*
         * Set up a dump buffer to store extra crash information;
         * not applicable to sun4u.
         */
}

/*
 * xt_sync - wait for previous x-traps to finish
 */
void
xt_sync(cpuset_t cpuset)
{
        kpreempt_disable();
        CPUSET_DEL(cpuset, CPU->cpu_id);
        CPUSET_AND(cpuset, cpu_ready_set);
        xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0);
        kpreempt_enable();
}

/*
 * mach_soft_state_init() - dummy routine for sun4v soft state
 */
void
mach_soft_state_init(void)
{}