1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/t_lock.h> 28 #include <sys/uadmin.h> 29 #include <sys/panic.h> 30 #include <sys/reboot.h> 31 #include <sys/autoconf.h> 32 #include <sys/machsystm.h> 33 #include <sys/promif.h> 34 #include <sys/membar.h> 35 #include <vm/hat_sfmmu.h> 36 #include <sys/cpu_module.h> 37 #include <sys/cpu_sgnblk_defs.h> 38 #include <sys/intreg.h> 39 #include <sys/consdev.h> 40 #include <sys/kdi_impl.h> 41 #include <sys/callb.h> 42 #include <sys/dumphdr.h> 43 44 #ifdef TRAPTRACE 45 #include <sys/traptrace.h> 46 u_longlong_t panic_tick; 47 #endif /* TRAPTRACE */ 48 49 extern u_longlong_t gettick(); 50 static void reboot_machine(char *); 51 int disable_watchdog_on_exit = 0; 52 53 /* 54 * Machine dependent code to reboot. 55 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer 56 * to a string to be used as the argument string when rebooting. 57 * 58 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely 59 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when 60 * we are in a normal shutdown sequence (interrupts are not blocked, the 61 * system is not panic'ing or being suspended). 62 */ 63 /*ARGSUSED*/ 64 void 65 mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb) 66 { 67 extern void pm_cfb_check_and_powerup(void); 68 69 /* 70 * Disable the hw watchdog timer. 71 */ 72 if (disable_watchdog_on_exit && watchdog_activated) { 73 mutex_enter(&tod_lock); 74 (void) tod_ops.tod_clear_watchdog_timer(); 75 mutex_exit(&tod_lock); 76 } 77 78 /* 79 * XXX - rconsvp is set to NULL to ensure that output messages 80 * are sent to the underlying "hardware" device using the 81 * monitor's printf routine since we are in the process of 82 * either rebooting or halting the machine. 83 */ 84 rconsvp = NULL; 85 86 /* 87 * At a high interrupt level we can't: 88 * 1) bring up the console 89 * or 90 * 2) wait for pending interrupts prior to redistribution 91 * to the current CPU 92 * 93 * so we do them now. 94 */ 95 pm_cfb_check_and_powerup(); 96 97 /* make sure there are no more changes to the device tree */ 98 devtree_freeze(); 99 100 if (invoke_cb) 101 (void) callb_execute_class(CB_CL_MDBOOT, NULL); 102 103 /* 104 * Clear any unresolved UEs from memory. 105 */ 106 page_retire_mdboot(); 107 108 /* 109 * stop other cpus which also raise our priority. since there is only 110 * one active cpu after this, and our priority will be too high 111 * for us to be preempted, we're essentially single threaded 112 * from here on out. 113 */ 114 stop_other_cpus(); 115 116 /* 117 * try and reset leaf devices. reset_leaves() should only 118 * be called when there are no other threads that could be 119 * accessing devices 120 */ 121 reset_leaves(); 122 123 if (fcn == AD_HALT) { 124 halt((char *)NULL); 125 } else if (fcn == AD_POWEROFF) { 126 power_down(NULL); 127 } else { 128 if (bootstr == NULL) { 129 switch (fcn) { 130 131 case AD_FASTREBOOT: 132 case AD_BOOT: 133 bootstr = ""; 134 break; 135 136 case AD_IBOOT: 137 bootstr = "-a"; 138 break; 139 140 case AD_SBOOT: 141 bootstr = "-s"; 142 break; 143 144 case AD_SIBOOT: 145 bootstr = "-sa"; 146 break; 147 default: 148 cmn_err(CE_WARN, 149 "mdboot: invalid function %d", fcn); 150 bootstr = ""; 151 break; 152 } 153 } 154 if (fcn == AD_FASTREBOOT) { 155 pnode_t onode; 156 int dllen; 157 onode = prom_optionsnode(); 158 if ((onode == OBP_NONODE) || (onode == OBP_BADNODE)) { 159 cmn_err(CE_WARN, "Unable to set diag level for" 160 " quick reboot"); 161 } else { 162 dllen = prom_getproplen(onode, "diag-level"); 163 if (dllen != -1) { 164 int newstrlen; 165 char *newstr = kmem_alloc(strlen( 166 bootstr) + dllen + 5, KM_SLEEP); 167 (void) strcpy(newstr, bootstr); 168 (void) strcat(newstr, " -f "); 169 newstrlen = strlen(bootstr) + 4; 170 (void) prom_getprop(onode, "diag-level", 171 (caddr_t)&(newstr[newstrlen])); 172 newstr[newstrlen + dllen] = '\0'; 173 bootstr = newstr; 174 } 175 (void) prom_setprop(onode, "diag-level", 176 "off", 4); 177 } 178 } 179 reboot_machine(bootstr); 180 } 181 /* MAYBE REACHED */ 182 } 183 184 /* mdpreboot - may be called prior to mdboot while root fs still mounted */ 185 /*ARGSUSED*/ 186 void 187 mdpreboot(int cmd, int fcn, char *bootstr) 188 { 189 } 190 191 /* 192 * Halt the machine and then reboot with the device 193 * and arguments specified in bootstr. 194 */ 195 static void 196 reboot_machine(char *bootstr) 197 { 198 flush_windows(); 199 stop_other_cpus(); /* send stop signal to other CPUs */ 200 prom_printf("rebooting...\n"); 201 /* 202 * For platforms that use CPU signatures, we 203 * need to set the signature block to OS and 204 * the state to exiting for all the processors. 205 */ 206 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1); 207 prom_reboot(bootstr); 208 /*NOTREACHED*/ 209 } 210 211 /* 212 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs. 213 * Once in panic_idle() they raise spl, record their location, and spin. 214 */ 215 static void 216 panic_idle(void) 217 { 218 cpu_async_panic_callb(); /* check for async errors */ 219 220 (void) spl7(); 221 222 debug_flush_windows(); 223 (void) setjmp(&curthread->t_pcb); 224 225 CPU->cpu_m.in_prom = 1; 226 membar_stld(); 227 228 dumpsys_helper(); 229 230 for (;;) 231 continue; 232 } 233 234 /* 235 * Force the other CPUs to trap into panic_idle(), and then remove them 236 * from the cpu_ready_set so they will no longer receive cross-calls. 237 */ 238 /*ARGSUSED*/ 239 void 240 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl) 241 { 242 cpuset_t cps; 243 int i; 244 245 (void) splzs(); 246 CPUSET_ALL_BUT(cps, cp->cpu_id); 247 xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL); 248 249 for (i = 0; i < NCPU; i++) { 250 if (i != cp->cpu_id && CPU_XCALL_READY(i)) { 251 int ntries = 0x10000; 252 253 while (!cpu[i]->cpu_m.in_prom && ntries) { 254 DELAY(50); 255 ntries--; 256 } 257 258 if (!cpu[i]->cpu_m.in_prom) 259 printf("panic: failed to stop cpu%d\n", i); 260 261 cpu[i]->cpu_flags &= ~CPU_READY; 262 cpu[i]->cpu_flags |= CPU_QUIESCED; 263 CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id); 264 } 265 } 266 } 267 268 /* 269 * Platform callback following each entry to panicsys(). If we've panicked at 270 * level 14, we examine t_panic_trap to see if a fatal trap occurred. If so, 271 * we disable further %tick_cmpr interrupts. If not, an explicit call to panic 272 * was made and so we re-enqueue an interrupt request structure to allow 273 * further level 14 interrupts to be processed once we lower PIL. This allows 274 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic. 275 */ 276 void 277 panic_enter_hw(int spl) 278 { 279 if (spl == ipltospl(PIL_14)) { 280 uint_t opstate = disable_vec_intr(); 281 282 if (curthread->t_panic_trap != NULL) { 283 tickcmpr_disable(); 284 intr_dequeue_req(PIL_14, cbe_level14_inum); 285 } else { 286 if (!tickcmpr_disabled()) 287 intr_enqueue_req(PIL_14, cbe_level14_inum); 288 /* 289 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT) 290 * and SOFTINT<16> (STICK_INT) to indicate 291 * that the current level 14 has been serviced. 292 */ 293 wr_clr_softint((1 << PIL_14) | 294 TICK_INT_MASK | STICK_INT_MASK); 295 } 296 297 enable_vec_intr(opstate); 298 } 299 } 300 301 /* 302 * Miscellaneous hardware-specific code to execute after panicstr is set 303 * by the panic code: we also print and record PTL1 panic information here. 304 */ 305 /*ARGSUSED*/ 306 void 307 panic_quiesce_hw(panic_data_t *pdp) 308 { 309 extern uint_t getpstate(void); 310 extern void setpstate(uint_t); 311 312 #ifdef TRAPTRACE 313 /* 314 * Turn off TRAPTRACE and save the current %tick value in panic_tick. 315 */ 316 if (!panic_tick) 317 panic_tick = gettick(); 318 TRAPTRACE_FREEZE; 319 #endif 320 /* 321 * For Platforms that use CPU signatures, we 322 * need to set the signature block to OS, the state to 323 * exiting, and the substate to panic for all the processors. 324 */ 325 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1); 326 327 /* 328 * De-activate ECC functions and disable the watchdog timer now that 329 * we've made it through the critical part of the panic code. 330 */ 331 if (watchdog_enable) 332 (void) tod_ops.tod_clear_watchdog_timer(); 333 334 /* 335 * Disable further ECC errors from the CPU module and the bus nexus. 336 */ 337 cpu_disable_errors(); 338 (void) bus_func_invoke(BF_TYPE_ERRDIS); 339 340 /* 341 * Redirect all interrupts to the current CPU. 342 */ 343 intr_redist_all_cpus_shutdown(); 344 345 /* 346 * This call exists solely to support dumps to network 347 * devices after sync from OBP. 348 * 349 * If we came here via the sync callback, then on some 350 * platforms, interrupts may have arrived while we were 351 * stopped in OBP. OBP will arrange for those interrupts to 352 * be redelivered if you say "go", but not if you invoke a 353 * client callback like 'sync'. For some dump devices 354 * (network swap devices), we need interrupts to be 355 * delivered in order to dump, so we have to call the bus 356 * nexus driver to reset the interrupt state machines. 357 */ 358 (void) bus_func_invoke(BF_TYPE_RESINTR); 359 360 setpstate(getpstate() | PSTATE_IE); 361 } 362 363 /* 364 * Platforms that use CPU signatures need to set the signature block to OS and 365 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to 366 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to 367 * reboot the machine if the dump never completes. 368 */ 369 /*ARGSUSED*/ 370 void 371 panic_dump_hw(int spl) 372 { 373 CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1); 374 } 375 376 /* 377 * for ptl1_panic 378 */ 379 void 380 ptl1_init_cpu(struct cpu *cpu) 381 { 382 ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state; 383 384 /*CONSTCOND*/ 385 if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) { 386 panic("ptl1_init_cpu: not enough space left for ptl1_panic " 387 "stack, sizeof (struct cpu) = %lu", sizeof (struct cpu)); 388 } 389 390 pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE; 391 cpu_pa[cpu->cpu_id] = va_to_pa(cpu); 392 } 393 394 void 395 ptl1_panic_handler(ptl1_state_t *pstate) 396 { 397 static const char *ptl1_reasons[] = { 398 #ifdef PTL1_PANIC_DEBUG 399 "trap for debug purpose", /* PTL1_BAD_DEBUG */ 400 #else 401 "unknown trap", /* PTL1_BAD_DEBUG */ 402 #endif 403 "register window trap", /* PTL1_BAD_WTRAP */ 404 "kernel MMU miss", /* PTL1_BAD_KMISS */ 405 "kernel protection fault", /* PTL1_BAD_KPROT_FAULT */ 406 "ISM MMU miss", /* PTL1_BAD_ISM */ 407 "kernel MMU trap", /* PTL1_BAD_MMUTRAP */ 408 "kernel trap handler state", /* PTL1_BAD_TRAP */ 409 "floating point trap", /* PTL1_BAD_FPTRAP */ 410 #ifdef DEBUG 411 "pointer to intr_vec", /* PTL1_BAD_INTR_VEC */ 412 #else 413 "unknown trap", /* PTL1_BAD_INTR_VEC */ 414 #endif 415 #ifdef TRAPTRACE 416 "TRACE_PTR state", /* PTL1_BAD_TRACE_PTR */ 417 #else 418 "unknown trap", /* PTL1_BAD_TRACE_PTR */ 419 #endif 420 "stack overflow", /* PTL1_BAD_STACK */ 421 "DTrace flags", /* PTL1_BAD_DTRACE_FLAGS */ 422 "attempt to steal locked ctx", /* PTL1_BAD_CTX_STEAL */ 423 "CPU ECC error loop", /* PTL1_BAD_ECC */ 424 "non-kernel context in sys/priv_trap() below or", 425 /* PTL1_BAD_CTX */ 426 "error raising a TSB exception", /* PTL1_BAD_RAISE_TSBEXCP */ 427 "missing shared TSB" /* PTL1_NO_SCDTSB8K */ 428 }; 429 430 uint_t reason = pstate->ptl1_regs.ptl1_g1; 431 uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl; 432 struct panic_trap_info ti = { 0 }; 433 434 /* 435 * Use trap_info for a place holder to call panic_savetrap() and 436 * panic_showtrap() to save and print out ptl1_panic information. 437 */ 438 if (curthread->t_panic_trap == NULL) 439 curthread->t_panic_trap = &ti; 440 441 if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0])) 442 panic("bad %s at TL %u", ptl1_reasons[reason], tl); 443 else 444 panic("ptl1_panic reason 0x%x at TL %u", reason, tl); 445 } 446 447 void 448 clear_watchdog_on_exit() 449 { 450 /* 451 * Only shut down an active hardware watchdog timer if the platform 452 * has expressed an interest to. 453 */ 454 if (disable_watchdog_on_exit && watchdog_activated) { 455 prom_printf("Debugging requested; hardware watchdog " 456 "disabled; reboot to re-enable.\n"); 457 cmn_err(CE_WARN, "!Debugging requested; hardware watchdog " 458 "disabled; reboot to re-enable."); 459 mutex_enter(&tod_lock); 460 (void) tod_ops.tod_clear_watchdog_timer(); 461 mutex_exit(&tod_lock); 462 } 463 } 464 465 /* 466 * This null routine is only used by sun4v watchdog timer support. 467 */ 468 void 469 restore_watchdog_on_entry(void) 470 { 471 } 472 473 int 474 kdi_watchdog_disable(void) 475 { 476 if (watchdog_activated) { 477 mutex_enter(&tod_lock); 478 (void) tod_ops.tod_clear_watchdog_timer(); 479 mutex_exit(&tod_lock); 480 } 481 482 return (watchdog_activated); 483 } 484 485 void 486 kdi_watchdog_restore(void) 487 { 488 if (watchdog_enable) { 489 mutex_enter(&tod_lock); 490 (void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds); 491 mutex_exit(&tod_lock); 492 } 493 } 494 495 /*ARGSUSED*/ 496 void 497 mach_dump_buffer_init(void) 498 { 499 /* 500 * setup dump buffer to store extra crash information 501 * not applicable to sun4u 502 */ 503 } 504 505 /* 506 * xt_sync - wait for previous x-traps to finish 507 */ 508 void 509 xt_sync(cpuset_t cpuset) 510 { 511 kpreempt_disable(); 512 CPUSET_DEL(cpuset, CPU->cpu_id); 513 CPUSET_AND(cpuset, cpu_ready_set); 514 xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0); 515 kpreempt_enable(); 516 } 517 518 /* 519 * mach_soft_state_init() - dummy routine for sun4v soft state 520 */ 521 void 522 mach_soft_state_init(void) 523 {} 524