/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/reboot.h>
#include <sys/autoconf.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/membar.h>
#include <vm/hat_sfmmu.h>
#include <sys/cpu_module.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/intreg.h>
#include <sys/consdev.h>
#include <sys/kdi_impl.h>
#include <sys/callb.h>

#ifdef	TRAPTRACE
#include <sys/traptrace.h>
/* %tick value captured at panic time so post-mortem tools can anchor traces */
u_longlong_t panic_tick;
#endif /* TRAPTRACE */

extern u_longlong_t	gettick();
static void reboot_machine(char *);
extern int disable_watchdog_on_exit;

/*
 * Machine dependent code to reboot.
 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
 * to a string to be used as the argument string when rebooting.
 *
 * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
 * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
 * we are in a normal shutdown sequence (interrupts are not blocked, the
 * system is not panic'ing or being suspended).
 */
/*ARGSUSED*/
void
mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb)
{
	page_t *first, *pp;
	extern void pm_cfb_check_and_powerup(void);

	/*
	 * Disable the hw watchdog timer.
	 */
	if (disable_watchdog_on_exit && watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	/*
	 * Clear any unresolved UEs from memory.  We rely on the fact that on
	 * sun4u, pagezero() will always clear UEs.  Since we're rebooting, we
	 * just force p_selock to appear locked so pagezero()'s assert works.
	 *
	 * Pages that were retired successfully due to multiple CEs will
	 * also be cleared.
	 */
	if (memsegs != NULL) {
		/* walk the circular page list exactly once, starting anywhere */
		pp = first = page_first();
		do {
			if (page_isretired(pp) || page_istoxic(pp)) {
				/* pagezero asserts PAGE_LOCKED */
				pp->p_selock = -1;
				pagezero(pp, 0, PAGESIZE);
			}
		} while ((pp = page_next(pp)) != first);
	}

	/*
	 * XXX - rconsvp is set to NULL to ensure that output messages
	 * are sent to the underlying "hardware" device using the
	 * monitor's printf routine since we are in the process of
	 * either rebooting or halting the machine.
	 */
	rconsvp = NULL;

	/*
	 * At a high interrupt level we can't:
	 *	1) bring up the console
	 * or
	 *	2) wait for pending interrupts prior to redistribution
	 *	   to the current CPU
	 *
	 * so we do them now.
	 */
	pm_cfb_check_and_powerup();

	/* make sure there are no more changes to the device tree */
	devtree_freeze();

	if (invoke_cb)
		(void) callb_execute_class(CB_CL_MDBOOT, NULL);

	/*
	 * stop other cpus which also raise our priority. since there is only
	 * one active cpu after this, and our priority will be too high
	 * for us to be preempted, we're essentially single threaded
	 * from here on out.
	 */
	stop_other_cpus();

	/*
	 * try and reset leaf devices.  reset_leaves() should only
	 * be called when there are no other threads that could be
	 * accessing devices
	 */
	reset_leaves();

	if (fcn == AD_HALT) {
		halt((char *)NULL);
	} else if (fcn == AD_POWEROFF) {
		power_down(NULL);
	} else {
		/* map the reboot subfunction to a boot argument string */
		if (bootstr == NULL) {
			switch (fcn) {

			case AD_BOOT:
				bootstr = "";
				break;

			case AD_IBOOT:
				bootstr = "-a";
				break;

			case AD_SBOOT:
				bootstr = "-s";
				break;

			case AD_SIBOOT:
				bootstr = "-sa";
				break;
			default:
				cmn_err(CE_WARN,
				    "mdboot: invalid function %d", fcn);
				bootstr = "";
				break;
			}
		}
		reboot_machine(bootstr);
	}
	/* MAYBE REACHED */
}

/* mdpreboot - may be called prior to mdboot while root fs still mounted */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
	/* no sun4u-specific pre-reboot work is required */
}

/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 */
static void
reboot_machine(char *bootstr)
{
	flush_windows();
	stop_other_cpus();		/* send stop signal to other CPUs */
	prom_printf("rebooting...\n");
	/*
	 * For platforms that use CPU signatures, we
	 * need to set the signature block to OS and
	 * the state to exiting for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
	prom_reboot(bootstr);
	/*NOTREACHED*/
}

/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 */
static void
panic_idle(void)
{
	cpu_async_panic_callb();	/* check for async errors */

	(void) spl7();

	debug_flush_windows();
	/* record this CPU's register state for the debugger/dump */
	(void) setjmp(&curthread->t_pcb);

	CPU->cpu_m.in_prom = 1;
	/* memory barrier: make the in_prom store visible to other CPUs */
	membar_stld();

	for (;;);
}

/*
 * Force the other CPUs to trap into panic_idle(), and then remove them
 * from the cpu_ready_set so they will no longer receive cross-calls.
 */
/*ARGSUSED*/
void
panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
{
	cpuset_t cps;
	int i;

	(void) splzs();
	CPUSET_ALL_BUT(cps, cp->cpu_id);
	xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);

	for (i = 0; i < NCPU; i++) {
		if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
			/* poll for the CPU to park itself: 0x10000 * 50us */
			int ntries = 0x10000;

			while (!cpu[i]->cpu_m.in_prom && ntries) {
				DELAY(50);
				ntries--;
			}

			if (!cpu[i]->cpu_m.in_prom)
				printf("panic: failed to stop cpu%d\n", i);

			/* quiesce it regardless so cross-calls stop */
			cpu[i]->cpu_flags &= ~CPU_READY;
			cpu[i]->cpu_flags |= CPU_QUIESCED;
			CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
		}
	}
}

/*
 * Platform callback following each entry to panicsys().  If we've panicked at
 * level 14, we examine t_panic_trap to see if a fatal trap occurred.  If so,
 * we disable further %tick_cmpr interrupts.  If not, an explicit call to panic
 * was made and so we re-enqueue an interrupt request structure to allow
 * further level 14 interrupts to be processed once we lower PIL.  This allows
 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 */
void
panic_enter_hw(int spl)
{
	if (spl == ipltospl(PIL_14)) {
		uint_t opstate = disable_vec_intr();

		if (curthread->t_panic_trap != NULL) {
			/* fatal trap: silence the level-14 tick source */
			tickcmpr_disable();
			intr_dequeue_req(PIL_14, cbe_level14_inum);
		} else {
			if (!tickcmpr_disabled())
				intr_enqueue_req(PIL_14, cbe_level14_inum);
			/*
			 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
			 * and SOFTINT<16> (STICK_INT) to indicate
			 * that the current level 14 has been serviced.
			 */
			wr_clr_softint((1 << PIL_14) |
			    TICK_INT_MASK | STICK_INT_MASK);
		}

		enable_vec_intr(opstate);
	}
}

/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code: we also print and record PTL1 panic information here.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
	extern uint_t getpstate(void);
	extern void setpstate(uint_t);

#ifdef TRAPTRACE
	/*
	 * Turn off TRAPTRACE and save the current %tick value in panic_tick.
	 */
	if (!panic_tick)
		panic_tick = gettick();
	TRAPTRACE_FREEZE;
#endif
	/*
	 * For Platforms that use CPU signatures, we
	 * need to set the signature block to OS, the state to
	 * exiting, and the substate to panic for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

	/*
	 * De-activate ECC functions and disable the watchdog timer now that
	 * we've made it through the critical part of the panic code.
	 */
	if (watchdog_enable)
		(void) tod_ops.tod_clear_watchdog_timer();

	/*
	 * Disable further ECC errors from the CPU module and the bus nexus.
	 */
	cpu_disable_errors();
	(void) bus_func_invoke(BF_TYPE_ERRDIS);

	/*
	 * Redirect all interrupts to the current CPU.
	 */
	intr_redist_all_cpus_shutdown();

	/*
	 * This call exists solely to support dumps to network
	 * devices after sync from OBP.
	 *
	 * If we came here via the sync callback, then on some
	 * platforms, interrupts may have arrived while we were
	 * stopped in OBP.  OBP will arrange for those interrupts to
	 * be redelivered if you say "go", but not if you invoke a
	 * client callback like 'sync'.  For some dump devices
	 * (network swap devices), we need interrupts to be
	 * delivered in order to dump, so we have to call the bus
	 * nexus driver to reset the interrupt state machines.
	 */
	(void) bus_func_invoke(BF_TYPE_RESINTR);

	/* re-enable interrupt delivery (PSTATE.IE) for the dump path */
	setpstate(getpstate() | PSTATE_IE);
}

/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs.  PANIC_CONT indicates that we're about to
 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to
 * reboot the machine if the dump never completes.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}

/*
 * for ptl1_panic
 *
 * Carve out the ptl1_panic stack at the top of the per-CPU allocation and
 * record the CPU structure's physical address for the trap-level-1 handler.
 */
void
ptl1_init_cpu(struct cpu *cpu)
{
	ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;

	/*CONSTCOND*/
	if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
		/*
		 * NOTE(review): %d does not match the size_t operand of
		 * sizeof on LP64 -- confirm and consider %lu here.
		 */
		panic("ptl1_init_cpu: not enough space left for ptl1_panic "
		    "stack, sizeof (struct cpu) = %d", sizeof (struct cpu));
	}

	pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
	cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
}

/*
 * Decode the ptl1_panic reason code (saved in %g1 by the low-level handler)
 * and panic with a human-readable message, first arranging for the saved
 * trap state to be recorded/printed via t_panic_trap.
 */
void
ptl1_panic_handler(ptl1_state_t *pstate)
{
	static const char *ptl1_reasons[] = {
#ifdef	PTL1_PANIC_DEBUG
		"trap for debug purpose",	/* PTL1_BAD_DEBUG */
#else
		"unknown trap",			/* PTL1_BAD_DEBUG */
#endif
		"register window trap",		/* PTL1_BAD_WTRAP */
		"kernel MMU miss",		/* PTL1_BAD_KMISS */
		"kernel protection fault",	/* PTL1_BAD_KPROT_FAULT */
		"ISM MMU miss",			/* PTL1_BAD_ISM */
		"kernel MMU trap",		/* PTL1_BAD_MMUTRAP */
		"kernel trap handler state",	/* PTL1_BAD_TRAP */
		"floating point trap",		/* PTL1_BAD_FPTRAP */
#ifdef	DEBUG
		"pointer to intr_req",		/* PTL1_BAD_INTR_REQ */
#else
		"unknown trap",			/* PTL1_BAD_INTR_REQ */
#endif
#ifdef	TRAPTRACE
		"TRACE_PTR state",		/* PTL1_BAD_TRACE_PTR */
#else
		"unknown trap",			/* PTL1_BAD_TRACE_PTR */
#endif
		"stack overflow",		/* PTL1_BAD_STACK */
		"DTrace flags",			/* PTL1_BAD_DTRACE_FLAGS */
		"attempt to steal locked ctx",	/* PTL1_BAD_CTX_STEAL */
		"CPU ECC error loop",		/* PTL1_BAD_ECC */
		"unknown trap",			/* PTL1_BAD_HCALL */
	};

	uint_t reason = pstate->ptl1_regs.ptl1_g1;
	uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
	struct trap_info ti = { 0 };

	/*
	 * Use trap_info for a place holder to call panic_savetrap() and
	 * panic_showtrap() to save and print out ptl1_panic information.
	 */
	if (curthread->t_panic_trap == NULL)
		curthread->t_panic_trap = &ti;

	/* unrecognized reason codes fall through to the raw hex message */
	if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
		panic("bad %s at TL %u", ptl1_reasons[reason], tl);
	else
		panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
}

/*
 * Shut down an active hardware watchdog timer on the way out of the kernel,
 * warning the operator that it will stay off until the next reboot.
 */
void
clear_watchdog_on_exit()
{
	/*
	 * Only shut down an active hardware watchdog timer if the platform
	 * has expressed an interest to.
	 */
	if (disable_watchdog_on_exit && watchdog_activated) {
		prom_printf("Debugging requested; hardware watchdog "
		    "disabled; reboot to re-enable.\n");
		cmn_err(CE_WARN, "!Debugging requested; hardware watchdog "
		    "disabled; reboot to re-enable.");
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}
}

/*
 * Clear an active hardware watchdog timer on behalf of the kernel debugger.
 * NOTE(review): the return value is watchdog_activated as read after the
 * clear; whether tod_clear_watchdog_timer() updates that flag is not visible
 * here -- confirm against the TOD driver before relying on it.
 */
int
kdi_watchdog_disable(void)
{
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	return (watchdog_activated);
}

/*
 * Re-arm the hardware watchdog with the configured timeout, if watchdog
 * support is enabled.  Counterpart to kdi_watchdog_disable().
 */
void
kdi_watchdog_restore(void)
{
	if (watchdog_enable) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
		mutex_exit(&tod_lock);
	}
}

/*ARGSUSED*/
void
mach_dump_buffer_init(void)
{
	/*
	 * setup dump buffer to store extra crash information
	 * not applicable to sun4u
	 */
}

/*
 * xt_sync - wait for previous x-traps to finish
 */
void
xt_sync(cpuset_t cpuset)
{
	kpreempt_disable();
	/* never cross-trap ourselves, and only target CPUs that are ready */
	CPUSET_DEL(cpuset, CPU->cpu_id);
	CPUSET_AND(cpuset, cpu_ready_set);
	xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0);
	kpreempt_enable();
}