/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/reboot.h>
#include <sys/autoconf.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/membar.h>
#include <vm/hat_sfmmu.h>
#include <sys/cpu_module.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/intreg.h>
#include <sys/consdev.h>
#include <sys/kdi_impl.h>

#ifdef	TRAPTRACE
#include <sys/traptrace.h>
u_longlong_t panic_tick;
#endif /* TRAPTRACE */

extern u_longlong_t	gettick();
static void reboot_machine(char *);
extern int disable_watchdog_on_exit;

/*
 * Machine dependent code to reboot.
 * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
 * to a string to be used as the argument string when rebooting.
 */
/*ARGSUSED*/
void
mdboot(int cmd, int fcn, char *bootstr)
{
	page_t *first, *pp;
	extern void pm_cfb_check_and_powerup(void);

	/*
	 * Disable the hw watchdog timer.
	 */
	if (disable_watchdog_on_exit && watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	/*
	 * Clear any unresolved UEs from memory.  We rely on the fact that on
	 * sun4u, pagezero() will always clear UEs.  Since we're rebooting, we
	 * just force p_selock to appear locked so pagezero()'s assert works.
	 *
	 * Pages that were retired successfully due to multiple CEs will
	 * also be cleared.
	 */
	if (memsegs != NULL) {
		pp = first = page_first();
		do {
			if (page_isretired(pp) || page_istoxic(pp)) {
				/* pagezero asserts PAGE_LOCKED */
				pp->p_selock = -1;
				pagezero(pp, 0, PAGESIZE);
			}
		} while ((pp = page_next(pp)) != first);
	}

	/*
	 * XXX - rconsvp is set to NULL to ensure that output messages
	 * are sent to the underlying "hardware" device using the
	 * monitor's printf routine since we are in the process of
	 * either rebooting or halting the machine.
	 */
	rconsvp = NULL;

	/*
	 * At a high interrupt level we can't:
	 *	1) bring up the console
	 * or
	 *	2) wait for pending interrupts prior to redistribution
	 *	   to the current CPU
	 *
	 * so we do them now.
	 */
	pm_cfb_check_and_powerup();

	/* make sure there are no more changes to the device tree */
	devtree_freeze();

	/*
	 * stop other cpus which also raise our priority. since there is only
	 * one active cpu after this, and our priority will be too high
	 * for us to be preempted, we're essentially single threaded
	 * from here on out.
	 */
	stop_other_cpus();

	/*
	 * try and reset leaf devices.  reset_leaves() should only
	 * be called when there are no other threads that could be
	 * accessing devices
	 */
	reset_leaves();

	if (fcn == AD_HALT) {
		halt((char *)NULL);
	} else if (fcn == AD_POWEROFF) {
		power_down(NULL);
	} else {
		if (bootstr == NULL) {
			switch (fcn) {

			case AD_BOOT:
				bootstr = "";
				break;

			case AD_IBOOT:
				bootstr = "-a";
				break;

			case AD_SBOOT:
				bootstr = "-s";
				break;

			case AD_SIBOOT:
				bootstr = "-sa";
				break;
			default:
				cmn_err(CE_WARN,
				    "mdboot: invalid function %d", fcn);
				bootstr = "";
				break;
			}
		}
		reboot_machine(bootstr);
	}
	/* MAYBE REACHED */
}

/* mdpreboot - may be called prior to mdboot while root fs still mounted */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
}

/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 */
static void
reboot_machine(char *bootstr)
{
	flush_windows();
	stop_other_cpus();		/* send stop signal to other CPUs */
	prom_printf("rebooting...\n");
	/*
	 * For platforms that use CPU signatures, we
	 * need to set the signature block to OS and
	 * the state to exiting for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
	prom_reboot(bootstr);
	/*NOTREACHED*/
}

/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 */
static void
panic_idle(void)
{
	cpu_async_panic_callb();	/* check for async errors */

	(void) spl7();

	debug_flush_windows();
	(void) setjmp(&curthread->t_pcb);

	CPU->cpu_m.in_prom = 1;
	membar_stld();

	for (;;);
}

/*
 * Force the other CPUs to trap into panic_idle(), and then remove them
 * from the cpu_ready_set so they will no longer receive cross-calls.
 */
/*ARGSUSED*/
void
panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
{
	cpuset_t cps;
	int i;

	(void) splzs();
	CPUSET_ALL_BUT(cps, cp->cpu_id);
	xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);

	for (i = 0; i < NCPU; i++) {
		if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
			int ntries = 0x10000;

			while (!cpu[i]->cpu_m.in_prom && ntries) {
				DELAY(50);
				ntries--;
			}

			if (!cpu[i]->cpu_m.in_prom)
				printf("panic: failed to stop cpu%d\n", i);

			cpu[i]->cpu_flags &= ~CPU_READY;
			cpu[i]->cpu_flags |= CPU_QUIESCED;
			CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
		}
	}
}

/*
 * Platform callback following each entry to panicsys().  If we've panicked at
 * level 14, we examine t_panic_trap to see if a fatal trap occurred.  If so,
 * we disable further %tick_cmpr interrupts.  If not, an explicit call to panic
 * was made and so we re-enqueue an interrupt request structure to allow
 * further level 14 interrupts to be processed once we lower PIL.  This allows
 * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 */
void
panic_enter_hw(int spl)
{
	if (spl == ipltospl(PIL_14)) {
		uint_t opstate = disable_vec_intr();

		if (curthread->t_panic_trap != NULL) {
			tickcmpr_disable();
			intr_dequeue_req(PIL_14, cbe_level14_inum);
		} else {
			if (!tickcmpr_disabled())
				intr_enqueue_req(PIL_14, cbe_level14_inum);
			/*
			 * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
			 * and SOFTINT<16> (STICK_INT) to indicate
			 * that the current level 14 has been serviced.
			 */
			wr_clr_softint((1 << PIL_14) |
			    TICK_INT_MASK | STICK_INT_MASK);
		}

		enable_vec_intr(opstate);
	}
}

/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code: we also print and record PTL1 panic information here.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
	extern uint_t getpstate(void);
	extern void setpstate(uint_t);

#ifdef TRAPTRACE
	/*
	 * Turn off TRAPTRACE and save the current %tick value in panic_tick.
	 */
	if (!panic_tick)
		panic_tick = gettick();
	TRAPTRACE_FREEZE;
#endif
	/*
	 * For Platforms that use CPU signatures, we
	 * need to set the signature block to OS, the state to
	 * exiting, and the substate to panic for all the processors.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

	/*
	 * De-activate ECC functions and disable the watchdog timer now that
	 * we've made it through the critical part of the panic code.
	 */
	if (watchdog_enable)
		(void) tod_ops.tod_clear_watchdog_timer();

	/*
	 * Disable further ECC errors from the CPU module and the bus nexus.
	 */
	cpu_disable_errors();
	(void) bus_func_invoke(BF_TYPE_ERRDIS);

	/*
	 * Redirect all interrupts to the current CPU.
	 */
	intr_redist_all_cpus_shutdown();

	/*
	 * This call exists solely to support dumps to network
	 * devices after sync from OBP.
	 *
	 * If we came here via the sync callback, then on some
	 * platforms, interrupts may have arrived while we were
	 * stopped in OBP.  OBP will arrange for those interrupts to
	 * be redelivered if you say "go", but not if you invoke a
	 * client callback like 'sync'.  For some dump devices
	 * (network swap devices), we need interrupts to be
	 * delivered in order to dump, so we have to call the bus
	 * nexus driver to reset the interrupt state machines.
	 */
	(void) bus_func_invoke(BF_TYPE_RESINTR);

	setpstate(getpstate() | PSTATE_IE);
}

/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to
 * write the crash dump, which tells the SSP/SMS to begin a timeout routine to
 * reboot the machine if the dump never completes.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
	CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}

/*
 * for ptl1_panic
 */
void
ptl1_init_cpu(struct cpu *cpu)
{
	ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;

	/*CONSTCOND*/
	if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
		panic("ptl1_init_cpu: not enough space left for ptl1_panic "
		    "stack, sizeof (struct cpu) = %d", sizeof (struct cpu));
	}

	pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
	cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
}

void
ptl1_panic_handler(ptl1_state_t *pstate)
{
	static const char *ptl1_reasons[] = {
#ifdef	PTL1_PANIC_DEBUG
		"trap for debug purpose",	/* PTL1_BAD_DEBUG */
#else
		"unknown trap",			/* PTL1_BAD_DEBUG */
#endif
		"register window trap",		/* PTL1_BAD_WTRAP */
		"kernel MMU miss",		/* PTL1_BAD_KMISS */
		"kernel protection fault",	/* PTL1_BAD_KPROT_FAULT */
		"ISM MMU miss",			/* PTL1_BAD_ISM */
		"kernel MMU trap",		/* PTL1_BAD_MMUTRAP */
		"kernel trap handler state",	/* PTL1_BAD_TRAP */
		"floating point trap",		/* PTL1_BAD_FPTRAP */
#ifdef	DEBUG
		"pointer to intr_req",		/* PTL1_BAD_INTR_REQ */
#else
		"unknown trap",			/* PTL1_BAD_INTR_REQ */
#endif
#ifdef	TRAPTRACE
		"TRACE_PTR state",		/* PTL1_BAD_TRACE_PTR */
#else
		"unknown trap",			/* PTL1_BAD_TRACE_PTR */
#endif
		"stack overflow",		/* PTL1_BAD_STACK */
		"DTrace flags",			/* PTL1_BAD_DTRACE_FLAGS */
		"attempt to steal locked ctx",	/* PTL1_BAD_CTX_STEAL */
		"CPU ECC error loop",		/* PTL1_BAD_ECC */
		"unknown trap",			/* PTL1_BAD_HCALL */
	};

	uint_t reason = pstate->ptl1_regs.ptl1_g1;
	uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
	struct trap_info ti = { 0 };

	/*
	 * Use trap_info for a place holder to call panic_savetrap() and
	 * panic_showtrap() to save and print out ptl1_panic information.
	 */
	if (curthread->t_panic_trap == NULL)
		curthread->t_panic_trap = &ti;

	if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
		panic("bad %s at TL %u", ptl1_reasons[reason], tl);
	else
		panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
}

void
clear_watchdog_on_exit()
{
	/*
	 * Only shut down an active hardware watchdog timer if the platform
	 * has expressed an interest to.
	 */
	if (disable_watchdog_on_exit && watchdog_activated) {
		prom_printf("Debugging requested; hardware watchdog "
		    "disabled; reboot to re-enable.\n");
		cmn_err(CE_WARN, "!Debugging requested; hardware watchdog "
		    "disabled; reboot to re-enable.");
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}
}

int
kdi_watchdog_disable(void)
{
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
	}

	return (watchdog_activated);
}

void
kdi_watchdog_restore(void)
{
	if (watchdog_enable) {
		mutex_enter(&tod_lock);
		(void) tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
		mutex_exit(&tod_lock);
	}
}

/*ARGSUSED*/
void
mach_dump_buffer_init(void)
{
	/*
	 * setup dump buffer to store extra crash information
	 * not applicable to sun4u
	 */
}

/*
 * xt_sync - wait for previous x-traps to finish
 */
void
xt_sync(cpuset_t cpuset)
{
	kpreempt_disable();
	CPUSET_DEL(cpuset, CPU->cpu_id);
	CPUSET_AND(cpuset, cpu_ready_set);
	xt_some(cpuset, (xcfunc_t *)xt_sync_tl1, 0, 0);
	kpreempt_enable();
}