1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * The main CPU-control loops, used to control masters and slaves. 30 */ 31 32 #include <sys/types.h> 33 34 #include <kmdb/kaif.h> 35 #include <kmdb/kaif_start.h> 36 #include <kmdb/kmdb_asmutil.h> 37 #include <kmdb/kmdb_dpi_impl.h> 38 #include <kmdb/kmdb_kdi.h> 39 40 #define KAIF_SLAVE_CMD_SPIN 0 41 #define KAIF_SLAVE_CMD_SWITCH 1 42 #define KAIF_SLAVE_CMD_RESUME 2 43 #define KAIF_SLAVE_CMD_FLUSH 3 44 #define KAIF_SLAVE_CMD_REBOOT 4 45 #if defined(__sparc) 46 #define KAIF_SLAVE_CMD_ACK 5 47 #endif 48 49 50 /* 51 * Used to synchronize attempts to set kaif_master_cpuid. kaif_master_cpuid may 52 * be read without kaif_master_lock, and may be written by the current master 53 * CPU. 54 */ 55 int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET; 56 static uintptr_t kaif_master_lock = 0; 57 58 /* 59 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must 60 * be held to write kaif_looping, but need not be held to read it. 61 */ 62 static volatile uint_t kaif_looping; 63 static uintptr_t kaif_loop_lock; 64 65 static volatile int kaif_slave_cmd; 66 static volatile int kaif_slave_tgt; /* target cpuid for CMD_SWITCH */ 67 68 static void 69 kaif_lock_enter(uintptr_t *lock) 70 { 71 while (cas(lock, 0, 1) != 0) 72 continue; 73 membar_producer(); 74 } 75 76 static void 77 kaif_lock_exit(uintptr_t *lock) 78 { 79 *lock = 0; 80 membar_producer(); 81 } 82 83 static int 84 kaif_master_loop(kaif_cpusave_t *cpusave) 85 { 86 int notflushed, i; 87 88 #if defined(__sparc) 89 kaif_prom_rearm(); 90 #endif 91 kaif_trap_set_debugger(); 92 93 master_loop: 94 switch (kmdb_dpi_reenter()) { 95 case KMDB_DPI_CMD_SWITCH_CPU: 96 /* 97 * We assume that the target CPU is a valid slave. There's no 98 * easy way to complain here, so we'll assume that the caller 99 * has done the proper checking. 100 */ 101 if (kmdb_dpi_switch_target == cpusave->krs_cpu_id) 102 break; 103 104 kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target; 105 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE; 106 membar_producer(); 107 108 /* 109 * Switch back to the saved trap table before we switch CPUs -- 110 * we need to make sure that only one CPU is on the debugger's 111 * table at a time. 112 */ 113 kaif_trap_set_saved(cpusave); 114 115 kaif_slave_cmd = KAIF_SLAVE_CMD_SWITCH; 116 117 /* The new master is now awake */ 118 return (KAIF_CPU_CMD_SWITCH); 119 120 case KMDB_DPI_CMD_RESUME_ALL: 121 case KMDB_DPI_CMD_RESUME_UNLOAD: 122 /* 123 * Resume everyone, clean up for next entry. 124 */ 125 kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET; 126 membar_producer(); 127 kaif_slave_cmd = KAIF_SLAVE_CMD_RESUME; 128 129 if (kmdb_dpi_work_required()) 130 kmdb_dpi_wrintr_fire(); 131 132 kaif_trap_set_saved(cpusave); 133 134 return (KAIF_CPU_CMD_RESUME); 135 136 case KMDB_DPI_CMD_RESUME_MASTER: 137 /* 138 * Single-CPU resume, which is performed on the debugger's 139 * trap table (so no need to switch back). 140 */ 141 return (KAIF_CPU_CMD_RESUME_MASTER); 142 143 case KMDB_DPI_CMD_FLUSH_CACHES: 144 kaif_slave_cmd = KAIF_SLAVE_CMD_FLUSH; 145 146 /* 147 * Wait for the other cpus to finish flushing their caches. 148 */ 149 do { 150 notflushed = 0; 151 for (i = 0; i < kaif_ncpusave; i++) { 152 kaif_cpusave_t *save = &kaif_cpusave[i]; 153 154 if (save->krs_cpu_state == 155 KAIF_CPU_STATE_SLAVE && 156 !save->krs_cpu_flushed) { 157 notflushed++; 158 break; 159 } 160 } 161 } while (notflushed > 0); 162 163 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 164 break; 165 166 #if defined(__i386) || defined(__amd64) 167 case KMDB_DPI_CMD_REBOOT: 168 /* 169 * Reboot must be initiated by CPU 0. I could ask why, but I'm 170 * afraid that I don't want to know the answer. 171 */ 172 if (cpusave->krs_cpu_id == 0) 173 return (KAIF_CPU_CMD_REBOOT); 174 175 kaif_slave_cmd = KAIF_SLAVE_CMD_REBOOT; 176 177 /* 178 * Spin forever, waiting for CPU 0 (apparently a slave) to 179 * reboot the system. 180 */ 181 for (;;) 182 continue; 183 184 /*NOTREACHED*/ 185 break; 186 #endif 187 } 188 189 goto master_loop; 190 } 191 192 static int 193 kaif_slave_loop(kaif_cpusave_t *cpusave) 194 { 195 int slavecmd, rv; 196 197 #if defined(__sparc) 198 /* 199 * If the user elects to drop to OBP from the debugger, some OBP 200 * implementations will cross-call the slaves. We have to turn 201 * IE back on so we can receive the cross-calls. If we don't, 202 * some OBP implementations will wait forever. 203 */ 204 interrupts_on(); 205 #endif 206 207 /* Wait for duty to call */ 208 for (;;) { 209 slavecmd = kaif_slave_cmd; 210 211 if (slavecmd == KAIF_SLAVE_CMD_SWITCH && 212 kaif_slave_tgt == cpusave->krs_cpu_id) { 213 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 214 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER; 215 rv = KAIF_CPU_CMD_SWITCH; 216 break; 217 218 } else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) { 219 kmdb_kdi_flush_caches(); 220 cpusave->krs_cpu_flushed = 1; 221 continue; 222 223 #if defined(__i386) || defined(__amd64) 224 } else if (slavecmd == KAIF_SLAVE_CMD_REBOOT && 225 cpusave->krs_cpu_id == 0) { 226 rv = KAIF_CPU_CMD_REBOOT; 227 break; 228 #endif 229 230 } else if (slavecmd == KAIF_SLAVE_CMD_RESUME) { 231 rv = KAIF_CPU_CMD_RESUME; 232 break; 233 #if defined(__sparc) 234 } else if (slavecmd == KAIF_SLAVE_CMD_ACK) { 235 cpusave->krs_cpu_acked = 1; 236 } else if (cpusave->krs_cpu_acked && 237 slavecmd == KAIF_SLAVE_CMD_SPIN) { 238 cpusave->krs_cpu_acked = 0; 239 #endif 240 } 241 } 242 243 #if defined(__sparc) 244 interrupts_off(); 245 #endif 246 247 return (rv); 248 } 249 250 static void 251 kaif_select_master(kaif_cpusave_t *cpusave) 252 { 253 kaif_lock_enter(&kaif_master_lock); 254 255 if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) { 256 /* This is the master. */ 257 kaif_master_cpuid = cpusave->krs_cpu_id; 258 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER; 259 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 260 261 membar_producer(); 262 263 kmdb_kdi_stop_other_cpus(cpusave->krs_cpu_id, 264 kaif_slave_entry); 265 266 } else { 267 /* The master was already chosen - go be a slave */ 268 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE; 269 membar_producer(); 270 } 271 272 kaif_lock_exit(&kaif_master_lock); 273 } 274 275 int 276 kaif_main_loop(kaif_cpusave_t *cpusave) 277 { 278 int cmd; 279 280 if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) { 281 if (!kmdb_dpi_resume_requested && 282 kmdb_kdi_get_unload_request()) { 283 /* 284 * Special case: Unload requested before first debugger 285 * entry. Don't stop the world, as there's nothing to 286 * clean up that can't be handled by the running kernel. 287 */ 288 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE; 289 return (KAIF_CPU_CMD_RESUME); 290 } 291 292 kaif_select_master(cpusave); 293 294 #ifdef __sparc 295 if (kaif_master_cpuid == cpusave->krs_cpu_id) { 296 /* 297 * Everyone has arrived, so we can disarm the post-PROM 298 * entry point. 299 */ 300 *kaif_promexitarmp = 0; 301 membar_producer(); 302 } 303 #endif 304 } else if (kaif_master_cpuid == cpusave->krs_cpu_id) { 305 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER; 306 } else { 307 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE; 308 } 309 310 cpusave->krs_cpu_flushed = 0; 311 312 kaif_lock_enter(&kaif_loop_lock); 313 kaif_looping++; 314 kaif_lock_exit(&kaif_loop_lock); 315 316 /* 317 * We know who the master and slaves are, so now they can go off 318 * to their respective loops. 319 */ 320 do { 321 if (kaif_master_cpuid == cpusave->krs_cpu_id) 322 cmd = kaif_master_loop(cpusave); 323 else 324 cmd = kaif_slave_loop(cpusave); 325 } while (cmd == KAIF_CPU_CMD_SWITCH); 326 327 kaif_lock_enter(&kaif_loop_lock); 328 kaif_looping--; 329 kaif_lock_exit(&kaif_loop_lock); 330 331 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE; 332 333 if (cmd == KAIF_CPU_CMD_RESUME) { 334 /* 335 * By this point, the master has directed the slaves to resume, 336 * and everyone is making their way to this point. We're going 337 * to block here until all CPUs leave the master and slave 338 * loops. When all have arrived, we'll turn them all loose. 339 * This barrier is required for two reasons: 340 * 341 * 1. There exists a race condition whereby a CPU could reenter 342 * the debugger while another CPU is still in the slave loop 343 * from this debugger entry. This usually happens when the 344 * current master releases the slaves, and makes it back to 345 * the world before the slaves notice the release. The 346 * former master then triggers a debugger entry, and attempts 347 * to stop the slaves for this entry before they've even 348 * resumed from the last one. When the slaves arrive here, 349 * they'll have re-disabled interrupts, and will thus ignore 350 * cross-calls until they finish resuming. 351 * 352 * 2. At the time of this writing, there exists a SPARC bug that 353 * causes an apparently unsolicited interrupt vector trap 354 * from OBP to one of the slaves. This wouldn't normally be 355 * a problem but for the fact that the cross-called CPU 356 * encounters some sort of failure while in OBP. OBP 357 * recovers by executing the debugger-hook word, which sends 358 * the slave back into the debugger, triggering a debugger 359 * fault. This problem seems to only happen during resume, 360 * the result being that all CPUs save for the cross-called 361 * one make it back into the world, while the cross-called 362 * one is stuck at the debugger fault prompt. Leave the 363 * world in that state too long, and you'll get a mondo 364 * timeout panic. If we hold everyone here, we can give the 365 * the user a chance to trigger a panic for further analysis. 366 * To trigger the bug, "pool_unlock:b :c" and "while : ; do 367 * psrset -p ; done". 368 * 369 * When the second item is fixed, the barrier can move into 370 * kaif_select_master(), immediately prior to the setting of 371 * kaif_master_cpuid. 372 */ 373 while (kaif_looping != 0) 374 continue; 375 } 376 377 return (cmd); 378 } 379 380 381 #if defined(__sparc) 382 383 static int slave_loop_barrier_failures = 0; /* for debug */ 384 385 /* 386 * There exist a race condition observed by some 387 * platforms where the kmdb master cpu exits to OBP via 388 * prom_enter_mon (e.g. "$q" command) and then later re-enter 389 * kmdb (typing "go") while the slaves are still proceeding 390 * from the OBP idle-loop back to the kmdb slave loop. The 391 * problem arises when the master cpu now back in kmdb proceed 392 * to re-enter OBP (e.g. doing a prom_read() from the kmdb main 393 * loop) while the slaves are still trying to get out of (the 394 * previous trip in) OBP into the safety of the kmdb slave loop. 395 * This routine forces the slaves to explicitly acknowledge 396 * that they are back in the slave loop. The master cpu can 397 * call this routine to ensure that all slave cpus are back 398 * in the slave loop before proceeding. 399 */ 400 void 401 kaif_slave_loop_barrier(void) 402 { 403 extern void kdi_usecwait(clock_t); 404 int i; 405 int not_acked; 406 int timeout_count = 0; 407 408 kaif_slave_cmd = KAIF_SLAVE_CMD_ACK; 409 410 /* 411 * Wait for slave cpus to explicitly acknowledge 412 * that they are spinning in the slave loop. 413 */ 414 do { 415 not_acked = 0; 416 for (i = 0; i < kaif_ncpusave; i++) { 417 kaif_cpusave_t *save = &kaif_cpusave[i]; 418 419 if (save->krs_cpu_state == 420 KAIF_CPU_STATE_SLAVE && 421 !save->krs_cpu_acked) { 422 not_acked++; 423 break; 424 } 425 } 426 427 if (not_acked == 0) 428 break; 429 430 /* 431 * Play it safe and do a timeout delay. 432 * We will do at most kaif_ncpusave delays before 433 * bailing out of this barrier. 434 */ 435 kdi_usecwait(200); 436 437 } while (++timeout_count < kaif_ncpusave); 438 439 if (not_acked > 0) 440 /* 441 * we cannot establish a barrier with all 442 * the slave cpus coming back from OBP 443 * Record this fact for future debugging 444 */ 445 slave_loop_barrier_failures++; 446 447 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 448 } 449 #endif 450