1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * The main CPU-control loops, used to control masters and slaves. 31 */ 32 33 #include <sys/types.h> 34 35 #include <kmdb/kaif.h> 36 #include <kmdb/kaif_start.h> 37 #include <kmdb/kmdb_asmutil.h> 38 #include <kmdb/kmdb_dpi_impl.h> 39 #include <kmdb/kmdb_kdi.h> 40 41 #define KAIF_SLAVE_CMD_SPIN 0 42 #define KAIF_SLAVE_CMD_SWITCH 1 43 #define KAIF_SLAVE_CMD_RESUME 2 44 #define KAIF_SLAVE_CMD_FLUSH 3 45 #define KAIF_SLAVE_CMD_REBOOT 4 46 47 /* 48 * Used to synchronize attempts to set kaif_master_cpuid. kaif_master_cpuid may 49 * be read without kaif_master_lock, and may be written by the current master 50 * CPU. 51 */ 52 int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET; 53 static uintptr_t kaif_master_lock = 0; 54 55 /* 56 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must 57 * be held to write kaif_looping, but need not be held to read it. 58 */ 59 static volatile uint_t kaif_looping; 60 static uintptr_t kaif_loop_lock; 61 62 static volatile int kaif_slave_cmd; 63 static volatile int kaif_slave_tgt; /* target cpuid for CMD_SWITCH */ 64 65 static void 66 kaif_lock_enter(uintptr_t *lock) 67 { 68 while (cas(lock, 0, 1) != 0) 69 continue; 70 membar_producer(); 71 } 72 73 static void 74 kaif_lock_exit(uintptr_t *lock) 75 { 76 *lock = 0; 77 membar_producer(); 78 } 79 80 static int 81 kaif_master_loop(kaif_cpusave_t *cpusave) 82 { 83 int notflushed, i; 84 85 #if defined(__sparc) 86 kaif_prom_rearm(); 87 #endif 88 kaif_trap_set_debugger(); 89 90 master_loop: 91 switch (kmdb_dpi_reenter()) { 92 case KMDB_DPI_CMD_SWITCH_CPU: 93 /* 94 * We assume that the target CPU is a valid slave. There's no 95 * easy way to complain here, so we'll assume that the caller 96 * has done the proper checking. 97 */ 98 if (kmdb_dpi_switch_target == cpusave->krs_cpu_id) 99 break; 100 101 kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target; 102 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE; 103 membar_producer(); 104 105 /* 106 * Switch back to the saved trap table before we switch CPUs -- 107 * we need to make sure that only one CPU is on the debugger's 108 * table at a time. 109 */ 110 kaif_trap_set_saved(cpusave); 111 112 kaif_slave_cmd = KAIF_SLAVE_CMD_SWITCH; 113 114 /* The new master is now awake */ 115 return (KAIF_CPU_CMD_SWITCH); 116 117 case KMDB_DPI_CMD_RESUME_ALL: 118 case KMDB_DPI_CMD_RESUME_UNLOAD: 119 /* 120 * Resume everyone, clean up for next entry. 121 */ 122 kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET; 123 membar_producer(); 124 kaif_slave_cmd = KAIF_SLAVE_CMD_RESUME; 125 126 if (kmdb_dpi_work_required()) 127 kmdb_dpi_wrintr_fire(); 128 129 kaif_trap_set_saved(cpusave); 130 131 return (KAIF_CPU_CMD_RESUME); 132 133 case KMDB_DPI_CMD_RESUME_MASTER: 134 /* 135 * Single-CPU resume, which is performed on the debugger's 136 * trap table (so no need to switch back). 137 */ 138 return (KAIF_CPU_CMD_RESUME_MASTER); 139 140 case KMDB_DPI_CMD_FLUSH_CACHES: 141 kaif_slave_cmd = KAIF_SLAVE_CMD_FLUSH; 142 143 /* 144 * Wait for the other cpus to finish flushing their caches. 145 */ 146 do { 147 notflushed = 0; 148 for (i = 0; i < kaif_ncpusave; i++) { 149 kaif_cpusave_t *save = &kaif_cpusave[i]; 150 151 if (save->krs_cpu_state == 152 KAIF_CPU_STATE_SLAVE && 153 !save->krs_cpu_flushed) { 154 notflushed++; 155 break; 156 } 157 } 158 } while (notflushed > 0); 159 160 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 161 break; 162 163 #if defined(__i386) || defined(__amd64) 164 case KMDB_DPI_CMD_REBOOT: 165 /* 166 * Reboot must be initiated by CPU 0. I could ask why, but I'm 167 * afraid that I don't want to know the answer. 168 */ 169 if (cpusave->krs_cpu_id == 0) 170 return (KAIF_CPU_CMD_REBOOT); 171 172 kaif_slave_cmd = KAIF_SLAVE_CMD_REBOOT; 173 174 /* 175 * Spin forever, waiting for CPU 0 (apparently a slave) to 176 * reboot the system. 177 */ 178 for (;;) 179 continue; 180 181 /*NOTREACHED*/ 182 break; 183 #endif 184 } 185 186 goto master_loop; 187 } 188 189 static int 190 kaif_slave_loop(kaif_cpusave_t *cpusave) 191 { 192 int slavecmd, rv; 193 194 #if defined(__sparc) 195 /* 196 * If the user elects to drop to OBP from the debugger, some OBP 197 * implementations will cross-call the slaves. We have to turn 198 * IE back on so we can receive the cross-calls. If we don't, 199 * some OBP implementations will wait forever. 200 */ 201 interrupts_on(); 202 #endif 203 204 /* Wait for duty to call */ 205 for (;;) { 206 slavecmd = kaif_slave_cmd; 207 208 if (slavecmd == KAIF_SLAVE_CMD_SWITCH && 209 kaif_slave_tgt == cpusave->krs_cpu_id) { 210 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 211 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER; 212 rv = KAIF_CPU_CMD_SWITCH; 213 break; 214 215 } else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) { 216 kmdb_kdi_flush_caches(); 217 cpusave->krs_cpu_flushed = 1; 218 continue; 219 220 #if defined(__i386) || defined(__amd64) 221 } else if (slavecmd == KAIF_SLAVE_CMD_REBOOT && 222 cpusave->krs_cpu_id == 0) { 223 rv = KAIF_CPU_CMD_REBOOT; 224 break; 225 #endif 226 227 } else if (slavecmd == KAIF_SLAVE_CMD_RESUME) { 228 rv = KAIF_CPU_CMD_RESUME; 229 break; 230 } 231 } 232 233 #if defined(__sparc) 234 interrupts_off(); 235 #endif 236 237 return (rv); 238 } 239 240 static void 241 kaif_select_master(kaif_cpusave_t *cpusave) 242 { 243 kaif_lock_enter(&kaif_master_lock); 244 245 if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) { 246 /* This is the master. */ 247 kaif_master_cpuid = cpusave->krs_cpu_id; 248 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER; 249 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN; 250 251 membar_producer(); 252 253 kmdb_kdi_stop_other_cpus(cpusave->krs_cpu_id, 254 kaif_slave_entry); 255 256 } else { 257 /* The master was already chosen - go be a slave */ 258 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE; 259 membar_producer(); 260 } 261 262 kaif_lock_exit(&kaif_master_lock); 263 } 264 265 int 266 kaif_main_loop(kaif_cpusave_t *cpusave) 267 { 268 int cmd; 269 270 if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) { 271 if (!kmdb_dpi_resume_requested && 272 kmdb_kdi_get_unload_request()) { 273 /* 274 * Special case: Unload requested before first debugger 275 * entry. Don't stop the world, as there's nothing to 276 * clean up that can't be handled by the running kernel. 277 */ 278 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE; 279 return (KAIF_CPU_CMD_RESUME); 280 } 281 282 kaif_select_master(cpusave); 283 284 #ifdef __sparc 285 if (kaif_master_cpuid == cpusave->krs_cpu_id) { 286 /* 287 * Everyone has arrived, so we can disarm the post-PROM 288 * entry point. 289 */ 290 *kaif_promexitarmp = 0; 291 membar_producer(); 292 } 293 #endif 294 } else if (kaif_master_cpuid == cpusave->krs_cpu_id) { 295 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER; 296 } else { 297 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE; 298 } 299 300 cpusave->krs_cpu_flushed = 0; 301 302 kaif_lock_enter(&kaif_loop_lock); 303 kaif_looping++; 304 kaif_lock_exit(&kaif_loop_lock); 305 306 /* 307 * We know who the master and slaves are, so now they can go off 308 * to their respective loops. 309 */ 310 do { 311 if (kaif_master_cpuid == cpusave->krs_cpu_id) 312 cmd = kaif_master_loop(cpusave); 313 else 314 cmd = kaif_slave_loop(cpusave); 315 } while (cmd == KAIF_CPU_CMD_SWITCH); 316 317 kaif_lock_enter(&kaif_loop_lock); 318 kaif_looping--; 319 kaif_lock_exit(&kaif_loop_lock); 320 321 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE; 322 323 if (cmd == KAIF_CPU_CMD_RESUME) { 324 /* 325 * By this point, the master has directed the slaves to resume, 326 * and everyone is making their way to this point. We're going 327 * to block here until all CPUs leave the master and slave 328 * loops. When all have arrived, we'll turn them all loose. 329 * This barrier is required for two reasons: 330 * 331 * 1. There exists a race condition whereby a CPU could reenter 332 * the debugger while another CPU is still in the slave loop 333 * from this debugger entry. This usually happens when the 334 * current master releases the slaves, and makes it back to 335 * the world before the slaves notice the release. The 336 * former master then triggers a debugger entry, and attempts 337 * to stop the slaves for this entry before they've even 338 * resumed from the last one. When the slaves arrive here, 339 * they'll have re-disabled interrupts, and will thus ignore 340 * cross-calls until they finish resuming. 341 * 342 * 2. At the time of this writing, there exists a SPARC bug that 343 * causes an apparently unsolicited interrupt vector trap 344 * from OBP to one of the slaves. This wouldn't normally be 345 * a problem but for the fact that the cross-called CPU 346 * encounters some sort of failure while in OBP. OBP 347 * recovers by executing the debugger-hook word, which sends 348 * the slave back into the debugger, triggering a debugger 349 * fault. This problem seems to only happen during resume, 350 * the result being that all CPUs save for the cross-called 351 * one make it back into the world, while the cross-called 352 * one is stuck at the debugger fault prompt. Leave the 353 * world in that state too long, and you'll get a mondo 354 * timeout panic. If we hold everyone here, we can give the 355 * the user a chance to trigger a panic for further analysis. 356 * To trigger the bug, "pool_unlock:b :c" and "while : ; do 357 * psrset -p ; done". 358 * 359 * When the second item is fixed, the barrier can move into 360 * kaif_select_master(), immediately prior to the setting of 361 * kaif_master_cpuid. 362 */ 363 while (kaif_looping != 0) 364 continue; 365 } 366 367 return (cmd); 368 } 369