/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * The main CPU-control loops, used to control masters and slaves.
 */

#include <sys/types.h>

#include <kmdb/kaif.h>
#include <kmdb/kaif_start.h>
#include <kmdb/kmdb_asmutil.h>
#include <kmdb/kmdb_dpi_impl.h>
#include <kmdb/kmdb_kdi.h>

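/*
 * Commands issued to the slave CPUs.  The slaves spin in kaif_slave_loop(),
 * polling kaif_slave_cmd for one of these values.
 */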
#define	KAIF_SLAVE_CMD_SPIN	0
#define	KAIF_SLAVE_CMD_SWITCH	1
#define	KAIF_SLAVE_CMD_RESUME	2
#define	KAIF_SLAVE_CMD_FLUSH	3
#define	KAIF_SLAVE_CMD_REBOOT	4
#if defined(__sparc)
#define	KAIF_SLAVE_CMD_ACK	5
#endif


/*
 * Used to synchronize attempts to set kaif_master_cpuid. kaif_master_cpuid may
 * be read without kaif_master_lock, and may be written by the current master
 * CPU.
 */
int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
static uintptr_t kaif_master_lock = 0;

/*
 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must
 * be held to write kaif_looping, but need not be held to read it.
 */
static volatile uint_t kaif_looping;
static uintptr_t kaif_loop_lock;

static volatile int kaif_slave_cmd;
static volatile int kaif_slave_tgt;	/* target cpuid for CMD_SWITCH */

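/*
 * kaif_lock_enter() and kaif_lock_exit() implement a minimal test-and-set
 * spinlock.  The debugger can't rely on kernel synchronization primitives,
 * so we build one from cas() and a store barrier.
 */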
static void
kaif_lock_enter(uintptr_t *lock)
{
	while (cas(lock, 0, 1) != 0)
		continue;
	membar_producer();
}

static void
kaif_lock_exit(uintptr_t *lock)
{
	*lock = 0;
	membar_producer();
}

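/*
 * Post a command for the slave CPUs, then kick them awake so they notice it.
 */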
static void
kaif_start_slaves(int cmd)
{
	kaif_slave_cmd = cmd;
	kmdb_kdi_start_slaves();
}

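/*
 * The master CPU parks here while the debugger has control of the machine,
 * processing ::switch, resume, cache-flush, and (on x86) reboot requests
 * from the DPI layer.
 */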
static int
kaif_master_loop(kaif_cpusave_t *cpusave)
{
	int notflushed, i;

#if defined(__sparc)
	kaif_prom_rearm();
#endif
	kaif_trap_set_debugger();

	/*
	 * If we re-entered due to a ::switch, we need to tell the slave CPUs
	 * to sleep again.
	 */
	kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 0);

master_loop:
	switch (kmdb_dpi_reenter()) {
	case KMDB_DPI_CMD_SWITCH_CPU:
		/*
		 * We assume that the target CPU is a valid slave.  There's no
		 * easy way to complain here, so we'll assume that the caller
		 * has done the proper checking.
		 */
		if (kmdb_dpi_switch_target == cpusave->krs_cpu_id)
			break;

		kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();

		/*
		 * Switch back to the saved trap table before we switch CPUs --
		 * we need to make sure that only one CPU is on the debugger's
		 * table at a time.
		 */
		kaif_trap_set_saved(cpusave);

		kaif_start_slaves(KAIF_SLAVE_CMD_SWITCH);

		/* The new master is now awake */
		return (KAIF_CPU_CMD_SWITCH);

	case KMDB_DPI_CMD_RESUME_ALL:
	case KMDB_DPI_CMD_RESUME_UNLOAD:
		/*
		 * Resume everyone, clean up for next entry.
		 */
		kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
		membar_producer();
		kaif_start_slaves(KAIF_SLAVE_CMD_RESUME);

		if (kmdb_dpi_work_required())
			kmdb_dpi_wrintr_fire();

		kaif_trap_set_saved(cpusave);

		return (KAIF_CPU_CMD_RESUME);

	case KMDB_DPI_CMD_RESUME_MASTER:
		/*
		 * Single-CPU resume, which is performed on the debugger's
		 * trap table (so no need to switch back).
		 */
		return (KAIF_CPU_CMD_RESUME_MASTER);

	case KMDB_DPI_CMD_FLUSH_CACHES:
		kaif_start_slaves(KAIF_SLAVE_CMD_FLUSH);

		/*
		 * Wait for the other cpus to finish flushing their caches.
		 */
		do {
			notflushed = 0;
			for (i = 0; i < kaif_ncpusave; i++) {
				kaif_cpusave_t *save = &kaif_cpusave[i];

				if (save->krs_cpu_state ==
				    KAIF_CPU_STATE_SLAVE &&
				    !save->krs_cpu_flushed) {
					notflushed++;
					break;
				}
			}
		} while (notflushed > 0);

		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
		break;

#if defined(__i386) || defined(__amd64)
	case KMDB_DPI_CMD_REBOOT:
		/*
		 * Reboot must be initiated by CPU 0.  I could ask why, but I'm
		 * afraid that I don't want to know the answer.
		 */
		if (cpusave->krs_cpu_id == 0)
			kmdb_kdi_reboot();

		kaif_start_slaves(KAIF_SLAVE_CMD_REBOOT);

		/*
		 * Spin forever, waiting for CPU 0 (apparently a slave) to
		 * reboot the system.
		 */
		for (;;)
			continue;

		/*NOTREACHED*/
		break;
#endif
	}

	goto master_loop;
}

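/*
 * Slave CPUs park here while the master owns the machine, sleeping in
 * kmdb_kdi_slave_wait() and waking to act on whatever command the master
 * has posted in kaif_slave_cmd.
 */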
static int
kaif_slave_loop(kaif_cpusave_t *cpusave)
{
	int slavecmd, rv;

#if defined(__sparc)
	/*
	 * If the user elects to drop to OBP from the debugger, some OBP
	 * implementations will cross-call the slaves.  We have to turn
	 * IE back on so we can receive the cross-calls.  If we don't,
	 * some OBP implementations will wait forever.
	 */
	interrupts_on();
#endif

	/* Wait for duty to call */
	for (;;) {
		slavecmd = kaif_slave_cmd;

		if (slavecmd == KAIF_SLAVE_CMD_SWITCH &&
		    kaif_slave_tgt == cpusave->krs_cpu_id) {
			kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
			cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
			rv = KAIF_CPU_CMD_SWITCH;
			break;

		} else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) {
			kmdb_kdi_flush_caches();
			cpusave->krs_cpu_flushed = 1;
			continue;

#if defined(__i386) || defined(__amd64)
		} else if (slavecmd == KAIF_SLAVE_CMD_REBOOT &&
		    cpusave->krs_cpu_id == 0) {
			rv = 0;
			kmdb_kdi_reboot();
			break;
#endif

		} else if (slavecmd == KAIF_SLAVE_CMD_RESUME) {
			rv = KAIF_CPU_CMD_RESUME;
			break;
#if defined(__sparc)
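		/*
		 * ACK handshake used by kaif_slave_loop_barrier(): note that
		 * we are spinning in the slave loop, then clear the flag once
		 * the master has dropped back to KAIF_SLAVE_CMD_SPIN.
		 */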
		} else if (slavecmd == KAIF_SLAVE_CMD_ACK) {
			cpusave->krs_cpu_acked = 1;
		} else if (cpusave->krs_cpu_acked &&
		    slavecmd == KAIF_SLAVE_CMD_SPIN) {
			cpusave->krs_cpu_acked = 0;
#endif
		}

		kmdb_kdi_slave_wait();
	}

#if defined(__sparc)
	interrupts_off();
#endif

	return (rv);
}

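/*
 * The first CPU into the debugger wins the race to become master; everyone
 * else becomes a slave.  kaif_master_lock serializes the decision.
 */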
static void
kaif_select_master(kaif_cpusave_t *cpusave)
{
	kaif_lock_enter(&kaif_master_lock);

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		/* This is the master. */
		kaif_master_cpuid = cpusave->krs_cpu_id;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;

		membar_producer();

		kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 1);
	} else {
		/* The master was already chosen - go be a slave */
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();
	}

	kaif_lock_exit(&kaif_master_lock);
}

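/*
 * Every CPU comes through here on debugger entry.  Each one determines its
 * role (master or slave), runs the corresponding loop until told to resume
 * or to switch roles, and then waits for the rest before returning to the
 * world.
 */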
int
kaif_main_loop(kaif_cpusave_t *cpusave)
{
	int cmd;

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		if (!kmdb_dpi_resume_requested &&
		    kmdb_kdi_get_unload_request()) {
			/*
			 * Special case: Unload requested before first debugger
			 * entry.  Don't stop the world, as there's nothing to
			 * clean up that can't be handled by the running
			 * kernel.
			 */
			cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
			return (KAIF_CPU_CMD_RESUME);
		}

		kaif_select_master(cpusave);

#ifdef __sparc
		if (kaif_master_cpuid == cpusave->krs_cpu_id) {
			/*
			 * Everyone has arrived, so we can disarm the post-PROM
			 * entry point.
			 */
			*kaif_promexitarmp = 0;
			membar_producer();
		}
#endif
	} else if (kaif_master_cpuid == cpusave->krs_cpu_id) {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
	} else {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
	}

	cpusave->krs_cpu_flushed = 0;

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping++;
	kaif_lock_exit(&kaif_loop_lock);

	/*
	 * We know who the master and slaves are, so now they can go off
	 * to their respective loops.
	 */
	do {
		if (kaif_master_cpuid == cpusave->krs_cpu_id)
			cmd = kaif_master_loop(cpusave);
		else
			cmd = kaif_slave_loop(cpusave);
	} while (cmd == KAIF_CPU_CMD_SWITCH);

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping--;
	kaif_lock_exit(&kaif_loop_lock);

	cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;

	if (cmd == KAIF_CPU_CMD_RESUME) {
		/*
		 * By this point, the master has directed the slaves to resume,
		 * and everyone is making their way to this point.  We're going
		 * to block here until all CPUs leave the master and slave
		 * loops.  When all have arrived, we'll turn them all loose.
		 * This barrier is required for two reasons:
		 *
		 * 1. There exists a race condition whereby a CPU could reenter
		 *    the debugger while another CPU is still in the slave loop
		 *    from this debugger entry.  This usually happens when the
		 *    current master releases the slaves, and makes it back to
		 *    the world before the slaves notice the release.  The
		 *    former master then triggers a debugger entry, and
		 *    attempts to stop the slaves for this entry before they've
		 *    even resumed from the last one.  When the slaves arrive
		 *    here, they'll have re-disabled interrupts, and will thus
		 *    ignore cross-calls until they finish resuming.
		 *
		 * 2. At the time of this writing, there exists a SPARC bug
		 *    that causes an apparently unsolicited interrupt vector
		 *    trap from OBP to one of the slaves.  This wouldn't
		 *    normally be a problem but for the fact that the
		 *    cross-called CPU encounters some sort of failure while in
		 *    OBP.  OBP recovers by executing the debugger-hook word,
		 *    which sends the slave back into the debugger, triggering
		 *    a debugger fault.  This problem seems to only happen
		 *    during resume, the result being that all CPUs save for
		 *    the cross-called one make it back into the world, while
		 *    the cross-called one is stuck at the debugger fault
		 *    prompt.  Leave the world in that state too long, and
		 *    you'll get a mondo timeout panic.  If we hold everyone
		 *    here, we can give the user a chance to trigger a panic
		 *    for further analysis.  To trigger the bug,
		 *    "pool_unlock:b :c" and "while : ; do psrset -p ; done".
		 *
		 * When the second item is fixed, the barrier can move into
		 * kaif_select_master(), immediately prior to the setting of
		 * kaif_master_cpuid.
		 */
		while (kaif_looping != 0)
			continue;
	}

	return (cmd);
}


#if defined(__sparc)

static int slave_loop_barrier_failures = 0;	/* for debug */

/*
 * There exists a race condition, observed on some platforms, where the
 * kmdb master CPU exits to OBP via prom_enter_mon (e.g. the "$q" command)
 * and then later re-enters kmdb (the user types "go") while the slaves are
 * still making their way from the OBP idle loop back to the kmdb slave
 * loop.  The problem arises when the master CPU, now back in kmdb,
 * re-enters OBP (e.g. via a prom_read() from the kmdb main loop) while the
 * slaves are still trying to get out of (the previous trip into) OBP and
 * into the safety of the kmdb slave loop.  This routine forces the slaves
 * to explicitly acknowledge that they are back in the slave loop.  The
 * master CPU can call this routine to ensure that all slave CPUs are back
 * in the slave loop before proceeding.
 */
void
kaif_slave_loop_barrier(void)
{
	extern void kdi_usecwait(clock_t);
	int i;
	int not_acked;
	int timeout_count = 0;

	kaif_start_slaves(KAIF_SLAVE_CMD_ACK);

	/*
	 * Wait for slave cpus to explicitly acknowledge
	 * that they are spinning in the slave loop.
	 */
	do {
		not_acked = 0;
		for (i = 0; i < kaif_ncpusave; i++) {
			kaif_cpusave_t *save = &kaif_cpusave[i];

			if (save->krs_cpu_state ==
			    KAIF_CPU_STATE_SLAVE &&
			    !save->krs_cpu_acked) {
				not_acked++;
				break;
			}
		}

		if (not_acked == 0)
			break;

		/*
		 * Play it safe and do a timeout delay.
		 * We will do at most kaif_ncpusave delays before
		 * bailing out of this barrier.
		 */
		kdi_usecwait(200);

	} while (++timeout_count < kaif_ncpusave);

	if (not_acked > 0)
		/*
		 * We cannot establish a barrier with all of the
		 * slave CPUs coming back from OBP.  Record this
		 * fact for future debugging.
		 */
		slave_loop_barrier_failures++;

	kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
}
#endif
