1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2018 Joyent, Inc.
25 */
26
27 /*
28 * The main CPU-control loops, used to control masters and slaves.
29 */
30
31 #include <sys/types.h>
32
33 #include <kmdb/kaif.h>
34 #include <kmdb/kaif_start.h>
35 #include <kmdb/kmdb_asmutil.h>
36 #include <kmdb/kmdb_dpi_impl.h>
37 #include <kmdb/kmdb_kdi.h>
38
/*
 * Commands issued to the slave CPUs through kaif_slave_cmd.
 */
#define	KAIF_SLAVE_CMD_SPIN	0	/* no work; keep spinning */
#define	KAIF_SLAVE_CMD_SWITCH	1	/* CPU kaif_slave_tgt becomes master */
#define	KAIF_SLAVE_CMD_RESUME	2	/* return to the running kernel */
#define	KAIF_SLAVE_CMD_FLUSH	3	/* flush caches, then keep spinning */
#define	KAIF_SLAVE_CMD_REBOOT	4	/* CPU 0 reboots the machine */
#if defined(__sparc)
#define	KAIF_SLAVE_CMD_ACK	5	/* acknowledge arrival in slave loop */
#endif


/*
 * Used to synchronize attempts to set kaif_master_cpuid. kaif_master_cpuid may
 * be read without kaif_master_lock, and may be written by the current master
 * CPU.
 */
int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
static uintptr_t kaif_master_lock = 0;

/*
 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must
 * be held to write kaif_looping, but need not be held to read it.
 */
static volatile uint_t kaif_looping;	/* # of CPUs still in their loops */
static uintptr_t kaif_loop_lock;

/* Written by the master, polled by the spinning slaves. */
static volatile int kaif_slave_cmd;	/* current KAIF_SLAVE_CMD_* value */
static volatile int kaif_slave_tgt;	/* target cpuid for CMD_SWITCH */
66
67 static void
kaif_lock_enter(uintptr_t * lock)68 kaif_lock_enter(uintptr_t *lock)
69 {
70 while (cas(lock, 0, 1) != 0)
71 continue;
72 membar_producer();
73 }
74
/*
 * Release a lock taken with kaif_lock_enter().  The producer barrier
 * pushes the cleared lock word out so other CPUs spinning in
 * kaif_lock_enter() observe the release promptly.
 */
static void
kaif_lock_exit(uintptr_t *lock)
{
	*lock = 0;
	membar_producer();
}
81
/*
 * Post a command for the slave CPUs, then wake them.  kaif_slave_cmd must
 * be set before kmdb_kdi_start_slaves() releases the slaves, so that they
 * see the new command when they return from kmdb_kdi_slave_wait().
 */
static void
kaif_start_slaves(int cmd)
{
	kaif_slave_cmd = cmd;
	kmdb_kdi_start_slaves();
}
88
89 static int
kaif_master_loop(kaif_cpusave_t * cpusave)90 kaif_master_loop(kaif_cpusave_t *cpusave)
91 {
92 int notflushed, i;
93
94 #if defined(__sparc)
95 kaif_prom_rearm();
96 #endif
97 kaif_trap_set_debugger();
98
99 /*
100 * If we re-entered due to a ::switch, we need to tell the slave CPUs
101 * to sleep again.
102 */
103 kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 0);
104
105 master_loop:
106 switch (kmdb_dpi_reenter()) {
107 case KMDB_DPI_CMD_SWITCH_CPU:
108 /*
109 * We assume that the target CPU is a valid slave. There's no
110 * easy way to complain here, so we'll assume that the caller
111 * has done the proper checking.
112 */
113 if (kmdb_dpi_switch_target == cpusave->krs_cpu_id)
114 break;
115
116 kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target;
117 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
118 membar_producer();
119
120 /*
121 * Switch back to the saved trap table before we switch CPUs --
122 * we need to make sure that only one CPU is on the debugger's
123 * table at a time.
124 */
125 kaif_trap_set_saved(cpusave);
126
127 kaif_start_slaves(KAIF_SLAVE_CMD_SWITCH);
128
129 /* The new master is now awake */
130 return (KAIF_CPU_CMD_SWITCH);
131
132 case KMDB_DPI_CMD_RESUME_ALL:
133 case KMDB_DPI_CMD_RESUME_UNLOAD:
134 /*
135 * Resume everyone, clean up for next entry.
136 */
137 kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
138 membar_producer();
139 kaif_start_slaves(KAIF_SLAVE_CMD_RESUME);
140
141 if (kmdb_dpi_work_required())
142 kmdb_dpi_wrintr_fire();
143
144 kaif_trap_set_saved(cpusave);
145
146 return (KAIF_CPU_CMD_RESUME);
147
148 case KMDB_DPI_CMD_RESUME_MASTER:
149 /*
150 * Single-CPU resume, which is performed on the debugger's
151 * trap table (so no need to switch back).
152 */
153 return (KAIF_CPU_CMD_RESUME_MASTER);
154
155 case KMDB_DPI_CMD_FLUSH_CACHES:
156 kaif_start_slaves(KAIF_SLAVE_CMD_FLUSH);
157
158 /*
159 * Wait for the other cpus to finish flushing their caches.
160 */
161 do {
162 notflushed = 0;
163 for (i = 0; i < kaif_ncpusave; i++) {
164 kaif_cpusave_t *save = &kaif_cpusave[i];
165
166 if (save->krs_cpu_state ==
167 KAIF_CPU_STATE_SLAVE &&
168 !save->krs_cpu_flushed) {
169 notflushed++;
170 break;
171 }
172 }
173 } while (notflushed > 0);
174
175 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
176 break;
177
178 #if defined(__i386) || defined(__amd64)
179 case KMDB_DPI_CMD_REBOOT:
180 /*
181 * Reboot must be initiated by CPU 0. I could ask why, but I'm
182 * afraid that I don't want to know the answer.
183 */
184 if (cpusave->krs_cpu_id == 0)
185 kmdb_kdi_reboot();
186
187 kaif_start_slaves(KAIF_SLAVE_CMD_REBOOT);
188
189 /*
190 * Spin forever, waiting for CPU 0 (apparently a slave) to
191 * reboot the system.
192 */
193 for (;;)
194 continue;
195
196 /*NOTREACHED*/
197 break;
198 #endif
199 }
200
201 goto master_loop;
202 }
203
204 static int
kaif_slave_loop(kaif_cpusave_t * cpusave)205 kaif_slave_loop(kaif_cpusave_t *cpusave)
206 {
207 int slavecmd, rv;
208
209 #if defined(__sparc)
210 /*
211 * If the user elects to drop to OBP from the debugger, some OBP
212 * implementations will cross-call the slaves. We have to turn
213 * IE back on so we can receive the cross-calls. If we don't,
214 * some OBP implementations will wait forever.
215 */
216 interrupts_on();
217 #endif
218
219 /* Wait for duty to call */
220 for (;;) {
221 slavecmd = kaif_slave_cmd;
222
223 if (slavecmd == KAIF_SLAVE_CMD_SWITCH &&
224 kaif_slave_tgt == cpusave->krs_cpu_id) {
225 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
226 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
227 rv = KAIF_CPU_CMD_SWITCH;
228 break;
229
230 } else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) {
231 kmdb_kdi_flush_caches();
232 cpusave->krs_cpu_flushed = 1;
233 continue;
234
235 #if defined(__i386) || defined(__amd64)
236 } else if (slavecmd == KAIF_SLAVE_CMD_REBOOT &&
237 cpusave->krs_cpu_id == 0) {
238 rv = 0;
239 kmdb_kdi_reboot();
240 break;
241 #endif
242
243 } else if (slavecmd == KAIF_SLAVE_CMD_RESUME) {
244 rv = KAIF_CPU_CMD_RESUME;
245 break;
246 #if defined(__sparc)
247 } else if (slavecmd == KAIF_SLAVE_CMD_ACK) {
248 cpusave->krs_cpu_acked = 1;
249 } else if (cpusave->krs_cpu_acked &&
250 slavecmd == KAIF_SLAVE_CMD_SPIN) {
251 cpusave->krs_cpu_acked = 0;
252 #endif
253 }
254
255 kmdb_kdi_slave_wait();
256 }
257
258 #if defined(__sparc)
259 interrupts_off();
260 #endif
261
262 return (rv);
263 }
264
265 static void
kaif_select_master(kaif_cpusave_t * cpusave)266 kaif_select_master(kaif_cpusave_t *cpusave)
267 {
268 kaif_lock_enter(&kaif_master_lock);
269
270 if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
271 /* This is the master. */
272 kaif_master_cpuid = cpusave->krs_cpu_id;
273 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
274 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
275
276 membar_producer();
277
278 kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 1);
279 } else {
280 /* The master was already chosen - go be a slave */
281 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
282 membar_producer();
283 }
284
285 kaif_lock_exit(&kaif_master_lock);
286 }
287
288 int
kaif_main_loop(kaif_cpusave_t * cpusave)289 kaif_main_loop(kaif_cpusave_t *cpusave)
290 {
291 int cmd;
292
293 if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
294
295 /*
296 * Special case: Unload requested before first debugger entry.
297 * Don't stop the world, as there's nothing to clean up that
298 * can't be handled by the running kernel.
299 */
300 if (!kmdb_dpi_resume_requested &&
301 kmdb_kdi_get_unload_request()) {
302 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
303 return (KAIF_CPU_CMD_RESUME);
304 }
305
306 /*
307 * We're a slave with no master, so just resume. This can
308 * happen if, prior to this, two CPUs both raced through
309 * kdi_cmnint() - for example, a breakpoint on a frequently
310 * called function. The loser will be redirected to the slave
311 * loop; note that the event itself is lost at this point.
312 *
313 * The winner will then cross-call that slave, but it won't
314 * actually be received until the slave returns to the kernel
315 * and enables interrupts. We'll then come back in via
316 * kdi_slave_entry() and hit this path.
317 */
318 if (cpusave->krs_cpu_state == KAIF_CPU_STATE_SLAVE) {
319 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
320 return (KAIF_CPU_CMD_RESUME);
321 }
322
323 kaif_select_master(cpusave);
324
325 #ifdef __sparc
326 if (kaif_master_cpuid == cpusave->krs_cpu_id) {
327 /*
328 * Everyone has arrived, so we can disarm the post-PROM
329 * entry point.
330 */
331 *kaif_promexitarmp = 0;
332 membar_producer();
333 }
334 #endif
335 } else if (kaif_master_cpuid == cpusave->krs_cpu_id) {
336 cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
337 } else {
338 cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
339 }
340
341 cpusave->krs_cpu_flushed = 0;
342
343 kaif_lock_enter(&kaif_loop_lock);
344 kaif_looping++;
345 kaif_lock_exit(&kaif_loop_lock);
346
347 /*
348 * We know who the master and slaves are, so now they can go off
349 * to their respective loops.
350 */
351 do {
352 if (kaif_master_cpuid == cpusave->krs_cpu_id)
353 cmd = kaif_master_loop(cpusave);
354 else
355 cmd = kaif_slave_loop(cpusave);
356 } while (cmd == KAIF_CPU_CMD_SWITCH);
357
358 kaif_lock_enter(&kaif_loop_lock);
359 kaif_looping--;
360 kaif_lock_exit(&kaif_loop_lock);
361
362 cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
363
364 if (cmd == KAIF_CPU_CMD_RESUME) {
365 /*
366 * By this point, the master has directed the slaves to resume,
367 * and everyone is making their way to this point. We're going
368 * to block here until all CPUs leave the master and slave
369 * loops. When all have arrived, we'll turn them all loose.
370 * This barrier is required for two reasons:
371 *
372 * 1. There exists a race condition whereby a CPU could reenter
373 * the debugger while another CPU is still in the slave loop
374 * from this debugger entry. This usually happens when the
375 * current master releases the slaves, and makes it back to
376 * the world before the slaves notice the release. The
377 * former master then triggers a debugger entry, and attempts
378 * to stop the slaves for this entry before they've even
379 * resumed from the last one. When the slaves arrive here,
380 * they'll have re-disabled interrupts, and will thus ignore
381 * cross-calls until they finish resuming.
382 *
383 * 2. At the time of this writing, there exists a SPARC bug that
384 * causes an apparently unsolicited interrupt vector trap
385 * from OBP to one of the slaves. This wouldn't normally be
386 * a problem but for the fact that the cross-called CPU
387 * encounters some sort of failure while in OBP. OBP
388 * recovers by executing the debugger-hook word, which sends
389 * the slave back into the debugger, triggering a debugger
390 * fault. This problem seems to only happen during resume,
391 * the result being that all CPUs save for the cross-called
392 * one make it back into the world, while the cross-called
393 * one is stuck at the debugger fault prompt. Leave the
394 * world in that state too long, and you'll get a mondo
395 * timeout panic. If we hold everyone here, we can give the
396 * the user a chance to trigger a panic for further analysis.
397 * To trigger the bug, "pool_unlock:b :c" and "while : ; do
398 * psrset -p ; done".
399 *
400 * When the second item is fixed, the barrier can move into
401 * kaif_select_master(), immediately prior to the setting of
402 * kaif_master_cpuid.
403 */
404 while (kaif_looping != 0)
405 continue;
406 }
407
408 return (cmd);
409 }
410
411
412 #if defined(__sparc)
413
414 static int slave_loop_barrier_failures = 0; /* for debug */
415
/*
 * There exists a race condition, observed on some platforms, in which the
 * kmdb master CPU exits to OBP via prom_enter_mon (e.g. the "$q" command)
 * and later re-enters kmdb (by typing "go") while the slaves are still
 * proceeding from the OBP idle loop back to the kmdb slave loop.  The
 * problem arises when the master CPU, now back in kmdb, proceeds to
 * re-enter OBP (e.g. by doing a prom_read() from the kmdb main loop)
 * while the slaves are still trying to get out of (the previous trip
 * into) OBP and into the safety of the kmdb slave loop.  This routine
 * forces the slaves to explicitly acknowledge that they are back in the
 * slave loop.  The master CPU can call this routine to ensure that all
 * slave CPUs are back in the slave loop before proceeding.
 */
431 void
kaif_slave_loop_barrier(void)432 kaif_slave_loop_barrier(void)
433 {
434 extern void kdi_usecwait(clock_t);
435 int i;
436 int not_acked;
437 int timeout_count = 0;
438
439 kaif_start_slaves(KAIF_SLAVE_CMD_ACK);
440
441 /*
442 * Wait for slave cpus to explicitly acknowledge
443 * that they are spinning in the slave loop.
444 */
445 do {
446 not_acked = 0;
447 for (i = 0; i < kaif_ncpusave; i++) {
448 kaif_cpusave_t *save = &kaif_cpusave[i];
449
450 if (save->krs_cpu_state ==
451 KAIF_CPU_STATE_SLAVE &&
452 !save->krs_cpu_acked) {
453 not_acked++;
454 break;
455 }
456 }
457
458 if (not_acked == 0)
459 break;
460
461 /*
462 * Play it safe and do a timeout delay.
463 * We will do at most kaif_ncpusave delays before
464 * bailing out of this barrier.
465 */
466 kdi_usecwait(200);
467
468 } while (++timeout_count < kaif_ncpusave);
469
470 if (not_acked > 0)
471 /*
472 * we cannot establish a barrier with all
473 * the slave cpus coming back from OBP
474 * Record this fact for future debugging
475 */
476 slave_loop_barrier_failures++;
477
478 kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
479 }
480 #endif
481