xref: /titanic_44/usr/src/cmd/mdb/common/kmdb/kaif_start.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * The main CPU-control loops, used to control masters and slaves.
31  */
32 
33 #include <sys/types.h>
34 
35 #include <kmdb/kaif.h>
36 #include <kmdb/kaif_start.h>
37 #include <kmdb/kmdb_asmutil.h>
38 #include <kmdb/kmdb_dpi_impl.h>
39 #include <kmdb/kmdb_kdi.h>
40 
/*
 * Commands the master posts to the slaves through kaif_slave_cmd.
 */
enum {
	KAIF_SLAVE_CMD_SPIN = 0,	/* nothing to do; keep polling */
	KAIF_SLAVE_CMD_SWITCH = 1,	/* kaif_slave_tgt becomes the master */
	KAIF_SLAVE_CMD_RESUME = 2,	/* leave the slave loop and resume */
	KAIF_SLAVE_CMD_FLUSH = 3,	/* flush caches, then keep polling */
	KAIF_SLAVE_CMD_REBOOT = 4	/* x86: CPU 0 leaves to reboot */
};
46 
47 /*
48  * Used to synchronize attempts to set kaif_master_cpuid.  kaif_master_cpuid may
49  * be read without kaif_master_lock, and may be written by the current master
50  * CPU.
51  */
52 int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
53 static uintptr_t kaif_master_lock = 0;
54 
55 /*
56  * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must
57  * be held to write kaif_looping, but need not be held to read it.
58  */
59 static volatile uint_t kaif_looping;
60 static uintptr_t kaif_loop_lock;
61 
62 static volatile int kaif_slave_cmd;
63 static volatile int kaif_slave_tgt;	/* target cpuid for CMD_SWITCH */
64 
/*
 * Acquire the spin lock whose lock word is *lock.
 *
 * Busy-waits, retrying cas(lock, 0, 1) until it returns 0, and then issues a
 * producer barrier.  There is no back-off and no timeout; callers are expected
 * to hold the lock only briefly and release it with kaif_lock_exit().
 */
static void
kaif_lock_enter(uintptr_t *lock)
{
	for (;;) {
		if (cas(lock, 0, 1) == 0)
			break;
	}

	membar_producer();
}
72 
/*
 * Release a lock taken with kaif_lock_enter(): store 0 to the lock word and
 * follow the store with a producer barrier.
 */
static void
kaif_lock_exit(uintptr_t *lock)
{
	*lock = (uintptr_t)0;

	membar_producer();
}
79 
80 static int
81 kaif_master_loop(kaif_cpusave_t *cpusave)
82 {
83 	int notflushed, i;
84 
85 #if defined(__sparc)
86 	kaif_prom_rearm();
87 #endif
88 	kaif_trap_set_debugger();
89 
90 master_loop:
91 	switch (kmdb_dpi_reenter()) {
92 	case KMDB_DPI_CMD_SWITCH_CPU:
93 		/*
94 		 * We assume that the target CPU is a valid slave.  There's no
95 		 * easy way to complain here, so we'll assume that the caller
96 		 * has done the proper checking.
97 		 */
98 		if (kmdb_dpi_switch_target == cpusave->krs_cpu_id)
99 			break;
100 
101 		kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target;
102 		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
103 		membar_producer();
104 
105 		/*
106 		 * Switch back to the saved trap table before we switch CPUs --
107 		 * we need to make sure that only one CPU is on the debugger's
108 		 * table at a time.
109 		 */
110 		kaif_trap_set_saved(cpusave);
111 
112 		kaif_slave_cmd = KAIF_SLAVE_CMD_SWITCH;
113 
114 		/* The new master is now awake */
115 		return (KAIF_CPU_CMD_SWITCH);
116 
117 	case KMDB_DPI_CMD_RESUME_ALL:
118 	case KMDB_DPI_CMD_RESUME_UNLOAD:
119 		/*
120 		 * Resume everyone, clean up for next entry.
121 		 */
122 		kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
123 		membar_producer();
124 		kaif_slave_cmd = KAIF_SLAVE_CMD_RESUME;
125 
126 		if (kmdb_dpi_work_required())
127 			kmdb_dpi_wrintr_fire();
128 
129 		kaif_trap_set_saved(cpusave);
130 
131 		return (KAIF_CPU_CMD_RESUME);
132 
133 	case KMDB_DPI_CMD_RESUME_MASTER:
134 		/*
135 		 * Single-CPU resume, which is performed on the debugger's
136 		 * trap table (so no need to switch back).
137 		 */
138 		return (KAIF_CPU_CMD_RESUME_MASTER);
139 
140 	case KMDB_DPI_CMD_FLUSH_CACHES:
141 		kaif_slave_cmd = KAIF_SLAVE_CMD_FLUSH;
142 
143 		/*
144 		 * Wait for the other cpus to finish flushing their caches.
145 		 */
146 		do {
147 			notflushed = 0;
148 			for (i = 0; i < kaif_ncpusave; i++) {
149 				kaif_cpusave_t *save = &kaif_cpusave[i];
150 
151 				if (save->krs_cpu_state ==
152 				    KAIF_CPU_STATE_SLAVE &&
153 				    !save->krs_cpu_flushed) {
154 					notflushed++;
155 					break;
156 				}
157 			}
158 		} while (notflushed > 0);
159 
160 		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
161 		break;
162 
163 #if defined(__i386) || defined(__amd64)
164 	case KMDB_DPI_CMD_REBOOT:
165 		/*
166 		 * Reboot must be initiated by CPU 0.  I could ask why, but I'm
167 		 * afraid that I don't want to know the answer.
168 		 */
169 		if (cpusave->krs_cpu_id == 0)
170 			return (KAIF_CPU_CMD_REBOOT);
171 
172 		kaif_slave_cmd = KAIF_SLAVE_CMD_REBOOT;
173 
174 		/*
175 		 * Spin forever, waiting for CPU 0 (apparently a slave) to
176 		 * reboot the system.
177 		 */
178 		for (;;)
179 			continue;
180 
181 		/*NOTREACHED*/
182 		break;
183 #endif
184 	}
185 
186 	goto master_loop;
187 }
188 
189 static int
190 kaif_slave_loop(kaif_cpusave_t *cpusave)
191 {
192 	int slavecmd, rv;
193 
194 #if defined(__sparc)
195 	/*
196 	 * If the user elects to drop to OBP from the debugger, some OBP
197 	 * implementations will cross-call the slaves.  We have to turn
198 	 * IE back on so we can receive the cross-calls.  If we don't,
199 	 * some OBP implementations will wait forever.
200 	 */
201 	interrupts_on();
202 #endif
203 
204 	/* Wait for duty to call */
205 	for (;;) {
206 		slavecmd = kaif_slave_cmd;
207 
208 		if (slavecmd == KAIF_SLAVE_CMD_SWITCH &&
209 		    kaif_slave_tgt == cpusave->krs_cpu_id) {
210 			kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
211 			cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
212 			rv = KAIF_CPU_CMD_SWITCH;
213 			break;
214 
215 		} else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) {
216 			kmdb_kdi_flush_caches();
217 			cpusave->krs_cpu_flushed = 1;
218 			continue;
219 
220 #if defined(__i386) || defined(__amd64)
221 		} else if (slavecmd == KAIF_SLAVE_CMD_REBOOT &&
222 		    cpusave->krs_cpu_id == 0) {
223 			rv = KAIF_CPU_CMD_REBOOT;
224 			break;
225 #endif
226 
227 		} else if (slavecmd == KAIF_SLAVE_CMD_RESUME) {
228 			rv = KAIF_CPU_CMD_RESUME;
229 			break;
230 		}
231 	}
232 
233 #if defined(__sparc)
234 	interrupts_off();
235 #endif
236 
237 	return (rv);
238 }
239 
240 static void
241 kaif_select_master(kaif_cpusave_t *cpusave)
242 {
243 	kaif_lock_enter(&kaif_master_lock);
244 
245 	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
246 		/* This is the master. */
247 		kaif_master_cpuid = cpusave->krs_cpu_id;
248 		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
249 		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
250 
251 		membar_producer();
252 
253 		kmdb_kdi_stop_other_cpus(cpusave->krs_cpu_id,
254 		    kaif_slave_entry);
255 
256 	} else {
257 		/* The master was already chosen - go be a slave */
258 		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
259 		membar_producer();
260 	}
261 
262 	kaif_lock_exit(&kaif_master_lock);
263 }
264 
265 int
266 kaif_main_loop(kaif_cpusave_t *cpusave)
267 {
268 	int cmd;
269 
270 	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
271 		if (!kmdb_dpi_resume_requested &&
272 		    kmdb_kdi_get_unload_request()) {
273 			/*
274 			 * Special case: Unload requested before first debugger
275 			 * entry.  Don't stop the world, as there's nothing to
276 			 * clean up that can't be handled by the running kernel.
277 			 */
278 			cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
279 			return (KAIF_CPU_CMD_RESUME);
280 		}
281 
282 		kaif_select_master(cpusave);
283 
284 #ifdef __sparc
285 		if (kaif_master_cpuid == cpusave->krs_cpu_id) {
286 			/*
287 			 * Everyone has arrived, so we can disarm the post-PROM
288 			 * entry point.
289 			 */
290 			*kaif_promexitarmp = 0;
291 			membar_producer();
292 		}
293 #endif
294 	} else if (kaif_master_cpuid == cpusave->krs_cpu_id) {
295 		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
296 	} else {
297 		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
298 	}
299 
300 	cpusave->krs_cpu_flushed = 0;
301 
302 	kaif_lock_enter(&kaif_loop_lock);
303 	kaif_looping++;
304 	kaif_lock_exit(&kaif_loop_lock);
305 
306 	/*
307 	 * We know who the master and slaves are, so now they can go off
308 	 * to their respective loops.
309 	 */
310 	do {
311 		if (kaif_master_cpuid == cpusave->krs_cpu_id)
312 			cmd = kaif_master_loop(cpusave);
313 		else
314 			cmd = kaif_slave_loop(cpusave);
315 	} while (cmd == KAIF_CPU_CMD_SWITCH);
316 
317 	kaif_lock_enter(&kaif_loop_lock);
318 	kaif_looping--;
319 	kaif_lock_exit(&kaif_loop_lock);
320 
321 	cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
322 
323 	if (cmd == KAIF_CPU_CMD_RESUME) {
324 		/*
325 		 * By this point, the master has directed the slaves to resume,
326 		 * and everyone is making their way to this point.  We're going
327 		 * to block here until all CPUs leave the master and slave
328 		 * loops.  When all have arrived, we'll turn them all loose.
329 		 * This barrier is required for two reasons:
330 		 *
331 		 * 1. There exists a race condition whereby a CPU could reenter
332 		 *    the debugger while another CPU is still in the slave loop
333 		 *    from this debugger entry.  This usually happens when the
334 		 *    current master releases the slaves, and makes it back to
335 		 *    the world before the slaves notice the release.  The
336 		 *    former master then triggers a debugger entry, and attempts
337 		 *    to stop the slaves for this entry before they've even
338 		 *    resumed from the last one.  When the slaves arrive here,
339 		 *    they'll have re-disabled interrupts, and will thus ignore
340 		 *    cross-calls until they finish resuming.
341 		 *
342 		 * 2. At the time of this writing, there exists a SPARC bug that
343 		 *    causes an apparently unsolicited interrupt vector trap
344 		 *    from OBP to one of the slaves.  This wouldn't normally be
345 		 *    a problem but for the fact that the cross-called CPU
346 		 *    encounters some sort of failure while in OBP.  OBP
347 		 *    recovers by executing the debugger-hook word, which sends
348 		 *    the slave back into the debugger, triggering a debugger
349 		 *    fault.  This problem seems to only happen during resume,
350 		 *    the result being that all CPUs save for the cross-called
351 		 *    one make it back into the world, while the cross-called
352 		 *    one is stuck at the debugger fault prompt.  Leave the
353 		 *    world in that state too long, and you'll get a mondo
354 		 *    timeout panic.  If we hold everyone here, we can give the
355 		 *    the user a chance to trigger a panic for further analysis.
356 		 *    To trigger the bug, "pool_unlock:b :c" and "while : ; do
357 		 *    psrset -p ; done".
358 		 *
359 		 * When the second item is fixed, the barrier can move into
360 		 * kaif_select_master(), immediately prior to the setting of
361 		 * kaif_master_cpuid.
362 		 */
363 		while (kaif_looping != 0)
364 			continue;
365 	}
366 
367 	return (cmd);
368 }
369