xref: /titanic_41/usr/src/uts/i86pc/os/x_call.c (revision 7010c12ad3ac2cada55cf126121a8c46957d3632)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
30  *
31  */
32 
33 #include <sys/types.h>
34 
35 #include <sys/param.h>
36 #include <sys/t_lock.h>
37 #include <sys/thread.h>
38 #include <sys/cpuvar.h>
39 #include <sys/x_call.h>
40 #include <sys/cpu.h>
41 #include <sys/psw.h>
42 #include <sys/sunddi.h>
43 #include <sys/mmu.h>
44 #include <sys/debug.h>
45 #include <sys/systm.h>
46 #include <sys/machsystm.h>
47 #include <sys/mutex_impl.h>
48 
49 static struct	xc_mbox xc_mboxes[X_CALL_LEVELS];
50 static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
51 static uint_t 	xc_xlat_xcptoipl[X_CALL_LEVELS] = {
52 	XC_LO_PIL,
53 	XC_MED_PIL,
54 	XC_HI_PIL
55 };
56 
57 static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
58     int, cpuset_t, int);
59 
60 static int	xc_initialized = 0;
61 extern cpuset_t	cpu_ready_set;
62 
63 void
64 xc_init()
65 {
66 	/*
67 	 * By making these mutexes type MUTEX_DRIVER, the ones below
68 	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
69 	 * ones above LOCK_LEVEL will be spin mutexes.
70 	 */
71 	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
72 	    (void *)ipltospl(XC_LO_PIL));
73 	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
74 	    (void *)ipltospl(XC_MED_PIL));
75 	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
76 	    (void *)ipltospl(XC_HI_PIL));
77 
78 	xc_initialized = 1;
79 }
80 
81 /*
82  * Used by the debugger to determine whether or not cross calls have been
83  * initialized and are safe to use.
84  */
85 int
86 kdi_xc_initialized(void)
87 {
88 	return (xc_initialized);
89 }
90 
/*
 * Sentinel stored in the X_CALL_MEDPRI mailbox's arg2 by xc_capture_cpus()
 * for the duration of a capture session; xc_release_cpus() resets arg2 to 0.
 * Parenthesized so the expansion is a single primary expression.
 */
#define	CAPTURE_CPU_ARG	(~0UL)
92 
93 /*
94  * X-call interrupt service routine.
95  *
96  * arg == X_CALL_MEDPRI	-  capture cpus.
97  *
98  * We're protected against changing CPUs by being a high-priority interrupt.
99  */
100 /*ARGSUSED*/
101 uint_t
102 xc_serv(caddr_t arg1, caddr_t arg2)
103 {
104 	int	op;
105 	int	pri = (int)(uintptr_t)arg1;
106 	struct cpu *cpup = CPU;
107 	xc_arg_t *argp;
108 	xc_arg_t arg2val;
109 	uint_t	tlbflush;
110 
111 	if (pri == X_CALL_MEDPRI) {
112 
113 		argp = &xc_mboxes[X_CALL_MEDPRI].arg2;
114 		arg2val = *argp;
115 		if (arg2val != CAPTURE_CPU_ARG &&
116 		    !CPU_IN_SET((cpuset_t)arg2val, cpup->cpu_id))
117 			return (DDI_INTR_UNCLAIMED);
118 		ASSERT(arg2val == CAPTURE_CPU_ARG);
119 		if (cpup->cpu_m.xc_pend[pri] == 0)
120 			return (DDI_INTR_UNCLAIMED);
121 
122 		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
123 		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;
124 
125 		for (;;) {
126 			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
127 				(cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
128 				break;
129 			ht_pause();
130 		}
131 		return (DDI_INTR_CLAIMED);
132 	}
133 	if (cpup->cpu_m.xc_pend[pri] == 0)
134 		return (DDI_INTR_UNCLAIMED);
135 
136 	cpup->cpu_m.xc_pend[pri] = 0;
137 	op = cpup->cpu_m.xc_state[pri];
138 
139 	/*
140 	 * When invalidating TLB entries, wait until the initiator changes the
141 	 * memory PTE before doing any INVLPG. Otherwise, if the PTE in memory
142 	 * hasn't been changed, the processor's TLB Flush filter may ignore
143 	 * the INVLPG instruction.
144 	 */
145 	tlbflush = (cpup->cpu_m.xc_wait[pri] == 2);
146 
147 	/*
148 	 * Don't invoke a null function.
149 	 */
150 	if (xc_mboxes[pri].func != NULL) {
151 		if (!tlbflush)
152 			cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
153 			    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
154 				xc_mboxes[pri].arg3);
155 	} else
156 		cpup->cpu_m.xc_retval[pri] = 0;
157 
158 	/*
159 	 * Acknowledge that we have completed the x-call operation.
160 	 */
161 	cpup->cpu_m.xc_ack[pri] = 1;
162 
163 	if (op == XC_CALL_OP)
164 		return (DDI_INTR_CLAIMED);
165 
166 	/*
167 	 * for (op == XC_SYNC_OP)
168 	 * Wait for the initiator of the x-call to indicate
169 	 * that all CPUs involved can proceed.
170 	 */
171 	while (cpup->cpu_m.xc_wait[pri])
172 		ht_pause();
173 
174 	while (cpup->cpu_m.xc_state[pri] != XC_DONE)
175 		ht_pause();
176 
177 	/*
178 	 * Flush the TLB, if that's what is requested.
179 	 */
180 	if (xc_mboxes[pri].func != NULL && tlbflush) {
181 		cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
182 		    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
183 			xc_mboxes[pri].arg3);
184 	}
185 
186 	/*
187 	 * Acknowledge that we have received the directive to continue.
188 	 */
189 	ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
190 	cpup->cpu_m.xc_ack[pri] = 1;
191 
192 	return (DDI_INTR_CLAIMED);
193 }
194 
195 
196 /*
197  * xc_do_call:
198  */
199 static void
200 xc_do_call(
201 	xc_arg_t arg1,
202 	xc_arg_t arg2,
203 	xc_arg_t arg3,
204 	int pri,
205 	cpuset_t set,
206 	xc_func_t func,
207 	int sync)
208 {
209 	/*
210 	 * If the pri indicates a low priority lock (below LOCK_LEVEL),
211 	 * we must disable preemption to avoid migrating to another CPU
212 	 * during the call.
213 	 */
214 	if (pri == X_CALL_LOPRI) {
215 		kpreempt_disable();
216 	} else {
217 		pri = X_CALL_HIPRI;
218 	}
219 
220 	/* always grab highest mutex to avoid deadlock */
221 	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
222 	xc_common(func, arg1, arg2, arg3, pri, set, sync);
223 	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
224 	if (pri == X_CALL_LOPRI)
225 		kpreempt_enable();
226 }
227 
228 
229 /*
230  * xc_call: call specified function on all processors
231  * remotes may continue after service
232  * we wait here until everybody has completed.
233  */
234 void
235 xc_call(
236 	xc_arg_t arg1,
237 	xc_arg_t arg2,
238 	xc_arg_t arg3,
239 	int pri,
240 	cpuset_t set,
241 	xc_func_t func)
242 {
243 	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
244 }
245 
246 /*
247  * xc_sync: call specified function on all processors
248  * after doing work, each remote waits until we let
249  * it continue; send the contiunue after everyone has
250  * informed us that they are done.
251  */
252 void
253 xc_sync(
254 	xc_arg_t arg1,
255 	xc_arg_t arg2,
256 	xc_arg_t arg3,
257 	int pri,
258 	cpuset_t set,
259 	xc_func_t func)
260 {
261 	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
262 }
263 
264 /*
265  * xc_sync_wait: similar to xc_sync(), except that the starting
266  * cpu waits for all other cpus to check in before running its
267  * service locally.
268  */
269 void
270 xc_wait_sync(
271 	xc_arg_t arg1,
272 	xc_arg_t arg2,
273 	xc_arg_t arg3,
274 	int pri,
275 	cpuset_t set,
276 	xc_func_t func)
277 {
278 	xc_do_call(arg1, arg2, arg3, pri, set, func, 2);
279 }
280 
281 
282 /*
283  * The routines xc_capture_cpus and xc_release_cpus
284  * can be used in place of xc_sync in order to implement a critical
285  * code section where all CPUs in the system can be controlled.
286  * xc_capture_cpus is used to start the critical code section, and
287  * xc_release_cpus is used to end the critical code section.
288  */
289 
290 /*
291  * Capture the CPUs specified in order to start a x-call session,
292  * and/or to begin a critical section.
293  */
294 void
295 xc_capture_cpus(cpuset_t set)
296 {
297 	int cix;
298 	int lcx;
299 	struct cpu *cpup;
300 	int	i;
301 	cpuset_t *cpus;
302 	cpuset_t c;
303 
304 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
305 
306 	/*
307 	 * Prevent deadlocks where we take an interrupt and are waiting
308 	 * for a mutex owned by one of the CPUs that is captured for
309 	 * the x-call, while that CPU is waiting for some x-call signal
310 	 * to be set by us.
311 	 *
312 	 * This mutex also prevents preemption, since it raises SPL above
313 	 * LOCK_LEVEL (it is a spin-type driver mutex).
314 	 */
315 	/* always grab highest mutex to avoid deadlock */
316 	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
317 	lcx = CPU->cpu_id;	/* now we're safe */
318 
319 	ASSERT(CPU->cpu_flags & CPU_READY);
320 
321 	/*
322 	 * Wait for all cpus
323 	 */
324 	cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
325 	if (CPU_IN_SET(*cpus, CPU->cpu_id))
326 		CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
327 	for (;;) {
328 		c = *(volatile cpuset_t *)cpus;
329 		CPUSET_AND(c, cpu_ready_set);
330 		if (CPUSET_ISNULL(c))
331 			break;
332 		ht_pause();
333 	}
334 
335 	/*
336 	 * Store the set of CPUs involved in the x-call session, so that
337 	 * xc_release_cpus will know what CPUs to act upon.
338 	 */
339 	xc_mboxes[X_CALL_MEDPRI].set = set;
340 	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;
341 
342 	/*
343 	 * Now capture each CPU in the set and cause it to go into a
344 	 * holding pattern.
345 	 */
346 	i = 0;
347 	for (cix = 0; cix < NCPU; cix++) {
348 		if ((cpup = cpu[cix]) == NULL ||
349 		    (cpup->cpu_flags & CPU_READY) == 0) {
350 			/*
351 			 * In case CPU wasn't ready, but becomes ready later,
352 			 * take the CPU out of the set now.
353 			 */
354 			CPUSET_DEL(set, cix);
355 			continue;
356 		}
357 		if (cix != lcx && CPU_IN_SET(set, cix)) {
358 			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
359 			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
360 			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
361 			send_dirint(cix, XC_MED_PIL);
362 		}
363 		i++;
364 		if (i >= ncpus)
365 			break;
366 	}
367 
368 	/*
369 	 * Wait here until all remote calls to complete.
370 	 */
371 	i = 0;
372 	for (cix = 0; cix < NCPU; cix++) {
373 		if (lcx != cix && CPU_IN_SET(set, cix)) {
374 			cpup = cpu[cix];
375 			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
376 				ht_pause();
377 			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
378 		}
379 		i++;
380 		if (i >= ncpus)
381 			break;
382 	}
383 
384 }
385 
386 /*
387  * Release the CPUs captured by xc_capture_cpus, thus terminating the
388  * x-call session and exiting the critical section.
389  */
390 void
391 xc_release_cpus(void)
392 {
393 	int cix;
394 	int lcx = (int)(CPU->cpu_id);
395 	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
396 	struct cpu *cpup;
397 	int	i;
398 
399 	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
400 
401 	/*
402 	 * Allow each CPU to exit its holding pattern.
403 	 */
404 	i = 0;
405 	for (cix = 0; cix < NCPU; cix++) {
406 		if ((cpup = cpu[cix]) == NULL)
407 			continue;
408 		if ((cpup->cpu_flags & CPU_READY) &&
409 		    (cix != lcx) && CPU_IN_SET(set, cix)) {
410 			/*
411 			 * Clear xc_ack since we will be waiting for it
412 			 * to be set again after we set XC_DONE.
413 			 */
414 			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
415 		}
416 		i++;
417 		if (i >= ncpus)
418 			break;
419 	}
420 
421 	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
422 	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
423 }
424 
425 /*
426  * Common code to call a specified function on a set of processors.
427  * sync specifies what kind of waiting is done.
428  *	-1 - no waiting, don't release remotes
429  *	0 - no waiting, release remotes immediately
430  *	1 - run service locally w/o waiting for remotes.
431  *	2 - wait for remotes before running locally
432  */
433 static void
434 xc_common(
435 	xc_func_t func,
436 	xc_arg_t arg1,
437 	xc_arg_t arg2,
438 	xc_arg_t arg3,
439 	int pri,
440 	cpuset_t set,
441 	int sync)
442 {
443 	int cix;
444 	int lcx = (int)(CPU->cpu_id);
445 	struct cpu *cpup;
446 
447 	ASSERT(panicstr == NULL);
448 
449 	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
450 	ASSERT(CPU->cpu_flags & CPU_READY);
451 
452 	/*
453 	 * Set up the service definition mailbox.
454 	 */
455 	xc_mboxes[pri].func = func;
456 	xc_mboxes[pri].arg1 = arg1;
457 	xc_mboxes[pri].arg2 = arg2;
458 	xc_mboxes[pri].arg3 = arg3;
459 
460 	/*
461 	 * Request service on all remote processors.
462 	 */
463 	for (cix = 0; cix < NCPU; cix++) {
464 		if ((cpup = cpu[cix]) == NULL ||
465 		    (cpup->cpu_flags & CPU_READY) == 0) {
466 			/*
467 			 * In case the non-local CPU is not ready but becomes
468 			 * ready later, take it out of the set now. The local
469 			 * CPU needs to remain in the set to complete the
470 			 * requested function.
471 			 */
472 			if (cix != lcx)
473 				CPUSET_DEL(set, cix);
474 		} else if (cix != lcx && CPU_IN_SET(set, cix)) {
475 			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
476 			cpup->cpu_m.xc_ack[pri] = 0;
477 			cpup->cpu_m.xc_wait[pri] = sync;
478 			if (sync > 0)
479 				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
480 			else
481 				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
482 			cpup->cpu_m.xc_pend[pri] = 1;
483 			send_dirint(cix, xc_xlat_xcptoipl[pri]);
484 		}
485 	}
486 
487 	/*
488 	 * Run service locally if not waiting for remotes.
489 	 */
490 	if (sync != 2 && CPU_IN_SET(set, lcx) && func != NULL)
491 		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
492 
493 	if (sync == -1)
494 		return;
495 
496 	/*
497 	 * Wait here until all remote calls complete.
498 	 */
499 	for (cix = 0; cix < NCPU; cix++) {
500 		if (lcx != cix && CPU_IN_SET(set, cix)) {
501 			cpup = cpu[cix];
502 			while (cpup->cpu_m.xc_ack[pri] == 0)
503 				ht_pause();
504 			cpup->cpu_m.xc_ack[pri] = 0;
505 		}
506 	}
507 
508 	/*
509 	 * Run service locally if waiting for remotes.
510 	 */
511 	if (sync == 2 && CPU_IN_SET(set, lcx) && func != NULL)
512 		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
513 
514 	if (sync == 0)
515 		return;
516 
517 	/*
518 	 * Release any waiting CPUs
519 	 */
520 	for (cix = 0; cix < NCPU; cix++) {
521 		if (lcx != cix && CPU_IN_SET(set, cix)) {
522 			cpup = cpu[cix];
523 			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
524 				cpup->cpu_m.xc_wait[pri] = 0;
525 				cpup->cpu_m.xc_state[pri] = XC_DONE;
526 			}
527 		}
528 	}
529 
530 	/*
531 	 * Wait for all CPUs to acknowledge completion before we continue.
532 	 * Without this check it's possible (on a VM or hyper-threaded CPUs
533 	 * or in the presence of Service Management Interrupts which can all
534 	 * cause delays) for the remote processor to still be waiting by
535 	 * the time xc_common() is next invoked with the sync flag set
536 	 * resulting in a deadlock.
537 	 */
538 	for (cix = 0; cix < NCPU; cix++) {
539 		if (lcx != cix && CPU_IN_SET(set, cix)) {
540 			cpup = cpu[cix];
541 			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
542 				while (cpup->cpu_m.xc_ack[pri] == 0)
543 					ht_pause();
544 				cpup->cpu_m.xc_ack[pri] = 0;
545 			}
546 		}
547 	}
548 }
549 
550 /*
551  * xc_trycall: attempt to call specified function on all processors
552  * remotes may wait for a long time
553  * we continue immediately
554  */
555 void
556 xc_trycall(
557 	xc_arg_t arg1,
558 	xc_arg_t arg2,
559 	xc_arg_t arg3,
560 	cpuset_t set,
561 	xc_func_t func)
562 {
563 	int		save_kernel_preemption;
564 	extern int	IGNORE_KERNEL_PREEMPTION;
565 
566 	/*
567 	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
568 	 * someone else is already doing a cross-call -- we won't.
569 	 */
570 
571 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
572 	IGNORE_KERNEL_PREEMPTION = 1;
573 	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
574 		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
575 		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
576 	}
577 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
578 }
579 
580 /*
581  * Used by the debugger to cross-call the other CPUs, thus causing them to
582  * enter the debugger.  We can't hold locks, so we spin on the cross-call
583  * lock until we get it.  When we get it, we send the cross-call, and assume
584  * that we successfully stopped the other CPUs.
585  */
586 void
587 kdi_xc_others(int this_cpu, void (*func)(void))
588 {
589 	extern int	IGNORE_KERNEL_PREEMPTION;
590 	int save_kernel_preemption;
591 	mutex_impl_t *lp;
592 	cpuset_t set;
593 	int x;
594 
595 	CPUSET_ALL_BUT(set, this_cpu);
596 
597 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
598 	IGNORE_KERNEL_PREEMPTION = 1;
599 
600 	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
601 	for (x = 0; x < 0x400000; x++) {
602 		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
603 			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
604 			    set, -1);
605 			lp->m_spin.m_spinlock = 0; /* XXX */
606 			break;
607 		}
608 		(void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
609 	}
610 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
611 }
612