xref: /illumos-gate/usr/src/uts/i86pc/os/x_call.c (revision d327dbeacda682ba3d4efc9b451baa429ba8830c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  * Copyright 2018 Joyent, Inc.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/t_lock.h>
34 #include <sys/thread.h>
35 #include <sys/cpuvar.h>
36 #include <sys/x_call.h>
37 #include <sys/xc_levels.h>
38 #include <sys/cpu.h>
39 #include <sys/psw.h>
40 #include <sys/sunddi.h>
41 #include <sys/debug.h>
42 #include <sys/systm.h>
43 #include <sys/archsystm.h>
44 #include <sys/machsystm.h>
45 #include <sys/mutex_impl.h>
46 #include <sys/stack.h>
47 #include <sys/promif.h>
48 #include <sys/x86_archext.h>
49 
50 /*
51  * Implementation for cross-processor calls via interprocessor interrupts
52  *
53  * This implementation uses a message passing architecture to allow multiple
54  * concurrent cross calls to be in flight at any given time. We use the cmpxchg
55  * instruction, aka atomic_cas_ptr(), to implement simple efficient work
56  * queues for message passing between CPUs with almost no need for regular
57  * locking.  See xc_extract() and xc_insert() below.
58  *
59  * The general idea is that initiating a cross call means putting a message
60  * on a target(s) CPU's work queue. Any synchronization is handled by passing
61  * the message back and forth between initiator and target(s).
62  *
63  * Every CPU has xc_work_cnt, which indicates it has messages to process.
64  * This value is incremented as message traffic is initiated and decremented
65  * with every message that finishes all processing.
66  *
67  * The code needs no mfence or other membar_*() calls. The uses of
68  * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
69  * passing are implemented with LOCK prefix instructions which are
70  * equivalent to mfence.
71  *
 * One interesting aspect of this implementation is that it allows 2 or more
73  * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
74  * The cross call processing by the CPUs will happen in any order with only
75  * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
76  * from cross calls before all slaves have invoked the function.
77  *
78  * The reason for this asynchronous approach is to allow for fast global
79  * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
 * on a different Virtual Address at the same time, the old code required
81  * N squared IPIs. With this method, depending on timing, it could happen
82  * with just N IPIs.
83  *
84  * Here are the normal transitions for XC_MSG_* values in ->xc_command. A
85  * transition of "->" happens in the slave cpu and "=>" happens in the master
86  * cpu as the messages are passed back and forth.
87  *
88  * FREE => ASYNC ->                       DONE => FREE
89  * FREE => CALL ->                        DONE => FREE
90  * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
91  *
92  * The interesting one above is ASYNC. You might ask, why not go directly
93  * to FREE, instead of DONE? If it did that, it might be possible to exhaust
94  * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more complicated
96  * handling. However since nothing important uses ASYNC, I've not bothered.
97  */
98 
99 /*
100  * The default is to not enable collecting counts of IPI information, since
101  * the updating of shared cachelines could cause excess bus traffic.
102  */
103 uint_t xc_collect_enable = 0;
104 uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
105 uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */
106 
107 /*
108  * We allow for one high priority message at a time to happen in the system.
109  * This is used for panic, kmdb, etc., so no locking is done.
110  */
111 static volatile cpuset_t xc_priority_set_store;
112 static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
113 static xc_data_t xc_priority_data;
114 
115 /*
116  * Decrement a CPU's work count
117  */
118 static void
119 xc_decrement(struct machcpu *mcpu)
120 {
121 	atomic_dec_32(&mcpu->xc_work_cnt);
122 }
123 
124 /*
125  * Increment a CPU's work count and return the old value
126  */
127 static int
128 xc_increment(struct machcpu *mcpu)
129 {
130 	int old;
131 	do {
132 		old = mcpu->xc_work_cnt;
133 	} while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
134 	return (old);
135 }
136 
137 /*
138  * Put a message into a queue. The insertion is atomic no matter
139  * how many different inserts/extracts to the same queue happen.
140  */
141 static void
142 xc_insert(void *queue, xc_msg_t *msg)
143 {
144 	xc_msg_t *old_head;
145 
146 	/*
147 	 * FREE messages should only ever be getting inserted into
148 	 * the xc_master CPUs xc_free queue.
149 	 */
150 	ASSERT(msg->xc_command != XC_MSG_FREE ||
151 	    cpu[msg->xc_master] == NULL || /* possible only during init */
152 	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);
153 
154 	do {
155 		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
156 		msg->xc_next = old_head;
157 	} while (atomic_cas_ptr(queue, old_head, msg) != old_head);
158 }
159 
160 /*
161  * Extract a message from a queue. The extraction is atomic only
162  * when just one thread does extractions from the queue.
163  * If the queue is empty, NULL is returned.
164  */
165 static xc_msg_t *
166 xc_extract(xc_msg_t **queue)
167 {
168 	xc_msg_t *old_head;
169 
170 	do {
171 		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
172 		if (old_head == NULL)
173 			return (old_head);
174 	} while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
175 	    old_head);
176 	old_head->xc_next = NULL;
177 	return (old_head);
178 }
179 
180 /*
181  * Extract the next message from the CPU's queue, and place the message in
182  * .xc_curmsg.  The latter is solely to make debugging (and ::xcall) more
183  * useful.
184  */
185 static xc_msg_t *
186 xc_get(void)
187 {
188 	struct machcpu *mcpup = &CPU->cpu_m;
189 	xc_msg_t *msg = xc_extract(&mcpup->xc_msgbox);
190 	mcpup->xc_curmsg = msg;
191 	return (msg);
192 }
193 
194 /*
195  * Initialize the machcpu fields used for cross calls
196  */
197 static uint_t xc_initialized = 0;
198 
199 void
200 xc_init_cpu(struct cpu *cpup)
201 {
202 	xc_msg_t *msg;
203 	int c;
204 
205 	/*
206 	 * Allocate message buffers for the new CPU.
207 	 */
208 	for (c = 0; c < max_ncpus; ++c) {
209 		if (plat_dr_support_cpu()) {
210 			/*
211 			 * Allocate a message buffer for every CPU possible
212 			 * in system, including our own, and add them to our xc
213 			 * message queue.
214 			 */
215 			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
216 			msg->xc_command = XC_MSG_FREE;
217 			msg->xc_master = cpup->cpu_id;
218 			xc_insert(&cpup->cpu_m.xc_free, msg);
219 		} else if (cpu[c] != NULL && cpu[c] != cpup) {
220 			/*
221 			 * Add a new message buffer to each existing CPU's free
222 			 * list, as well as one for my list for each of them.
223 			 * Note: cpu0 is statically inserted into cpu[] array,
224 			 * so need to check cpu[c] isn't cpup itself to avoid
225 			 * allocating extra message buffers for cpu0.
226 			 */
227 			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
228 			msg->xc_command = XC_MSG_FREE;
229 			msg->xc_master = c;
230 			xc_insert(&cpu[c]->cpu_m.xc_free, msg);
231 
232 			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
233 			msg->xc_command = XC_MSG_FREE;
234 			msg->xc_master = cpup->cpu_id;
235 			xc_insert(&cpup->cpu_m.xc_free, msg);
236 		}
237 	}
238 
239 	if (!plat_dr_support_cpu()) {
240 		/*
241 		 * Add one for self messages if CPU hotplug is disabled.
242 		 */
243 		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
244 		msg->xc_command = XC_MSG_FREE;
245 		msg->xc_master = cpup->cpu_id;
246 		xc_insert(&cpup->cpu_m.xc_free, msg);
247 	}
248 
249 	if (!xc_initialized)
250 		xc_initialized = 1;
251 }
252 
253 void
254 xc_fini_cpu(struct cpu *cpup)
255 {
256 	xc_msg_t *msg;
257 
258 	ASSERT((cpup->cpu_flags & CPU_READY) == 0);
259 	ASSERT(cpup->cpu_m.xc_msgbox == NULL);
260 	ASSERT(cpup->cpu_m.xc_work_cnt == 0);
261 
262 	while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
263 		kmem_free(msg, sizeof (*msg));
264 	}
265 }
266 
267 #define	XC_FLUSH_MAX_WAITS		1000
268 
269 /* Flush inflight message buffers. */
270 int
271 xc_flush_cpu(struct cpu *cpup)
272 {
273 	int i;
274 
275 	ASSERT((cpup->cpu_flags & CPU_READY) == 0);
276 
277 	/*
278 	 * Pause all working CPUs, which ensures that there's no CPU in
279 	 * function xc_common().
280 	 * This is used to work around a race condition window in xc_common()
281 	 * between checking CPU_READY flag and increasing working item count.
282 	 */
283 	pause_cpus(cpup, NULL);
284 	start_cpus();
285 
286 	for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
287 		if (cpup->cpu_m.xc_work_cnt == 0) {
288 			break;
289 		}
290 		DELAY(1);
291 	}
292 	for (; i < XC_FLUSH_MAX_WAITS; i++) {
293 		if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
294 			break;
295 		}
296 		DELAY(1);
297 	}
298 
299 	return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
300 }
301 
302 /*
303  * X-call message processing routine. Note that this is used by both
304  * senders and recipients of messages.
305  *
306  * We're protected against changing CPUs by either being in a high-priority
307  * interrupt, having preemption disabled or by having a raised SPL.
308  */
309 /*ARGSUSED*/
310 uint_t
311 xc_serv(caddr_t arg1, caddr_t arg2)
312 {
313 	struct machcpu *mcpup = &(CPU->cpu_m);
314 	xc_msg_t *msg;
315 	xc_data_t *data;
316 	xc_msg_t *xc_waiters = NULL;
317 	uint32_t num_waiting = 0;
318 	xc_func_t func;
319 	xc_arg_t a1;
320 	xc_arg_t a2;
321 	xc_arg_t a3;
322 	uint_t rc = DDI_INTR_UNCLAIMED;
323 
324 	while (mcpup->xc_work_cnt != 0) {
325 		rc = DDI_INTR_CLAIMED;
326 
327 		/*
328 		 * We may have to wait for a message to arrive.
329 		 */
330 		for (msg = NULL; msg == NULL; msg = xc_get()) {
331 
332 			/*
333 			 * Alway check for and handle a priority message.
334 			 */
335 			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
336 				func = xc_priority_data.xc_func;
337 				a1 = xc_priority_data.xc_a1;
338 				a2 = xc_priority_data.xc_a2;
339 				a3 = xc_priority_data.xc_a3;
340 				BT_ATOMIC_CLEAR(xc_priority_set, CPU->cpu_id);
341 				xc_decrement(mcpup);
342 				func(a1, a2, a3);
343 				if (mcpup->xc_work_cnt == 0)
344 					return (rc);
345 			}
346 
347 			/*
348 			 * wait for a message to arrive
349 			 */
350 			SMT_PAUSE();
351 		}
352 
353 
354 		/*
355 		 * process the message
356 		 */
357 		switch (msg->xc_command) {
358 
359 		/*
360 		 * ASYNC gives back the message immediately, then we do the
361 		 * function and return with no more waiting.
362 		 */
363 		case XC_MSG_ASYNC:
364 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
365 			func = data->xc_func;
366 			a1 = data->xc_a1;
367 			a2 = data->xc_a2;
368 			a3 = data->xc_a3;
369 			msg->xc_command = XC_MSG_DONE;
370 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
371 			if (func != NULL)
372 				(void) (*func)(a1, a2, a3);
373 			xc_decrement(mcpup);
374 			break;
375 
376 		/*
377 		 * SYNC messages do the call, then send it back to the master
378 		 * in WAITING mode
379 		 */
380 		case XC_MSG_SYNC:
381 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
382 			if (data->xc_func != NULL)
383 				(void) (*data->xc_func)(data->xc_a1,
384 				    data->xc_a2, data->xc_a3);
385 			msg->xc_command = XC_MSG_WAITING;
386 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
387 			break;
388 
389 		/*
390 		 * WAITING messsages are collected by the master until all
391 		 * have arrived. Once all arrive, we release them back to
392 		 * the slaves
393 		 */
394 		case XC_MSG_WAITING:
395 			xc_insert(&xc_waiters, msg);
396 			if (++num_waiting < mcpup->xc_wait_cnt)
397 				break;
398 			while ((msg = xc_extract(&xc_waiters)) != NULL) {
399 				msg->xc_command = XC_MSG_RELEASED;
400 				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
401 				    msg);
402 				--num_waiting;
403 			}
404 			if (num_waiting != 0)
405 				panic("wrong number waiting");
406 			mcpup->xc_wait_cnt = 0;
407 			break;
408 
409 		/*
410 		 * CALL messages do the function and then, like RELEASE,
411 		 * send the message is back to master as DONE.
412 		 */
413 		case XC_MSG_CALL:
414 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
415 			if (data->xc_func != NULL)
416 				(void) (*data->xc_func)(data->xc_a1,
417 				    data->xc_a2, data->xc_a3);
418 			/*FALLTHROUGH*/
419 		case XC_MSG_RELEASED:
420 			msg->xc_command = XC_MSG_DONE;
421 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
422 			xc_decrement(mcpup);
423 			break;
424 
425 		/*
426 		 * DONE means a slave has completely finished up.
427 		 * Once we collect all the DONE messages, we'll exit
428 		 * processing too.
429 		 */
430 		case XC_MSG_DONE:
431 			msg->xc_command = XC_MSG_FREE;
432 			xc_insert(&mcpup->xc_free, msg);
433 			xc_decrement(mcpup);
434 			break;
435 
436 		case XC_MSG_FREE:
437 			panic("free message 0x%p in msgbox", (void *)msg);
438 			break;
439 
440 		default:
441 			panic("bad message 0x%p in msgbox", (void *)msg);
442 			break;
443 		}
444 
445 		CPU->cpu_m.xc_curmsg = NULL;
446 	}
447 	return (rc);
448 }
449 
450 /*
451  * Initiate cross call processing.
452  */
453 static void
454 xc_common(
455 	xc_func_t func,
456 	xc_arg_t arg1,
457 	xc_arg_t arg2,
458 	xc_arg_t arg3,
459 	ulong_t *set,
460 	uint_t command)
461 {
462 	int c;
463 	struct cpu *cpup;
464 	xc_msg_t *msg;
465 	xc_data_t *data;
466 	int cnt;
467 	int save_spl;
468 
469 	if (!xc_initialized) {
470 		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
471 		    func != NULL)
472 			(void) (*func)(arg1, arg2, arg3);
473 		return;
474 	}
475 
476 	save_spl = splr(ipltospl(XC_HI_PIL));
477 
478 	/*
479 	 * fill in cross call data
480 	 */
481 	data = &CPU->cpu_m.xc_data;
482 	data->xc_func = func;
483 	data->xc_a1 = arg1;
484 	data->xc_a2 = arg2;
485 	data->xc_a3 = arg3;
486 
487 	/*
488 	 * Post messages to all CPUs involved that are CPU_READY
489 	 */
490 	CPU->cpu_m.xc_wait_cnt = 0;
491 	for (c = 0; c < max_ncpus; ++c) {
492 		if (!BT_TEST(set, c))
493 			continue;
494 		cpup = cpu[c];
495 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
496 			continue;
497 
498 		/*
499 		 * Fill out a new message.
500 		 */
501 		msg = xc_extract(&CPU->cpu_m.xc_free);
502 		if (msg == NULL)
503 			panic("Ran out of free xc_msg_t's");
504 		msg->xc_command = command;
505 		if (msg->xc_master != CPU->cpu_id)
506 			panic("msg %p has wrong xc_master", (void *)msg);
507 		msg->xc_slave = c;
508 
509 		/*
510 		 * Increment my work count for all messages that I'll
511 		 * transition from DONE to FREE.
512 		 * Also remember how many XC_MSG_WAITINGs to look for
513 		 */
514 		(void) xc_increment(&CPU->cpu_m);
515 		if (command == XC_MSG_SYNC)
516 			++CPU->cpu_m.xc_wait_cnt;
517 
518 		/*
519 		 * Increment the target CPU work count then insert the message
520 		 * in the target msgbox. If I post the first bit of work
521 		 * for the target to do, send an IPI to the target CPU.
522 		 */
523 		cnt = xc_increment(&cpup->cpu_m);
524 		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
525 		if (cpup != CPU) {
526 			if (cnt == 0) {
527 				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
528 				send_dirint(c, XC_HI_PIL);
529 				if (xc_collect_enable)
530 					++xc_total_cnt;
531 			} else if (xc_collect_enable) {
532 				++xc_multi_cnt;
533 			}
534 		}
535 	}
536 
537 	/*
538 	 * Now drop into the message handler until all work is done
539 	 */
540 	(void) xc_serv(NULL, NULL);
541 	splx(save_spl);
542 }
543 
544 /*
545  * Push out a priority cross call.
546  */
547 static void
548 xc_priority_common(
549 	xc_func_t func,
550 	xc_arg_t arg1,
551 	xc_arg_t arg2,
552 	xc_arg_t arg3,
553 	ulong_t *set)
554 {
555 	int i;
556 	int c;
557 	struct cpu *cpup;
558 
559 	/*
560 	 * Wait briefly for any previous xc_priority to have finished.
561 	 */
562 	for (c = 0; c < max_ncpus; ++c) {
563 		cpup = cpu[c];
564 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
565 			continue;
566 
567 		/*
568 		 * The value of 40000 here is from old kernel code. It
569 		 * really should be changed to some time based value, since
570 		 * under a hypervisor, there's no guarantee a remote CPU
571 		 * is even scheduled.
572 		 */
573 		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
574 			SMT_PAUSE();
575 
576 		/*
577 		 * Some CPU did not respond to a previous priority request. It's
578 		 * probably deadlocked with interrupts blocked or some such
579 		 * problem. We'll just erase the previous request - which was
580 		 * most likely a kmdb_enter that has already expired - and plow
581 		 * ahead.
582 		 */
583 		if (BT_TEST(xc_priority_set, c)) {
584 			BT_ATOMIC_CLEAR(xc_priority_set, c);
585 			if (cpup->cpu_m.xc_work_cnt > 0)
586 				xc_decrement(&cpup->cpu_m);
587 		}
588 	}
589 
590 	/*
591 	 * fill in cross call data
592 	 */
593 	xc_priority_data.xc_func = func;
594 	xc_priority_data.xc_a1 = arg1;
595 	xc_priority_data.xc_a2 = arg2;
596 	xc_priority_data.xc_a3 = arg3;
597 
598 	/*
599 	 * Post messages to all CPUs involved that are CPU_READY
600 	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
601 	 */
602 	for (c = 0; c < max_ncpus; ++c) {
603 		if (!BT_TEST(set, c))
604 			continue;
605 		cpup = cpu[c];
606 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
607 		    cpup == CPU)
608 			continue;
609 		(void) xc_increment(&cpup->cpu_m);
610 		BT_ATOMIC_SET(xc_priority_set, c);
611 		send_dirint(c, XC_HI_PIL);
612 		for (i = 0; i < 10; ++i) {
613 			(void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
614 			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
615 		}
616 	}
617 }
618 
619 /*
620  * Do cross call to all other CPUs with absolutely no waiting or handshaking.
621  * This should only be used for extraordinary operations, like panic(), which
622  * need to work, in some fashion, in a not completely functional system.
623  * All other uses that want minimal waiting should use xc_call_nowait().
624  */
625 void
626 xc_priority(
627 	xc_arg_t arg1,
628 	xc_arg_t arg2,
629 	xc_arg_t arg3,
630 	ulong_t *set,
631 	xc_func_t func)
632 {
633 	extern int IGNORE_KERNEL_PREEMPTION;
634 	int save_spl = splr(ipltospl(XC_HI_PIL));
635 	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
636 
637 	IGNORE_KERNEL_PREEMPTION = 1;
638 	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
639 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
640 	splx(save_spl);
641 }
642 
643 /*
644  * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
645  */
646 void
647 kdi_xc_others(int this_cpu, void (*func)(void))
648 {
649 	extern int IGNORE_KERNEL_PREEMPTION;
650 	int save_kernel_preemption;
651 	cpuset_t set;
652 
653 	if (!xc_initialized)
654 		return;
655 
656 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
657 	IGNORE_KERNEL_PREEMPTION = 1;
658 	CPUSET_ALL_BUT(set, this_cpu);
659 	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
660 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
661 }
662 
663 
664 
665 /*
666  * Invoke function on specified processors. Remotes may continue after
667  * service with no waiting. xc_call_nowait() may return immediately too.
668  */
669 void
670 xc_call_nowait(
671 	xc_arg_t arg1,
672 	xc_arg_t arg2,
673 	xc_arg_t arg3,
674 	ulong_t *set,
675 	xc_func_t func)
676 {
677 	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
678 }
679 
680 /*
681  * Invoke function on specified processors. Remotes may continue after
682  * service with no waiting. xc_call() returns only after remotes have finished.
683  */
684 void
685 xc_call(
686 	xc_arg_t arg1,
687 	xc_arg_t arg2,
688 	xc_arg_t arg3,
689 	ulong_t *set,
690 	xc_func_t func)
691 {
692 	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
693 }
694 
695 /*
696  * Invoke function on specified processors. Remotes wait until all have
697  * finished. xc_sync() also waits until all remotes have finished.
698  */
699 void
700 xc_sync(
701 	xc_arg_t arg1,
702 	xc_arg_t arg2,
703 	xc_arg_t arg3,
704 	ulong_t *set,
705 	xc_func_t func)
706 {
707 	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
708 }
709