xref: /titanic_52/usr/src/uts/sun4/os/x_call.c (revision 50c83d09652262aba75a6182b3203c80b48b092b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machsystm.h>
32 #include <sys/cpuvar.h>
33 #include <sys/intreg.h>
34 #include <sys/x_call.h>
35 #include <sys/cmn_err.h>
36 #include <sys/membar.h>
37 #include <sys/disp.h>
38 #include <sys/debug.h>
39 #include <sys/privregs.h>
40 #include <sys/xc_impl.h>
41 #include <sys/ivintr.h>
42 #include <sys/dmv.h>
43 #include <sys/sysmacros.h>
44 
/*
 * Per-cpu x-call statistics, kept only in DEBUG kernels.  In this file
 * x_dstat is incremented on the sending side (indexed by the sending cpu
 * and the request type) and x_rstat by the xc_serv()/xc_loop() handlers.
 */
#ifdef DEBUG
uint_t x_dstat[NCPU][XC_LOOP_EXIT+1];
uint_t x_rstat[NCPU][4];
#endif /* DEBUG */

static int xc_serv_inum;	/* software interrupt number for xc_serv() */
static int xc_loop_inum;	/* software interrupt number for xc_loop() */
kmutex_t xc_sys_mutex;		/* protect xcall session and xc_mbox */
int xc_spl_enter[NCPU];		/* protect sending x-call */
/* id of the cpu that initiated xc_attention(); -1 if none, 0 is valid */
static int xc_holder = -1;
55 
/*
 * Mail box for handshaking and xcall request; protected by xc_sys_mutex.
 * There is one slot per cpu, indexed by cpu id.
 */
static struct xc_mbox {
	xcfunc_t *xc_func;	/* handler the target cpu is asked to run */
	uint64_t xc_arg1;	/* first argument handed to xc_func */
	uint64_t xc_arg2;	/* second argument handed to xc_func */
	cpuset_t xc_cpuset;	/* set captured by this cpu's xc_attention() */
	/* handshake state: XC_IDLE, XC_ENTER, XC_WAIT, XC_DOIT or XC_EXIT */
	volatile uint_t	xc_state;
} xc_mbox[NCPU];
66 
/*
 * Timeout limits.  They are computed by xc_init() and may be raised later
 * by xc_func_timeout_adj().
 */
uint64_t xc_tick_limit;		/* send_mondo() tick limit value */
uint64_t xc_tick_limit_scale = 1;	/* scale used to increase the limit */
uint64_t xc_tick_jump_limit;	/* send_mondo() irregular tick jump limit */

/* timeout value for xcalls to be received by the target CPU */
uint64_t xc_mondo_time_limit;

/* timeout value for xcall functions to be executed on the target CPU */
uint64_t xc_func_time_limit;

uint64_t xc_scale = 1;	/* scale used to calculate timeout limits */
/* xc_func_time_limit is xc_mondo_time_limit times this multiplier */
uint64_t xc_mondo_multiplier = 10;

/*
 * While this is non-zero the wait loops in this file restart their
 * timeout counters instead of reporting a timeout.
 */
uint_t sendmondo_in_recover;
81 
/*
 * sending x-calls
 *
 * These routines are only declared here; their definitions are not part
 * of this file.  As used below, init_mondo() records the function and
 * arguments of the request, and send_one_mondo()/send_mondo_set() then
 * deliver it to one cpu or to a set of cpus.
 */
void	init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2);
void	send_one_mondo(int cpuid);
void	send_mondo_set(cpuset_t set);
88 
89 /*
90  * Adjust xc_attention timeout if a faster cpu is dynamically added.
91  * Ignore the dynamic removal of a cpu that would lower these timeout
92  * values.
93  */
94 static int
95 xc_func_timeout_adj(cpu_setup_t what, int cpuid) {
96 	uint64_t freq = cpunodes[cpuid].clock_freq;
97 
98 	switch (what) {
99 	case CPU_ON:
100 	case CPU_INIT:
101 	case CPU_CONFIG:
102 	case CPU_CPUPART_IN:
103 		if (freq * xc_scale > xc_mondo_time_limit) {
104 			xc_mondo_time_limit = freq * xc_scale;
105 			xc_func_time_limit = xc_mondo_time_limit *
106 			    xc_mondo_multiplier;
107 		}
108 		break;
109 	case CPU_OFF:
110 	case CPU_UNCONFIG:
111 	case CPU_CPUPART_OUT:
112 	default:
113 		break;
114 	}
115 
116 	return (0);
117 }
118 
119 /*
120  * xc_init - initialize x-call related locks
121  */
122 void
123 xc_init(void)
124 {
125 	int pix;
126 	uint64_t maxfreq = 0;
127 
128 	mutex_init(&xc_sys_mutex, NULL, MUTEX_SPIN,
129 	    (void *)ipltospl(XCALL_PIL));
130 
131 #ifdef DEBUG
132 	/* Initialize for all possible CPUs. */
133 	for (pix = 0; pix < NCPU; pix++) {
134 		XC_STAT_INIT(pix);
135 	}
136 #endif /* DEBUG */
137 
138 	xc_serv_inum = add_softintr(XCALL_PIL, (softintrfunc)xc_serv, 0);
139 	xc_loop_inum = add_softintr(XCALL_PIL, (softintrfunc)xc_loop, 0);
140 
141 	/*
142 	 * Initialize the calibrated tick limit for send_mondo.
143 	 * The value represents the maximum tick count to wait.
144 	 */
145 	xc_tick_limit =
146 	    ((uint64_t)sys_tick_freq * XC_SEND_MONDO_MSEC) / 1000;
147 	xc_tick_jump_limit = xc_tick_limit / 32;
148 	xc_tick_limit *= xc_tick_limit_scale;
149 
150 	/*
151 	 * Maximum number of loops to wait before timing out in xc_attention.
152 	 */
153 	for (pix = 0; pix < NCPU; pix++) {
154 		maxfreq = MAX(cpunodes[pix].clock_freq, maxfreq);
155 	}
156 	xc_mondo_time_limit = maxfreq * xc_scale;
157 	register_cpu_setup_func((cpu_setup_func_t *)xc_func_timeout_adj, NULL);
158 
159 	/*
160 	 * Maximum number of loops to wait for a xcall function to be
161 	 * executed on the target CPU.
162 	 */
163 	xc_func_time_limit = xc_mondo_time_limit * xc_mondo_multiplier;
164 }
165 
166 /*
167  * The following routines basically provide callers with two kinds of
168  * inter-processor interrupt services:
169  *	1. cross calls (x-calls) - requests are handled at target cpu's TL=0
170  *	2. cross traps (c-traps) - requests are handled at target cpu's TL>0
171  *
172  * Although these routines protect the services from migrating to other cpus
173  * "after" they are called, it is the caller's choice or responsibility to
174  * prevent the cpu migration "before" calling them.
175  *
176  * X-call routines:
177  *
178  *	xc_one()  - send a request to one processor
179  *	xc_some() - send a request to some processors
180  *	xc_all()  - send a request to all processors
181  *
182  *	Their common parameters:
183  *		func - a TL=0 handler address
184  *		arg1 and arg2  - optional
185  *
186  *	The services provided by x-call routines allow callers
187  *	to send a request to target cpus to execute a TL=0
188  *	handler.
189  *	The interface of the registers of the TL=0 handler:
190  *		%o0: arg1
191  *		%o1: arg2
192  *
193  * X-trap routines:
194  *
195  *	xt_one()  - send a request to one processor
196  *	xt_some() - send a request to some processors
197  *	xt_all()  - send a request to all processors
198  *
199  *	Their common parameters:
200  *		func - a TL>0 handler address or an interrupt number
201  *		arg1, arg2
202  *		       optional when "func" is an address;
203  *		       0        when "func" is an interrupt number
204  *
205  *	If the request of "func" is a kernel address, then
206  *	the target cpu will execute the request of "func" with
207  *	args at "TL>0" level.
208  *	The interface of the registers of the TL>0 handler:
209  *		%g1: arg1
210  *		%g2: arg2
211  *
212  *	If the request of "func" is not a kernel address, then it has
213  *	to be an assigned interrupt number through add_softintr().
 *	An interrupt number is an index into the interrupt vector table,
 *	whose entry contains an interrupt handler address with its
 *	corresponding interrupt level and argument.
217  *	The target cpu will arrange the request to be serviced according
218  *	to its pre-registered information.
219  *	args are assumed to be zeros in this case.
220  *
221  * In addition, callers are allowed to capture and release cpus by
222  * calling the routines: xc_attention() and xc_dismissed().
223  */
224 
225 /*
226  * xt_one - send a "x-trap" to a cpu
227  */
228 void
229 xt_one(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
230 {
231 	if (!CPU_IN_SET(cpu_ready_set, cix)) {
232 		return;
233 	}
234 	xt_one_unchecked(cix, func, arg1, arg2);
235 }
236 
237 /*
238  * xt_one_unchecked - send a "x-trap" to a cpu without checking for its
239  * existance in cpu_ready_set
240  */
241 void
242 xt_one_unchecked(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
243 {
244 	int lcx;
245 	int opl;
246 	cpuset_t tset;
247 
248 	/*
249 	 * Make sure the function address will not be interpreted as a
250 	 * dmv interrupt
251 	 */
252 	ASSERT(!DMV_IS_DMV(func));
253 
254 	/*
255 	 * It's illegal to send software inums through the cross-trap
256 	 * interface.
257 	 */
258 	ASSERT((uintptr_t)func >= KERNELBASE);
259 
260 	CPUSET_ZERO(tset);
261 
262 	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */
263 
264 	CPUSET_ADD(tset, cix);
265 
266 	if (cix == lcx) {
267 		/*
268 		 * same cpu - use software fast trap
269 		 */
270 		send_self_xcall(CPU, arg1, arg2, func);
271 		XC_STAT_INC(x_dstat[lcx][XT_ONE_SELF]);
272 		XC_TRACE(XT_ONE_SELF, &tset, func, arg1, arg2);
273 	} else {	/* other cpu - send a mondo to the target cpu */
274 		/*
275 		 * other cpu - send a mondo to the target cpu
276 		 */
277 		XC_TRACE(XT_ONE_OTHER, &tset, func, arg1, arg2);
278 		init_mondo(func, arg1, arg2);
279 		send_one_mondo(cix);
280 		XC_STAT_INC(x_dstat[lcx][XT_ONE_OTHER]);
281 	}
282 	XC_SPL_EXIT(lcx, opl);
283 }
284 
285 /*
286  * xt_some - send a "x-trap" to some cpus
287  */
288 void
289 xt_some(cpuset_t cpuset, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
290 {
291 	int lcx;
292 	int opl;
293 	cpuset_t xc_cpuset, tset;
294 
295 	/*
296 	 * Make sure the function address will not be interpreted as a
297 	 * dmv interrupt
298 	 */
299 	ASSERT(!DMV_IS_DMV(func));
300 
301 	/*
302 	 * It's illegal to send software inums through the cross-trap
303 	 * interface.
304 	 */
305 	ASSERT((uintptr_t)func >= KERNELBASE);
306 
307 	CPUSET_ZERO(tset);
308 
309 	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */
310 
311 	CPUSET_ADD(tset, lcx);
312 
313 	/*
314 	 * only send to the CPU_READY ones
315 	 */
316 	xc_cpuset = cpu_ready_set;
317 	CPUSET_AND(xc_cpuset, cpuset);
318 
319 	/*
320 	 * send to nobody; just return
321 	 */
322 	if (CPUSET_ISNULL(xc_cpuset)) {
323 		XC_SPL_EXIT(lcx, opl);
324 		return;
325 	}
326 
327 	/*
328 	 * don't send mondo to self
329 	 */
330 	if (CPU_IN_SET(xc_cpuset, lcx)) {
331 		/*
332 		 * same cpu - use software fast trap
333 		 */
334 		send_self_xcall(CPU, arg1, arg2, func);
335 		XC_STAT_INC(x_dstat[lcx][XT_SOME_SELF]);
336 		XC_TRACE(XT_SOME_SELF, &tset, func, arg1, arg2);
337 		CPUSET_DEL(xc_cpuset, lcx);
338 		if (CPUSET_ISNULL(xc_cpuset)) {
339 			XC_SPL_EXIT(lcx, opl);
340 			return;
341 		}
342 	}
343 	XC_TRACE(XT_SOME_OTHER, &xc_cpuset, func, arg1, arg2);
344 	init_mondo(func, arg1, arg2);
345 	send_mondo_set(xc_cpuset);
346 	XC_STAT_INC(x_dstat[lcx][XT_SOME_OTHER]);
347 
348 	XC_SPL_EXIT(lcx, opl);
349 }
350 
351 /*
352  * xt_all - send a "x-trap" to all cpus
353  */
354 void
355 xt_all(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
356 {
357 	int lcx;
358 	int opl;
359 	cpuset_t xc_cpuset, tset;
360 
361 	/*
362 	 * Make sure the function address will not be interpreted as a
363 	 * dmv interrupt
364 	 */
365 	ASSERT(!DMV_IS_DMV(func));
366 
367 	/*
368 	 * It's illegal to send software inums through the cross-trap
369 	 * interface.
370 	 */
371 	ASSERT((uintptr_t)func >= KERNELBASE);
372 
373 	CPUSET_ZERO(tset);
374 
375 	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */
376 
377 	CPUSET_ADD(tset, lcx);
378 
379 	/*
380 	 * same cpu - use software fast trap
381 	 */
382 	if (CPU_IN_SET(cpu_ready_set, lcx))
383 		send_self_xcall(CPU, arg1, arg2, func);
384 
385 	XC_TRACE(XT_ALL_OTHER, &cpu_ready_set, func, arg1, arg2);
386 
387 	/*
388 	 * don't send mondo to self
389 	 */
390 	xc_cpuset = cpu_ready_set;
391 	CPUSET_DEL(xc_cpuset, lcx);
392 
393 	if (CPUSET_ISNULL(xc_cpuset)) {
394 		XC_STAT_INC(x_dstat[lcx][XT_ALL_SELF]);
395 		XC_TRACE(XT_ALL_SELF, &tset, func, arg1, arg2);
396 		XC_SPL_EXIT(lcx, opl);
397 		return;
398 	}
399 
400 	init_mondo(func, arg1, arg2);
401 	send_mondo_set(xc_cpuset);
402 
403 	XC_STAT_INC(x_dstat[lcx][XT_ALL_OTHER]);
404 	XC_SPL_EXIT(lcx, opl);
405 }
406 
/*
 * xc_one - send a "x-call" to a cpu
 *
 * Has func(arg1, arg2) executed on cpu "cix" and returns only after the
 * mailbox of the target shows that the request has been completed.
 * Nothing is done when cix is not in cpu_ready_set.  Three cases:
 *	- cix is the current cpu: func is simply called;
 *	- the current cpu is xc_holder (it is inside an xc_attention()
 *	  session): the request is posted to the target's mailbox, where
 *	  the target's xc_loop() is expected to pick it up;
 *	- otherwise: xc_sys_mutex is taken, the request is posted and a
 *	  mondo raising the xc_serv() soft interrupt is sent to the target.
 * Preemption is disabled for the duration of the call.
 */
void
xc_one(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	uint64_t loop_cnt = 0;
	cpuset_t tset;
	int first_time = 1;

	/*
	 * send to nobody; just return
	 */
	if (!CPU_IN_SET(cpu_ready_set, cix))
		return;

	ASSERT((uintptr_t)func > KERNELBASE);
	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);

	CPUSET_ZERO(tset);

	kpreempt_disable();

	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */

	/* tset is only handed to XC_TRACE; it names the target cpu */
	CPUSET_ADD(tset, cix);

	if (cix == lcx) {	/* same cpu just do it */
		XC_TRACE(XC_ONE_SELF, &tset, func, arg1, arg2);
		(*func)(arg1, arg2);
		XC_STAT_INC(x_dstat[lcx][XC_ONE_SELF]);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
		ASSERT(MUTEX_HELD(&xc_sys_mutex));
		ASSERT(CPU_IN_SET(xc_mbox[lcx].xc_cpuset, lcx));
		ASSERT(CPU_IN_SET(xc_mbox[cix].xc_cpuset, cix));
		ASSERT(xc_mbox[cix].xc_state == XC_WAIT);
		XC_TRACE(XC_ONE_OTHER_H, &tset, func, arg1, arg2);

		/*
		 * target processor's xc_loop should be waiting
		 * for the work to do; just set up the xc_mbox
		 */
		XC_SETUP(cix, func, arg1, arg2);
		membar_stld();

		/*
		 * xc_loop() puts the state back to XC_WAIT once it has
		 * run the function.
		 */
		while (xc_mbox[cix].xc_state != XC_WAIT) {
			if (loop_cnt++ > xc_func_time_limit) {
				/* no timeout while send-mondo recovers */
				if (sendmondo_in_recover) {
					drv_usecwait(1);
					loop_cnt = 0;
					continue;
				}
				cmn_err(CE_PANIC, "xc_one() timeout, "
				    "xc_state[%d] != XC_WAIT", cix);
			}
		}
		XC_STAT_INC(x_dstat[lcx][XC_ONE_OTHER_H]);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	/*
	 * Avoid dead lock if someone has sent us a xc_loop request while
	 * we are trying to grab xc_sys_mutex.
	 */
	XC_SPL_EXIT(lcx, opl);

	/*
	 * At this point, since we don't own xc_sys_mutex,
	 * our pil shouldn't run at or above the XCALL_PIL.
	 */
	ASSERT(getpil() < XCALL_PIL);

	/*
	 * Since xc_holder is not owned by us, it could be that
	 * no one owns it, or we are not informed to enter into
	 * xc_loop(). In either case, we need to grab the
	 * xc_sys_mutex before we write to the xc_mbox, and
	 * we shouldn't release it until the request is finished.
	 */

	mutex_enter(&xc_sys_mutex);
	xc_spl_enter[lcx] = 1;

	/*
	 * Since we own xc_sys_mutex now, we are safe to
	 * write to the xc_mbox.
	 */
	ASSERT(xc_mbox[cix].xc_state == XC_IDLE);
	XC_TRACE(XC_ONE_OTHER, &tset, func, arg1, arg2);
	XC_SETUP(cix, func, arg1, arg2);
	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
	send_one_mondo(cix);

	/* xc_serv does membar_stld */
	while (xc_mbox[cix].xc_state != XC_IDLE) {
		if (loop_cnt++ > xc_func_time_limit) {
			/* no timeout while send-mondo recovers */
			if (sendmondo_in_recover) {
				drv_usecwait(1);
				loop_cnt = 0;
				continue;
			}
			/*
			 * The first timeout is not fatal: issue
			 * XT_SYNC_ONE() to the target and wait for one
			 * more full period before giving up.
			 */
			if (first_time) {
				XT_SYNC_ONE(cix);
				first_time = 0;
				loop_cnt = 0;
				continue;
			}
			cmn_err(CE_PANIC, "xc_one() timeout, "
			    "xc_state[%d] != XC_IDLE", cix);
		}
	}
	xc_spl_enter[lcx] = 0;
	XC_STAT_INC(x_dstat[lcx][XC_ONE_OTHER]);
	mutex_exit(&xc_sys_mutex);

	kpreempt_enable();
}
533 
/*
 * xc_some - send a "x-call" to some cpus
 *
 * Only the members of cpuset that are also in cpu_ready_set are
 * targeted.  If the current cpu is one of them, func is called on it
 * directly and it is then removed from the set, so no request is ever
 * sent to self.  The remaining targets are served through the mailbox:
 * directly when the current cpu is xc_holder (inside an xc_attention()
 * session), otherwise under xc_sys_mutex together with a mondo raising
 * the xc_serv() soft interrupt.  Preemption is disabled for the duration
 * of the call.
 */
void
xc_some(cpuset_t cpuset, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t xc_cpuset, tset;

	ASSERT((uintptr_t)func > KERNELBASE);
	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);

	CPUSET_ZERO(tset);

	kpreempt_disable();
	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */

	/* tset is only handed to XC_TRACE; it names the current cpu */
	CPUSET_ADD(tset, lcx);

	/*
	 * only send to the CPU_READY ones
	 */
	xc_cpuset = cpu_ready_set;
	CPUSET_AND(xc_cpuset, cpuset);

	/*
	 * send to nobody; just return
	 */
	if (CPUSET_ISNULL(xc_cpuset)) {
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	if (CPU_IN_SET(xc_cpuset, lcx)) {
		/*
		 * same cpu just do it
		 */
		(*func)(arg1, arg2);
		CPUSET_DEL(xc_cpuset, lcx);
		/* done if the current cpu was the only target */
		if (CPUSET_ISNULL(xc_cpuset)) {
			XC_STAT_INC(x_dstat[lcx][XC_SOME_SELF]);
			XC_TRACE(XC_SOME_SELF, &tset, func, arg1, arg2);
			XC_SPL_EXIT(lcx, opl);
			kpreempt_enable();
			return;
		}
	}

	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
		cpuset_t mset = xc_mbox[lcx].xc_cpuset;

		/* every requested cpu must belong to the captured set */
		CPUSET_AND(mset, cpuset);
		ASSERT(MUTEX_HELD(&xc_sys_mutex));
		ASSERT(CPUSET_ISEQUAL(mset, cpuset));
		SEND_MBOX_ONLY(xc_cpuset, func, arg1, arg2, lcx, XC_WAIT);
		WAIT_MBOX_DONE(xc_cpuset, lcx, XC_WAIT, 0);
		XC_STAT_INC(x_dstat[lcx][XC_SOME_OTHER_H]);
		XC_TRACE(XC_SOME_OTHER_H, &xc_cpuset, func, arg1, arg2);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	/*
	 * Avoid dead lock if someone has sent us a xc_loop request while
	 * we are trying to grab xc_sys_mutex.
	 */
	XC_SPL_EXIT(lcx, opl);

	/*
	 * At this point, since we don't own xc_sys_mutex,
	 * our pil shouldn't run at or above the XCALL_PIL.
	 */
	ASSERT(getpil() < XCALL_PIL);

	/*
	 * grab xc_sys_mutex before writing to the xc_mbox
	 */
	mutex_enter(&xc_sys_mutex);
	xc_spl_enter[lcx] = 1;

	XC_TRACE(XC_SOME_OTHER, &xc_cpuset, func, arg1, arg2);
	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
	SEND_MBOX_MONDO(xc_cpuset, func, arg1, arg2, XC_IDLE);
	WAIT_MBOX_DONE(xc_cpuset, lcx, XC_IDLE, 1);

	xc_spl_enter[lcx] = 0;
	XC_STAT_INC(x_dstat[lcx][XC_SOME_OTHER]);
	mutex_exit(&xc_sys_mutex);
	kpreempt_enable();
}
627 
/*
 * xc_all - send a "x-call" to all cpus
 *
 * func is always called on the current cpu first; it is then requested
 * from every other cpu in cpu_ready_set.  Those are served through the
 * mailbox: directly when the current cpu is xc_holder (inside an
 * xc_attention() session), otherwise under xc_sys_mutex together with a
 * mondo raising the xc_serv() soft interrupt.  Preemption is disabled
 * for the duration of the call.
 */
void
xc_all(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t xc_cpuset, tset;

	ASSERT((uintptr_t)func > KERNELBASE);
	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);

	CPUSET_ZERO(tset);

	kpreempt_disable();
	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */

	/* tset is only handed to XC_TRACE; it names the current cpu */
	CPUSET_ADD(tset, lcx);

	/*
	 * same cpu just do it
	 */
	(*func)(arg1, arg2);
	xc_cpuset = cpu_ready_set;
	CPUSET_DEL(xc_cpuset, lcx);

	/* done if no other cpu is ready */
	if (CPUSET_ISNULL(xc_cpuset)) {
		XC_STAT_INC(x_dstat[lcx][XC_ALL_SELF]);
		XC_TRACE(XC_ALL_SELF, &tset, func, arg1, arg2);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
		cpuset_t mset = xc_mbox[lcx].xc_cpuset;

		/* every other ready cpu must belong to the captured set */
		CPUSET_AND(mset, xc_cpuset);
		ASSERT(MUTEX_HELD(&xc_sys_mutex));
		ASSERT(CPUSET_ISEQUAL(mset, xc_cpuset));
		XC_TRACE(XC_ALL_OTHER_H, &xc_cpuset, func, arg1, arg2);
		SEND_MBOX_ONLY(xc_cpuset, func, arg1, arg2, lcx, XC_WAIT);
		WAIT_MBOX_DONE(xc_cpuset, lcx, XC_WAIT, 0);
		XC_STAT_INC(x_dstat[lcx][XC_ALL_OTHER_H]);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	/*
	 * Avoid dead lock if someone has sent us a xc_loop request while
	 * we are trying to grab xc_sys_mutex.
	 */
	XC_SPL_EXIT(lcx, opl);

	/*
	 * At this point, since we don't own xc_sys_mutex,
	 * our pil shouldn't run at or above the XCALL_PIL.
	 */
	ASSERT(getpil() < XCALL_PIL);

	/*
	 * grab xc_sys_mutex before writing to the xc_mbox
	 */
	mutex_enter(&xc_sys_mutex);
	xc_spl_enter[lcx] = 1;

	XC_TRACE(XC_ALL_OTHER, &xc_cpuset, func, arg1, arg2);
	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
	SEND_MBOX_MONDO(xc_cpuset, func, arg1, arg2, XC_IDLE);
	WAIT_MBOX_DONE(xc_cpuset, lcx, XC_IDLE, 1);

	xc_spl_enter[lcx] = 0;
	XC_STAT_INC(x_dstat[lcx][XC_ALL_OTHER]);
	mutex_exit(&xc_sys_mutex);
	kpreempt_enable();
}
706 
707 /*
708  * xc_attention - paired with xc_dismissed()
709  *
710  * xt_attention() holds the xc_sys_mutex and xc_dismissed() releases it
711  * called when an initiator wants to capture some/all cpus for a critical
712  * session.
713  */
714 void
715 xc_attention(cpuset_t cpuset)
716 {
717 	int pix, lcx;
718 	cpuset_t xc_cpuset, tmpset;
719 	cpuset_t recv_cpuset;
720 	uint64_t loop_cnt = 0;
721 	int first_time = 1;
722 
723 	CPUSET_ZERO(recv_cpuset);
724 
725 	/*
726 	 * don't migrate the cpu until xc_dismissed() is finished
727 	 */
728 	ASSERT(getpil() < XCALL_PIL);
729 	mutex_enter(&xc_sys_mutex);
730 	lcx = (int)(CPU->cpu_id);
731 	ASSERT(x_dstat[lcx][XC_ATTENTION] ==
732 	    x_dstat[lcx][XC_DISMISSED]);
733 	ASSERT(xc_holder == -1);
734 	xc_mbox[lcx].xc_cpuset = cpuset;
735 	xc_holder = lcx; /* no membar; only current cpu needs the right lcx */
736 
737 	/*
738 	 * only send to the CPU_READY ones
739 	 */
740 	xc_cpuset = cpu_ready_set;
741 	CPUSET_AND(xc_cpuset, cpuset);
742 
743 	/*
744 	 * don't send mondo to self
745 	 */
746 	CPUSET_DEL(xc_cpuset, lcx);
747 
748 	XC_STAT_INC(x_dstat[lcx][XC_ATTENTION]);
749 	XC_TRACE(XC_ATTENTION, &xc_cpuset, NULL, NULL, NULL);
750 
751 	if (CPUSET_ISNULL(xc_cpuset))
752 		return;
753 
754 	xc_spl_enter[lcx] = 1;
755 	/*
756 	 * inform the target processors to enter into xc_loop()
757 	 */
758 	tmpset = xc_cpuset;
759 	init_mondo(setsoftint_tl1, xc_loop_inum, 0);
760 	for (pix = 0; pix < NCPU; pix++) {
761 		if (CPU_IN_SET(tmpset, pix)) {
762 			ASSERT(xc_mbox[pix].xc_state == XC_IDLE);
763 			xc_mbox[pix].xc_state = XC_ENTER;
764 			send_one_mondo(pix);
765 			CPUSET_DEL(tmpset, pix);
766 			if (CPUSET_ISNULL(tmpset)) {
767 				break;
768 			}
769 		}
770 	}
771 	xc_spl_enter[lcx] = 0;
772 
773 	/*
774 	 * make sure target processors have entered into xc_loop()
775 	 */
776 	while (!CPUSET_ISEQUAL(recv_cpuset, xc_cpuset)) {
777 		tmpset = xc_cpuset;
778 		for (pix = 0; pix < NCPU; pix++) {
779 			if (CPU_IN_SET(tmpset, pix)) {
780 				/*
781 				 * membar_stld() is done in xc_loop
782 				 */
783 				if (xc_mbox[pix].xc_state == XC_WAIT) {
784 					CPUSET_ADD(recv_cpuset, pix);
785 				}
786 				CPUSET_DEL(tmpset, pix);
787 				if (CPUSET_ISNULL(tmpset)) {
788 					break;
789 				}
790 			}
791 		}
792 		if (loop_cnt++ > xc_mondo_time_limit) {
793 			if (sendmondo_in_recover) {
794 				drv_usecwait(1);
795 				loop_cnt = 0;
796 				continue;
797 			}
798 			if (first_time) {
799 				XT_SYNC_SOME(xc_cpuset);
800 				first_time = 0;
801 				loop_cnt = 0;
802 				continue;
803 			}
804 			cmn_err(CE_PANIC, "xc_attention() timeout");
805 		}
806 	}
807 
808 	/*
809 	 * xc_sys_mutex remains held until xc_dismissed() is finished
810 	 */
811 }
812 
813 /*
814  * xc_dismissed - paired with xc_attention()
815  *
816  * Called after the critical session is finished.
817  */
818 void
819 xc_dismissed(cpuset_t cpuset)
820 {
821 	int pix;
822 	int lcx = (int)(CPU->cpu_id);
823 	cpuset_t xc_cpuset, tmpset;
824 	cpuset_t recv_cpuset;
825 	uint64_t loop_cnt = 0;
826 
827 	ASSERT(lcx == xc_holder);
828 	ASSERT(CPUSET_ISEQUAL(xc_mbox[lcx].xc_cpuset, cpuset));
829 	ASSERT(getpil() >= XCALL_PIL);
830 	CPUSET_ZERO(xc_mbox[lcx].xc_cpuset);
831 	CPUSET_ZERO(recv_cpuset);
832 	membar_stld();
833 
834 	XC_STAT_INC(x_dstat[lcx][XC_DISMISSED]);
835 	ASSERT(x_dstat[lcx][XC_DISMISSED] == x_dstat[lcx][XC_ATTENTION]);
836 
837 	/*
838 	 * only send to the CPU_READY ones
839 	 */
840 	xc_cpuset = cpu_ready_set;
841 	CPUSET_AND(xc_cpuset, cpuset);
842 
843 	/*
844 	 * exclude itself
845 	 */
846 	CPUSET_DEL(xc_cpuset, lcx);
847 	XC_TRACE(XC_DISMISSED, &xc_cpuset, NULL, NULL, NULL);
848 	if (CPUSET_ISNULL(xc_cpuset)) {
849 		xc_holder = -1;
850 		mutex_exit(&xc_sys_mutex);
851 		return;
852 	}
853 
854 	/*
855 	 * inform other processors to get out of xc_loop()
856 	 */
857 	tmpset = xc_cpuset;
858 	for (pix = 0; pix < NCPU; pix++) {
859 		if (CPU_IN_SET(tmpset, pix)) {
860 			xc_mbox[pix].xc_state = XC_EXIT;
861 			membar_stld();
862 			CPUSET_DEL(tmpset, pix);
863 			if (CPUSET_ISNULL(tmpset)) {
864 				break;
865 			}
866 		}
867 	}
868 
869 	/*
870 	 * make sure target processors have exited from xc_loop()
871 	 */
872 	while (!CPUSET_ISEQUAL(recv_cpuset, xc_cpuset)) {
873 		tmpset = xc_cpuset;
874 		for (pix = 0; pix < NCPU; pix++) {
875 			if (CPU_IN_SET(tmpset, pix)) {
876 				/*
877 				 * membar_stld() is done in xc_loop
878 				 */
879 				if (xc_mbox[pix].xc_state == XC_IDLE) {
880 					CPUSET_ADD(recv_cpuset, pix);
881 				}
882 				CPUSET_DEL(tmpset, pix);
883 				if (CPUSET_ISNULL(tmpset)) {
884 					break;
885 				}
886 			}
887 		}
888 		if (loop_cnt++ > xc_func_time_limit) {
889 				if (sendmondo_in_recover) {
890 					drv_usecwait(1);
891 					loop_cnt = 0;
892 					continue;
893 				}
894 			cmn_err(CE_PANIC, "xc_dismissed() timeout");
895 		}
896 	}
897 	xc_holder = -1;
898 	mutex_exit(&xc_sys_mutex);
899 }
900 
901 /*
902  * xc_serv - "x-call" handler at TL=0; serves only one x-call request
903  * runs at XCALL_PIL level.
904  */
905 uint_t
906 xc_serv(void)
907 {
908 	int lcx = (int)(CPU->cpu_id);
909 	struct xc_mbox *xmp;
910 	xcfunc_t *func;
911 	uint64_t arg1, arg2;
912 	cpuset_t tset;
913 
914 	ASSERT(getpil() == XCALL_PIL);
915 	CPUSET_ZERO(tset);
916 	CPUSET_ADD(tset, lcx);
917 	flush_windows();
918 	xmp = &xc_mbox[lcx];
919 	ASSERT(lcx != xc_holder);
920 	ASSERT(xmp->xc_state == XC_DOIT);
921 	func = xmp->xc_func;
922 	XC_TRACE(XC_SERV, &tset, func, xmp->xc_arg1, xmp->xc_arg2);
923 	if (func != NULL) {
924 		arg1 = xmp->xc_arg1;
925 		arg2 = xmp->xc_arg2;
926 		(*func)(arg1, arg2);
927 	}
928 	XC_STAT_INC(x_rstat[lcx][XC_SERV]);
929 	XC_TRACE(XC_SERV, &tset, func, arg1, arg2);
930 	xmp->xc_state = XC_IDLE;
931 	membar_stld();
932 	return (1);
933 }
934 
/*
 * if non-zero, an xc_loop timeout will cause a panic
 * otherwise print a warning
 */
uint_t xc_loop_panic = 0;
940 
941 /*
942  * xc_loop - "x-call" handler at TL=0; capture the cpu for a critial
943  * session, or serve multiple x-call requests runs at XCALL_PIL level.
944  */
945 uint_t
946 xc_loop(void)
947 {
948 	int lcx = (int)(CPU->cpu_id);
949 	struct xc_mbox *xmp;
950 	xcfunc_t *func;
951 	uint64_t arg1, arg2;
952 	uint64_t loop_cnt = 0;
953 	cpuset_t tset;
954 
955 	ASSERT(getpil() == XCALL_PIL);
956 
957 	CPUSET_ZERO(tset);
958 	flush_windows();
959 
960 	/*
961 	 * Some one must have owned the xc_sys_mutex;
962 	 * no further interrupt (at XCALL_PIL or below) can
963 	 * be taken by this processor until xc_loop exits.
964 	 *
965 	 * The owner of xc_sys_mutex (or xc_holder) can expect
966 	 * its xc/xt requests are handled as follows:
967 	 * 	xc requests use xc_mbox's handshaking for their services
968 	 * 	xt requests at TL>0 will be handled immediately
969 	 * 	xt requests at TL=0:
970 	 *		if their handlers'pils are <= XCALL_PIL, then
971 	 *			they will be handled after xc_loop exits
972 	 *			(so, they probably should not be used)
973 	 *		else they will be handled immediately
974 	 *
975 	 * For those who are not informed to enter xc_loop, if they
976 	 * send xc/xt requests to this processor at this moment,
977 	 * the requests will be handled as follows:
978 	 *	xc requests will be handled after they grab xc_sys_mutex
979 	 *	xt requests at TL>0 will be handled immediately
980 	 * 	xt requests at TL=0:
981 	 *		if their handlers'pils are <= XCALL_PIL, then
982 	 *			they will be handled after xc_loop exits
983 	 *		else they will be handled immediately
984 	 */
985 	xmp = &xc_mbox[lcx];
986 	ASSERT(lcx != xc_holder);
987 	ASSERT(xmp->xc_state == XC_ENTER);
988 	xmp->xc_state = XC_WAIT;
989 	CPUSET_ADD(tset, lcx);
990 	membar_stld();
991 	XC_STAT_INC(x_rstat[lcx][XC_LOOP]);
992 	XC_TRACE(XC_LOOP_ENTER, &tset, NULL, NULL, NULL);
993 	while (xmp->xc_state != XC_EXIT) {
994 		if (xmp->xc_state == XC_DOIT) {
995 			func = xmp->xc_func;
996 			arg1 = xmp->xc_arg1;
997 			arg2 = xmp->xc_arg2;
998 			XC_TRACE(XC_LOOP_DOIT, &tset, func, arg1, arg2);
999 			if (func != NULL)
1000 				(*func)(arg1, arg2);
1001 			xmp->xc_state = XC_WAIT;
1002 			membar_stld();
1003 			/*
1004 			 * reset the timeout counter
1005 			 * since some work was done
1006 			 */
1007 			loop_cnt = 0;
1008 		} else {
1009 			/* patience is a virtue... */
1010 			loop_cnt++;
1011 		}
1012 
1013 		if (loop_cnt > xc_func_time_limit) {
1014 			if (sendmondo_in_recover) {
1015 				drv_usecwait(1);
1016 				loop_cnt = 0;
1017 				continue;
1018 			}
1019 			cmn_err(xc_loop_panic ? CE_PANIC : CE_WARN,
1020 			    "xc_loop() timeout");
1021 			/*
1022 			 * if the above displayed a warning,
1023 			 * reset the timeout counter and be patient
1024 			 */
1025 			loop_cnt = 0;
1026 		}
1027 	}
1028 	ASSERT(xmp->xc_state == XC_EXIT);
1029 	ASSERT(xc_holder != -1);
1030 	XC_TRACE(XC_LOOP_EXIT, &tset, NULL, NULL, NULL);
1031 	xmp->xc_state = XC_IDLE;
1032 	membar_stld();
1033 	return (1);
1034 }
1035