xref: /titanic_44/usr/src/uts/sun4/os/x_call.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machsystm.h>
32 #include <sys/cpuvar.h>
33 #include <sys/intreg.h>
34 #include <sys/x_call.h>
35 #include <sys/cmn_err.h>
36 #include <sys/membar.h>
37 #include <sys/disp.h>
38 #include <sys/debug.h>
39 #include <sys/privregs.h>
40 #include <sys/xc_impl.h>
41 #include <sys/ivintr.h>
42 #include <sys/dmv.h>
43 
44 #ifdef DEBUG
45 uint_t x_dstat[NCPU][XC_LOOP_EXIT+1];
46 uint_t x_rstat[NCPU][4];
47 #endif /* DEBUG */
48 
49 static int xc_serv_inum;	/* software interrupt number for xc_serv() */
50 static int xc_loop_inum;	/* software interrupt number for xc_loop() */
51 kmutex_t xc_sys_mutex;		/* protect xcall session and xc_mbox */
52 int xc_spl_enter[NCPU];		/* protect sending x-call */
53 static int xc_holder = -1; /* the cpu that initiates xc_attention, 0 is valid */
54 
55 /*
56  * Mailbox for handshaking and xcall requests; protected by xc_sys_mutex
57  */
58 static struct xc_mbox {
59 	xcfunc_t *xc_func;
60 	uint64_t xc_arg1;
61 	uint64_t xc_arg2;
62 	cpuset_t xc_cpuset;
63 	volatile uint_t	xc_state;
64 } xc_mbox[NCPU];
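/*
 * Sketch of the xc_state handshake as used in this file (the XC_* states
 * and the XC_SETUP() macro come from the x-call implementation headers):
 *
 *   one-shot request (xc_one/xc_some/xc_all without a captured session):
 *	initiator: XC_IDLE -> XC_DOIT via XC_SETUP(), then a mondo for
 *		   xc_serv(); waits for the slot to return to XC_IDLE
 *	target:	   xc_serv() runs the function and sets XC_IDLE
 *
 *   captured session (xc_attention() ... xc_dismissed()):
 *	xc_attention():	  XC_IDLE -> XC_ENTER, mondo for xc_loop()
 *	target xc_loop(): XC_ENTER -> XC_WAIT, then polls the mailbox
 *	holder requests:  XC_WAIT -> XC_DOIT; xc_loop() runs the function
 *			  and sets XC_WAIT again
 *	xc_dismissed():	  XC_WAIT -> XC_EXIT; xc_loop() exits and sets XC_IDLE
 */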
65 
66 uint64_t xc_tick_limit;		/* send_mondo() tick limit value */
67 uint64_t xc_tick_limit_scale = 1;	/* scale used to increase the limit */
68 uint64_t xc_tick_jump_limit;	/* send_mondo() irregular tick jump limit */
69 
70 /* timeout value for xcalls to be received by the target CPU */
71 uint64_t xc_mondo_time_limit;
72 
73 /* timeout value for xcall functions to be executed on the target CPU */
74 uint64_t xc_func_time_limit;
75 
76 uint64_t xc_scale = 1;	/* scale used to calculate timeout limits */
77 
78 uint_t sendmondo_in_recover;
79 
80 /*
81  * sending x-calls
82  */
83 void	init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2);
84 void	send_one_mondo(int cpuid);
85 void	send_mondo_set(cpuset_t set);
86 
87 /*
88  * xc_init - initialize x-call related locks
89  */
90 void
91 xc_init(void)
92 {
93 #ifdef DEBUG
94 	int pix;
95 #endif /* DEBUG */
96 
97 	mutex_init(&xc_sys_mutex, NULL, MUTEX_SPIN,
98 	    (void *)ipltospl(XCALL_PIL));
99 
100 #ifdef DEBUG
101 	/* Initialize for all possible CPUs. */
102 	for (pix = 0; pix < NCPU; pix++) {
103 		XC_STAT_INIT(pix);
104 	}
105 #endif /* DEBUG */
106 
107 	xc_serv_inum = add_softintr(XCALL_PIL, (softintrfunc)xc_serv, 0);
108 	xc_loop_inum = add_softintr(XCALL_PIL, (softintrfunc)xc_loop, 0);
109 
110 	/*
111 	 * Initialize the calibrated tick limit for send_mondo.
112 	 * The value represents the maximum tick count to wait.
113 	 */
114 	xc_tick_limit =
115 	    ((uint64_t)sys_tick_freq * XC_SEND_MONDO_MSEC) / 1000;
116 	xc_tick_jump_limit = xc_tick_limit / 32;
117 	xc_tick_limit *= xc_tick_limit_scale;
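	/*
	 * For example (hypothetical numbers, for illustration only): with a
	 * sys_tick_freq of 150 MHz and an XC_SEND_MONDO_MSEC of 1000, the
	 * limit above works out to 150,000,000 ticks of %stick time, i.e.
	 * roughly one second as the maximum send_mondo() wait (further
	 * scaled by xc_tick_limit_scale).
	 */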
118 
119 	/*
120 	 * Maximum number of loops to wait before timing out in xc_attention.
121 	 */
122 	xc_mondo_time_limit = cpunodes[CPU->cpu_id].clock_freq * xc_scale;
123 
124 	/*
125 	 * Maximum number of loops to wait for an xcall function to be
126 	 * executed on the target CPU.  Default to 10 times the value
127 	 * of xc_mondo_time_limit.
128 	 */
129 	xc_func_time_limit = xc_mondo_time_limit * 10;
130 }
131 
132 /*
133  * The following routines basically provide callers with two kinds of
134  * inter-processor interrupt services:
135  *	1. cross calls (x-calls) - requests are handled at target cpu's TL=0
136  *	2. cross traps (x-traps) - requests are handled at target cpu's TL>0
137  *
138  * Although these routines prevent the request from migrating to another cpu
139  * "after" they are called, it is the caller's responsibility to prevent
140  * cpu migration "before" calling them.
141  *
142  * X-call routines:
143  *
144  *	xc_one()  - send a request to one processor
145  *	xc_some() - send a request to some processors
146  *	xc_all()  - send a request to all processors
147  *
148  *	Their common parameters:
149  *		func - a TL=0 handler address
150  *		arg1 and arg2  - optional
151  *
152  *	The services provided by x-call routines allow callers
153  *	to send a request to target cpus to execute a TL=0
154  *	handler.
155  *	Register interface of the TL=0 handler:
156  *		%o0: arg1
157  *		%o1: arg2
158  *
159  * X-trap routines:
160  *
161  *	xt_one()  - send a request to one processor
162  *	xt_some() - send a request to some processors
163  *	xt_all()  - send a request to all processors
164  *
165  *	Their common parameters:
166  *		func - a TL>0 handler address or an interrupt number
167  *		arg1, arg2
168  *		       optional when "func" is an address;
169  *		       0        when "func" is an interrupt number
170  *
171  *	If "func" is a kernel address, then the target cpu
172  *	will execute "func" with the args at "TL>0" level.
173  *
174  *	Register interface of the TL>0 handler:
175  *		%g1: arg1
176  *		%g2: arg2
177  *
178  *	If "func" is not a kernel address, then it has to be an
179  *	interrupt number assigned through add_softintr().
180  *	An interrupt number is an index into the interrupt vector table,
181  *	whose entry contains an interrupt handler address with its
182  *	corresponding interrupt level and argument.
183  *	The target cpu will arrange for the request to be serviced
184  *	according to that pre-registered information.
185  *	The args are assumed to be zeros in this case.
186  *
187  * In addition, callers are allowed to capture and release cpus by
188  * calling the routines: xc_attention() and xc_dismissed().
189  */
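/*
 * Illustrative usage sketch; "my_tl0_handler" and "my_tl1_handler" are
 * hypothetical TL=0 and TL>0 handler addresses (not defined in this file),
 * and cpuid/cpuset/arg1/arg2 are whatever the caller needs:
 *
 *	xc_one(cpuid, my_tl0_handler, arg1, arg2);	- run on one cpu
 *	xc_some(cpuset, my_tl0_handler, arg1, arg2);	- run on a set of cpus
 *	xc_all(my_tl0_handler, arg1, arg2);		- run on all ready cpus
 *	xt_one(cpuid, my_tl1_handler, arg1, arg2);	- trap-level request
 */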
190 
191 /*
192  * xt_one - send a "x-trap" to a cpu
193  */
194 void
195 xt_one(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
196 {
197 	if (!CPU_IN_SET(cpu_ready_set, cix)) {
198 		return;
199 	}
200 	xt_one_unchecked(cix, func, arg1, arg2);
201 }
202 
203 /*
204  * xt_one_unchecked - send an "x-trap" to a cpu without checking for its
205  * existence in cpu_ready_set
206  */
207 void
208 xt_one_unchecked(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
209 {
210 	int lcx;
211 	int opl;
212 	cpuset_t tset;
213 
214 	/*
215 	 * Make sure the function address will not be interpreted as a
216 	 * dmv interrupt
217 	 */
218 	ASSERT(!DMV_IS_DMV(func));
219 
220 	/*
221 	 * It's illegal to send software inums through the cross-trap
222 	 * interface.
223 	 */
224 	ASSERT((uintptr_t)func >= KERNELBASE);
225 
226 	CPUSET_ZERO(tset);
227 
228 	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */
229 
230 	CPUSET_ADD(tset, cix);
231 
232 	if (cix == lcx) {
233 		/*
234 		 * same cpu - use software fast trap
235 		 */
236 		send_self_xcall(CPU, arg1, arg2, func);
237 		XC_STAT_INC(x_dstat[lcx][XT_ONE_SELF]);
238 		XC_TRACE(XT_ONE_SELF, &tset, func, arg1, arg2);
239 	} else {
240 		/*
241 		 * other cpu - send a mondo to the target cpu
242 		 */
243 		XC_TRACE(XT_ONE_OTHER, &tset, func, arg1, arg2);
244 		init_mondo(func, arg1, arg2);
245 		send_one_mondo(cix);
246 		XC_STAT_INC(x_dstat[lcx][XT_ONE_OTHER]);
247 	}
248 	XC_SPL_EXIT(lcx, opl);
249 }
250 
251 /*
252  * xt_some - send an "x-trap" to some cpus
253  */
254 void
255 xt_some(cpuset_t cpuset, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
256 {
257 	int lcx;
258 	int opl;
259 	cpuset_t xc_cpuset, tset;
260 
261 	/*
262 	 * Make sure the function address will not be interpreted as a
263 	 * dmv interrupt
264 	 */
265 	ASSERT(!DMV_IS_DMV(func));
266 
267 	/*
268 	 * It's illegal to send software inums through the cross-trap
269 	 * interface.
270 	 */
271 	ASSERT((uintptr_t)func >= KERNELBASE);
272 
273 	CPUSET_ZERO(tset);
274 
275 	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */
276 
277 	CPUSET_ADD(tset, lcx);
278 
279 	/*
280 	 * only send to the CPU_READY ones
281 	 */
282 	xc_cpuset = cpu_ready_set;
283 	CPUSET_AND(xc_cpuset, cpuset);
284 
285 	/*
286 	 * send to nobody; just return
287 	 */
288 	if (CPUSET_ISNULL(xc_cpuset)) {
289 		XC_SPL_EXIT(lcx, opl);
290 		return;
291 	}
292 
293 	/*
294 	 * don't send mondo to self
295 	 */
296 	if (CPU_IN_SET(xc_cpuset, lcx)) {
297 		/*
298 		 * same cpu - use software fast trap
299 		 */
300 		send_self_xcall(CPU, arg1, arg2, func);
301 		XC_STAT_INC(x_dstat[lcx][XT_SOME_SELF]);
302 		XC_TRACE(XT_SOME_SELF, &tset, func, arg1, arg2);
303 		CPUSET_DEL(xc_cpuset, lcx);
304 		if (CPUSET_ISNULL(xc_cpuset)) {
305 			XC_SPL_EXIT(lcx, opl);
306 			return;
307 		}
308 	}
309 	XC_TRACE(XT_SOME_OTHER, &xc_cpuset, func, arg1, arg2);
310 	init_mondo(func, arg1, arg2);
311 	send_mondo_set(xc_cpuset);
312 	XC_STAT_INC(x_dstat[lcx][XT_SOME_OTHER]);
313 
314 	XC_SPL_EXIT(lcx, opl);
315 }
316 
317 /*
318  * xt_all - send an "x-trap" to all cpus
319  */
320 void
321 xt_all(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
322 {
323 	int lcx;
324 	int opl;
325 	cpuset_t xc_cpuset, tset;
326 
327 	/*
328 	 * Make sure the function address will not be interpreted as a
329 	 * dmv interrupt
330 	 */
331 	ASSERT(!DMV_IS_DMV(func));
332 
333 	/*
334 	 * It's illegal to send software inums through the cross-trap
335 	 * interface.
336 	 */
337 	ASSERT((uintptr_t)func >= KERNELBASE);
338 
339 	CPUSET_ZERO(tset);
340 
341 	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */
342 
343 	CPUSET_ADD(tset, lcx);
344 
345 	/*
346 	 * same cpu - use software fast trap
347 	 */
348 	if (CPU_IN_SET(cpu_ready_set, lcx))
349 		send_self_xcall(CPU, arg1, arg2, func);
350 
351 	XC_TRACE(XT_ALL_OTHER, &cpu_ready_set, func, arg1, arg2);
352 
353 	/*
354 	 * don't send mondo to self
355 	 */
356 	xc_cpuset = cpu_ready_set;
357 	CPUSET_DEL(xc_cpuset, lcx);
358 
359 	if (CPUSET_ISNULL(xc_cpuset)) {
360 		XC_STAT_INC(x_dstat[lcx][XT_ALL_SELF]);
361 		XC_TRACE(XT_ALL_SELF, &tset, func, arg1, arg2);
362 		XC_SPL_EXIT(lcx, opl);
363 		return;
364 	}
365 
366 	init_mondo(func, arg1, arg2);
367 	send_mondo_set(xc_cpuset);
368 
369 	XC_STAT_INC(x_dstat[lcx][XT_ALL_OTHER]);
370 	XC_SPL_EXIT(lcx, opl);
371 }
372 
373 /*
374  * xc_one - send an "x-call" to a cpu
375  */
376 void
377 xc_one(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
378 {
379 	int lcx;
380 	int opl;
381 	uint64_t loop_cnt = 0;
382 	cpuset_t tset;
383 	int first_time = 1;
384 
385 	/*
386 	 * send to nobody; just return
387 	 */
388 	if (!CPU_IN_SET(cpu_ready_set, cix))
389 		return;
390 
391 	ASSERT((uintptr_t)func > KERNELBASE);
392 	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);
393 
394 	CPUSET_ZERO(tset);
395 
396 	kpreempt_disable();
397 
398 	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */
399 
400 	CPUSET_ADD(tset, cix);
401 
402 	if (cix == lcx) {	/* same cpu just do it */
403 		XC_TRACE(XC_ONE_SELF, &tset, func, arg1, arg2);
404 		(*func)(arg1, arg2);
405 		XC_STAT_INC(x_dstat[lcx][XC_ONE_SELF]);
406 		XC_SPL_EXIT(lcx, opl);
407 		kpreempt_enable();
408 		return;
409 	}
410 
411 	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
412 		ASSERT(MUTEX_HELD(&xc_sys_mutex));
413 		ASSERT(CPU_IN_SET(xc_mbox[lcx].xc_cpuset, lcx));
414 		ASSERT(CPU_IN_SET(xc_mbox[cix].xc_cpuset, cix));
415 		ASSERT(xc_mbox[cix].xc_state == XC_WAIT);
416 		XC_TRACE(XC_ONE_OTHER_H, &tset, func, arg1, arg2);
417 
418 		/*
419 		 * target processor's xc_loop should be waiting
420 		 * for the work to do; just set up the xc_mbox
421 		 */
422 		XC_SETUP(cix, func, arg1, arg2);
423 		membar_stld();
424 
425 		while (xc_mbox[cix].xc_state != XC_WAIT) {
426 			if (loop_cnt++ > xc_func_time_limit) {
427 				if (sendmondo_in_recover) {
428 					drv_usecwait(1);
429 					loop_cnt = 0;
430 					continue;
431 				}
432 				cmn_err(CE_PANIC, "xc_one() timeout, "
433 				    "xc_state[%d] != XC_WAIT", cix);
434 			}
435 		}
436 		XC_STAT_INC(x_dstat[lcx][XC_ONE_OTHER_H]);
437 		XC_SPL_EXIT(lcx, opl);
438 		kpreempt_enable();
439 		return;
440 	}
441 
442 	/*
443 	 * Avoid deadlock if someone has sent us an xc_loop request while
444 	 * we are trying to grab xc_sys_mutex.
445 	 */
446 	XC_SPL_EXIT(lcx, opl);
447 
448 	/*
449 	 * At this point, since we don't own xc_sys_mutex,
450 	 * we should not be running at or above XCALL_PIL.
451 	 */
452 	ASSERT(getpil() < XCALL_PIL);
453 
454 	/*
455 	 * Since we are not the xc_holder, either no one holds the
456 	 * x-call session, or we have not been asked to enter
457 	 * xc_loop(). In either case, we need to grab the
458 	 * xc_sys_mutex before we write to the xc_mbox, and
459 	 * we shouldn't release it until the request is finished.
460 	 */
461 
462 	mutex_enter(&xc_sys_mutex);
463 	xc_spl_enter[lcx] = 1;
464 
465 	/*
466 	 * Since we own xc_sys_mutex now, we are safe to
467 	 * write to the xc_mbox.
468 	 */
469 	ASSERT(xc_mbox[cix].xc_state == XC_IDLE);
470 	XC_TRACE(XC_ONE_OTHER, &tset, func, arg1, arg2);
471 	XC_SETUP(cix, func, arg1, arg2);
472 	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
473 	send_one_mondo(cix);
474 
475 	/* xc_serv does membar_stld */
476 	while (xc_mbox[cix].xc_state != XC_IDLE) {
477 		if (loop_cnt++ > xc_func_time_limit) {
478 			if (sendmondo_in_recover) {
479 				drv_usecwait(1);
480 				loop_cnt = 0;
481 				continue;
482 			}
483 			if (first_time) {
484 				XT_SYNC_ONE(cix);
485 				first_time = 0;
486 				loop_cnt = 0;
487 				continue;
488 			}
489 			cmn_err(CE_PANIC, "xc_one() timeout, "
490 			    "xc_state[%d] != XC_IDLE", cix);
491 		}
492 	}
493 	xc_spl_enter[lcx] = 0;
494 	XC_STAT_INC(x_dstat[lcx][XC_ONE_OTHER]);
495 	mutex_exit(&xc_sys_mutex);
496 
497 	kpreempt_enable();
498 }
499 
500 /*
501  * xc_some - send an "x-call" to some cpus; no mondo is sent to self
502  */
503 void
504 xc_some(cpuset_t cpuset, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
505 {
506 	int lcx;
507 	int opl;
508 	cpuset_t xc_cpuset, tset;
509 
510 	ASSERT((uintptr_t)func > KERNELBASE);
511 	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);
512 
513 	CPUSET_ZERO(tset);
514 
515 	kpreempt_disable();
516 	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */
517 
518 	CPUSET_ADD(tset, lcx);
519 
520 	/*
521 	 * only send to the CPU_READY ones
522 	 */
523 	xc_cpuset = cpu_ready_set;
524 	CPUSET_AND(xc_cpuset, cpuset);
525 
526 	/*
527 	 * send to nobody; just return
528 	 */
529 	if (CPUSET_ISNULL(xc_cpuset)) {
530 		XC_SPL_EXIT(lcx, opl);
531 		kpreempt_enable();
532 		return;
533 	}
534 
535 	if (CPU_IN_SET(xc_cpuset, lcx)) {
536 		/*
537 		 * same cpu just do it
538 		 */
539 		(*func)(arg1, arg2);
540 		CPUSET_DEL(xc_cpuset, lcx);
541 		if (CPUSET_ISNULL(xc_cpuset)) {
542 			XC_STAT_INC(x_dstat[lcx][XC_SOME_SELF]);
543 			XC_TRACE(XC_SOME_SELF, &tset, func, arg1, arg2);
544 			XC_SPL_EXIT(lcx, opl);
545 			kpreempt_enable();
546 			return;
547 		}
548 	}
549 
550 	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
551 		cpuset_t mset = xc_mbox[lcx].xc_cpuset;
552 
553 		CPUSET_AND(mset, cpuset);
554 		ASSERT(MUTEX_HELD(&xc_sys_mutex));
555 		ASSERT(CPUSET_ISEQUAL(mset, cpuset));
556 		SEND_MBOX_ONLY(xc_cpuset, func, arg1, arg2, lcx, XC_WAIT);
557 		WAIT_MBOX_DONE(xc_cpuset, lcx, XC_WAIT, 0);
558 		XC_STAT_INC(x_dstat[lcx][XC_SOME_OTHER_H]);
559 		XC_TRACE(XC_SOME_OTHER_H, &xc_cpuset, func, arg1, arg2);
560 		XC_SPL_EXIT(lcx, opl);
561 		kpreempt_enable();
562 		return;
563 	}
564 
565 	/*
566 	 * Avoid deadlock if someone has sent us an xc_loop request while
567 	 * we are trying to grab xc_sys_mutex.
568 	 */
569 	XC_SPL_EXIT(lcx, opl);
570 
571 	/*
572 	 * At this point, since we don't own xc_sys_mutex,
573 	 * we should not be running at or above XCALL_PIL.
574 	 */
575 	ASSERT(getpil() < XCALL_PIL);
576 
577 	/*
578 	 * grab xc_sys_mutex before writing to the xc_mbox
579 	 */
580 	mutex_enter(&xc_sys_mutex);
581 	xc_spl_enter[lcx] = 1;
582 
583 	XC_TRACE(XC_SOME_OTHER, &xc_cpuset, func, arg1, arg2);
584 	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
585 	SEND_MBOX_MONDO(xc_cpuset, func, arg1, arg2, XC_IDLE);
586 	WAIT_MBOX_DONE(xc_cpuset, lcx, XC_IDLE, 1);
587 
588 	xc_spl_enter[lcx] = 0;
589 	XC_STAT_INC(x_dstat[lcx][XC_SOME_OTHER]);
590 	mutex_exit(&xc_sys_mutex);
591 	kpreempt_enable();
592 }
593 
594 /*
595  * xc_all - send an "x-call" to all cpus
596  */
597 void
598 xc_all(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
599 {
600 	int lcx;
601 	int opl;
602 	cpuset_t xc_cpuset, tset;
603 
604 	ASSERT((uintptr_t)func > KERNELBASE);
605 	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);
606 
607 	CPUSET_ZERO(tset);
608 
609 	kpreempt_disable();
610 	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */
611 
612 	CPUSET_ADD(tset, lcx);
613 
614 	/*
615 	 * same cpu just do it
616 	 */
617 	(*func)(arg1, arg2);
618 	xc_cpuset = cpu_ready_set;
619 	CPUSET_DEL(xc_cpuset, lcx);
620 
621 	if (CPUSET_ISNULL(xc_cpuset)) {
622 		XC_STAT_INC(x_dstat[lcx][XC_ALL_SELF]);
623 		XC_TRACE(XC_ALL_SELF, &tset, func, arg1, arg2);
624 		XC_SPL_EXIT(lcx, opl);
625 		kpreempt_enable();
626 		return;
627 	}
628 
629 	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
630 		cpuset_t mset = xc_mbox[lcx].xc_cpuset;
631 
632 		CPUSET_AND(mset, xc_cpuset);
633 		ASSERT(MUTEX_HELD(&xc_sys_mutex));
634 		ASSERT(CPUSET_ISEQUAL(mset, xc_cpuset));
635 		XC_TRACE(XC_ALL_OTHER_H, &xc_cpuset, func, arg1, arg2);
636 		SEND_MBOX_ONLY(xc_cpuset, func, arg1, arg2, lcx, XC_WAIT);
637 		WAIT_MBOX_DONE(xc_cpuset, lcx, XC_WAIT, 0);
638 		XC_STAT_INC(x_dstat[lcx][XC_ALL_OTHER_H]);
639 		XC_SPL_EXIT(lcx, opl);
640 		kpreempt_enable();
641 		return;
642 	}
643 
644 	/*
645 	 * Avoid deadlock if someone has sent us an xc_loop request while
646 	 * we are trying to grab xc_sys_mutex.
647 	 */
648 	XC_SPL_EXIT(lcx, opl);
649 
650 	/*
651 	 * At this point, since we don't own xc_sys_mutex,
652 	 * our pil shouldn't run at or above the XCALL_PIL.
653 	 */
654 	ASSERT(getpil() < XCALL_PIL);
655 
656 	/*
657 	 * grab xc_sys_mutex before writing to the xc_mbox
658 	 */
659 	mutex_enter(&xc_sys_mutex);
660 	xc_spl_enter[lcx] = 1;
661 
662 	XC_TRACE(XC_ALL_OTHER, &xc_cpuset, func, arg1, arg2);
663 	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
664 	SEND_MBOX_MONDO(xc_cpuset, func, arg1, arg2, XC_IDLE);
665 	WAIT_MBOX_DONE(xc_cpuset, lcx, XC_IDLE, 1);
666 
667 	xc_spl_enter[lcx] = 0;
668 	XC_STAT_INC(x_dstat[lcx][XC_ALL_OTHER]);
669 	mutex_exit(&xc_sys_mutex);
670 	kpreempt_enable();
671 }
672 
673 /*
674  * xc_attention - paired with xc_dismissed()
675  *
676  * xc_attention() acquires and holds xc_sys_mutex; xc_dismissed() releases it.
677  * Called when an initiator wants to capture some/all cpus for a critical
678  * session.
679  */
680 void
681 xc_attention(cpuset_t cpuset)
682 {
683 	int pix, lcx;
684 	cpuset_t xc_cpuset, tmpset;
685 	cpuset_t recv_cpuset;
686 	uint64_t loop_cnt = 0;
687 	int first_time = 1;
688 
689 	CPUSET_ZERO(recv_cpuset);
690 
691 	/*
692 	 * don't migrate the cpu until xc_dismissed() is finished
693 	 */
694 	ASSERT(getpil() < XCALL_PIL);
695 	mutex_enter(&xc_sys_mutex);
696 	lcx = (int)(CPU->cpu_id);
697 	ASSERT(x_dstat[lcx][XC_ATTENTION] ==
698 	    x_dstat[lcx][XC_DISMISSED]);
699 	ASSERT(xc_holder == -1);
700 	xc_mbox[lcx].xc_cpuset = cpuset;
701 	xc_holder = lcx; /* no membar; only current cpu needs the right lcx */
702 
703 	/*
704 	 * only send to the CPU_READY ones
705 	 */
706 	xc_cpuset = cpu_ready_set;
707 	CPUSET_AND(xc_cpuset, cpuset);
708 
709 	/*
710 	 * don't send mondo to self
711 	 */
712 	CPUSET_DEL(xc_cpuset, lcx);
713 
714 	XC_STAT_INC(x_dstat[lcx][XC_ATTENTION]);
715 	XC_TRACE(XC_ATTENTION, &xc_cpuset, NULL, NULL, NULL);
716 
717 	if (CPUSET_ISNULL(xc_cpuset))
718 		return;
719 
720 	xc_spl_enter[lcx] = 1;
721 	/*
722 	 * inform the target processors to enter into xc_loop()
723 	 */
724 	tmpset = xc_cpuset;
725 	init_mondo(setsoftint_tl1, xc_loop_inum, 0);
726 	for (pix = 0; pix < NCPU; pix++) {
727 		if (CPU_IN_SET(tmpset, pix)) {
728 			ASSERT(xc_mbox[pix].xc_state == XC_IDLE);
729 			xc_mbox[pix].xc_state = XC_ENTER;
730 			send_one_mondo(pix);
731 			CPUSET_DEL(tmpset, pix);
732 			if (CPUSET_ISNULL(tmpset)) {
733 				break;
734 			}
735 		}
736 	}
737 	xc_spl_enter[lcx] = 0;
738 
739 	/*
740 	 * make sure target processors have entered into xc_loop()
741 	 */
742 	while (!CPUSET_ISEQUAL(recv_cpuset, xc_cpuset)) {
743 		tmpset = xc_cpuset;
744 		for (pix = 0; pix < NCPU; pix++) {
745 			if (CPU_IN_SET(tmpset, pix)) {
746 				/*
747 				 * membar_stld() is done in xc_loop
748 				 */
749 				if (xc_mbox[pix].xc_state == XC_WAIT) {
750 					CPUSET_ADD(recv_cpuset, pix);
751 				}
752 				CPUSET_DEL(tmpset, pix);
753 				if (CPUSET_ISNULL(tmpset)) {
754 					break;
755 				}
756 			}
757 		}
758 		if (loop_cnt++ > xc_mondo_time_limit) {
759 			if (sendmondo_in_recover) {
760 				drv_usecwait(1);
761 				loop_cnt = 0;
762 				continue;
763 			}
764 			if (first_time) {
765 				XT_SYNC_SOME(xc_cpuset);
766 				first_time = 0;
767 				loop_cnt = 0;
768 				continue;
769 			}
770 			cmn_err(CE_PANIC, "xc_attention() timeout");
771 		}
772 	}
773 
774 	/*
775 	 * xc_sys_mutex remains held until xc_dismissed() is finished
776 	 */
777 }
778 
779 /*
780  * xc_dismissed - paired with xc_attention()
781  *
782  * Called after the critical session is finished.
783  */
784 void
785 xc_dismissed(cpuset_t cpuset)
786 {
787 	int pix;
788 	int lcx = (int)(CPU->cpu_id);
789 	cpuset_t xc_cpuset, tmpset;
790 	cpuset_t recv_cpuset;
791 	uint64_t loop_cnt = 0;
792 
793 	ASSERT(lcx == xc_holder);
794 	ASSERT(CPUSET_ISEQUAL(xc_mbox[lcx].xc_cpuset, cpuset));
795 	ASSERT(getpil() >= XCALL_PIL);
796 	CPUSET_ZERO(xc_mbox[lcx].xc_cpuset);
797 	CPUSET_ZERO(recv_cpuset);
798 	membar_stld();
799 
800 	XC_STAT_INC(x_dstat[lcx][XC_DISMISSED]);
801 	ASSERT(x_dstat[lcx][XC_DISMISSED] == x_dstat[lcx][XC_ATTENTION]);
802 
803 	/*
804 	 * only send to the CPU_READY ones
805 	 */
806 	xc_cpuset = cpu_ready_set;
807 	CPUSET_AND(xc_cpuset, cpuset);
808 
809 	/*
810 	 * exclude itself
811 	 */
812 	CPUSET_DEL(xc_cpuset, lcx);
813 	XC_TRACE(XC_DISMISSED, &xc_cpuset, NULL, NULL, NULL);
814 	if (CPUSET_ISNULL(xc_cpuset)) {
815 		xc_holder = -1;
816 		mutex_exit(&xc_sys_mutex);
817 		return;
818 	}
819 
820 	/*
821 	 * inform other processors to get out of xc_loop()
822 	 */
823 	tmpset = xc_cpuset;
824 	for (pix = 0; pix < NCPU; pix++) {
825 		if (CPU_IN_SET(tmpset, pix)) {
826 			xc_mbox[pix].xc_state = XC_EXIT;
827 			membar_stld();
828 			CPUSET_DEL(tmpset, pix);
829 			if (CPUSET_ISNULL(tmpset)) {
830 				break;
831 			}
832 		}
833 	}
834 
835 	/*
836 	 * make sure target processors have exited from xc_loop()
837 	 */
838 	while (!CPUSET_ISEQUAL(recv_cpuset, xc_cpuset)) {
839 		tmpset = xc_cpuset;
840 		for (pix = 0; pix < NCPU; pix++) {
841 			if (CPU_IN_SET(tmpset, pix)) {
842 				/*
843 				 * membar_stld() is done in xc_loop
844 				 */
845 				if (xc_mbox[pix].xc_state == XC_IDLE) {
846 					CPUSET_ADD(recv_cpuset, pix);
847 				}
848 				CPUSET_DEL(tmpset, pix);
849 				if (CPUSET_ISNULL(tmpset)) {
850 					break;
851 				}
852 			}
853 		}
854 		if (loop_cnt++ > xc_func_time_limit) {
855 			if (sendmondo_in_recover) {
856 				drv_usecwait(1);
857 				loop_cnt = 0;
858 				continue;
859 			}
860 			cmn_err(CE_PANIC, "xc_dismissed() timeout");
861 		}
862 	}
863 	xc_holder = -1;
864 	mutex_exit(&xc_sys_mutex);
865 }
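/*
 * Illustrative capture/release sketch; "target_set" and "my_tl0_handler"
 * are hypothetical and not defined in this file:
 *
 *	xc_attention(target_set);			- xc_sys_mutex now held
 *	xc_some(target_set, my_tl0_handler, arg1, arg2);
 *	...						- more captured-cpu work
 *	xc_dismissed(target_set);			- xc_sys_mutex released
 */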
866 
867 /*
868  * xc_serv - "x-call" handler at TL=0; serves only one x-call request.
869  * Runs at XCALL_PIL level.
870  */
871 uint_t
872 xc_serv(void)
873 {
874 	int lcx = (int)(CPU->cpu_id);
875 	struct xc_mbox *xmp;
876 	xcfunc_t *func;
877 	uint64_t arg1, arg2;
878 	cpuset_t tset;
879 
880 	ASSERT(getpil() == XCALL_PIL);
881 	CPUSET_ZERO(tset);
882 	CPUSET_ADD(tset, lcx);
883 	flush_windows();
884 	xmp = &xc_mbox[lcx];
885 	ASSERT(lcx != xc_holder);
886 	ASSERT(xmp->xc_state == XC_DOIT);
887 	func = xmp->xc_func;
888 	XC_TRACE(XC_SERV, &tset, func, xmp->xc_arg1, xmp->xc_arg2);
889 	if (func != NULL) {
890 		arg1 = xmp->xc_arg1;
891 		arg2 = xmp->xc_arg2;
892 		(*func)(arg1, arg2);
893 	}
894 	XC_STAT_INC(x_rstat[lcx][XC_SERV]);
895 	XC_TRACE(XC_SERV, &tset, func, xmp->xc_arg1, xmp->xc_arg2);
896 	xmp->xc_state = XC_IDLE;
897 	membar_stld();
898 	return (1);
899 }
900 
901 /*
902  * If xc_loop_panic is nonzero, an xc_loop timeout will cause a panic;
903  * otherwise it prints a warning.
904  */
905 uint_t xc_loop_panic = 0;
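/*
 * Like other integer kernel globals, this can presumably be tuned at boot
 * via /etc/system, e.g. "set xc_loop_panic = 1".
 */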
906 
907 /*
908  * xc_loop - "x-call" handler at TL=0; captures the cpu for a critical
909  * session or serves multiple x-call requests.  Runs at XCALL_PIL level.
910  */
911 uint_t
912 xc_loop(void)
913 {
914 	int lcx = (int)(CPU->cpu_id);
915 	struct xc_mbox *xmp;
916 	xcfunc_t *func;
917 	uint64_t arg1, arg2;
918 	uint64_t loop_cnt = 0;
919 	cpuset_t tset;
920 
921 	ASSERT(getpil() == XCALL_PIL);
922 
923 	CPUSET_ZERO(tset);
924 	flush_windows();
925 
926 	/*
927 	 * Someone must be holding the xc_sys_mutex;
928 	 * no further interrupt (at XCALL_PIL or below) can
929 	 * be taken by this processor until xc_loop exits.
930 	 *
931 	 * The owner of xc_sys_mutex (or xc_holder) can expect
932 	 * its xc/xt requests are handled as follows:
933 	 * 	xc requests use xc_mbox's handshaking for their services
934 	 * 	xt requests at TL>0 will be handled immediately
935 	 * 	xt requests at TL=0:
936 	 *		if their handlers' pils are <= XCALL_PIL, then
937 	 *			they will be handled after xc_loop exits
938 	 *			(so, they probably should not be used)
939 	 *		else they will be handled immediately
940 	 *
941 	 * For those who are not informed to enter xc_loop, if they
942 	 * send xc/xt requests to this processor at this moment,
943 	 * the requests will be handled as follows:
944 	 *	xc requests will be handled after they grab xc_sys_mutex
945 	 *	xt requests at TL>0 will be handled immediately
946 	 * 	xt requests at TL=0:
947 	 *		if their handlers' pils are <= XCALL_PIL, then
948 	 *			they will be handled after xc_loop exits
949 	 *		else they will be handled immediately
950 	 */
951 	xmp = &xc_mbox[lcx];
952 	ASSERT(lcx != xc_holder);
953 	ASSERT(xmp->xc_state == XC_ENTER);
954 	xmp->xc_state = XC_WAIT;
955 	CPUSET_ADD(tset, lcx);
956 	membar_stld();
957 	XC_STAT_INC(x_rstat[lcx][XC_LOOP]);
958 	XC_TRACE(XC_LOOP_ENTER, &tset, NULL, NULL, NULL);
959 	while (xmp->xc_state != XC_EXIT) {
960 		if (xmp->xc_state == XC_DOIT) {
961 			func = xmp->xc_func;
962 			arg1 = xmp->xc_arg1;
963 			arg2 = xmp->xc_arg2;
964 			XC_TRACE(XC_LOOP_DOIT, &tset, func, arg1, arg2);
965 			if (func != NULL)
966 				(*func)(arg1, arg2);
967 			xmp->xc_state = XC_WAIT;
968 			membar_stld();
969 			/*
970 			 * reset the timeout counter
971 			 * since some work was done
972 			 */
973 			loop_cnt = 0;
974 		} else {
975 			/* patience is a virtue... */
976 			loop_cnt++;
977 		}
978 
979 		if (loop_cnt > xc_func_time_limit) {
980 			if (sendmondo_in_recover) {
981 				drv_usecwait(1);
982 				loop_cnt = 0;
983 				continue;
984 			}
985 			cmn_err(xc_loop_panic ? CE_PANIC : CE_WARN,
986 			    "xc_loop() timeout");
987 			/*
988 			 * if the above displayed a warning,
989 			 * reset the timeout counter and be patient
990 			 */
991 			loop_cnt = 0;
992 		}
993 	}
994 	ASSERT(xmp->xc_state == XC_EXIT);
995 	ASSERT(xc_holder != -1);
996 	XC_TRACE(XC_LOOP_EXIT, &tset, NULL, NULL, NULL);
997 	xmp->xc_state = XC_IDLE;
998 	membar_stld();
999 	return (1);
1000 }
1001