/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/xc_levels.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/stack.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>

/*
 * Implementation for cross-processor calls via interprocessor interrupts
 *
 * This implementation uses a message-passing architecture to allow multiple
 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
 * instruction, aka atomic_cas_ptr(), to implement simple, efficient work
 * queues for message passing between CPUs with almost no need for regular
 * locking. See xc_extract() and xc_insert() below.
 *
 * The general idea is that initiating a cross call means putting a message
 * on each target CPU's work queue. Any synchronization is handled by passing
 * the message back and forth between initiator and target(s).
 *
 * Every CPU has xc_work_cnt, which is nonzero when it has messages to
 * process. This value is incremented as message traffic is initiated and
 * decremented with every message that finishes all processing.
 *
 * The code needs no mfence or other membar_*() calls. The uses of
 * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
 * passing are implemented with LOCK prefix instructions which are
 * equivalent to mfence.
 *
 * One interesting aspect of this implementation is that it allows 2 or more
 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
 * The cross call processing by the CPUs will happen in any order with only
 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
 * from cross calls before all slaves have invoked the function.
 *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all N CPUs try to do a global TLB invalidation on
 * different virtual addresses at the same time, the old code required
 * N squared IPIs. With this method, depending on timing, it can happen
 * with just N IPIs.
 */
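
/*
 * As a rough usage sketch (a hypothetical caller, not code from this
 * file): an initiator builds a bitmask of target CPUs and hands it,
 * along with a handler and up to three arguments, to one of the entry
 * points below. The names "target_id", "va" and "tlb_inval_func" are
 * illustrative only.
 *
 *	cpuset_t set;
 *
 *	CPUSET_ZERO(set);
 *	CPUSET_ADD(set, target_id);
 *	xc_call((xc_arg_t)va, 0, 0, CPUSET2BV(set), tlb_inval_func);
 */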

/*
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;	/* # times we piggybacked on another IPI */

/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens on the slave CPU and "=>" happens on the master CPU as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC -> DONE => FREE
 * FREE => CALL  -> DONE => FREE
 * FREE => SYNC  -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE? If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more
 * complicated logic. However, since nothing important uses ASYNC, I've
 * not bothered.
 */
#define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
#define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
#define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
#define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
#define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
#define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
#define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */

/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 * xc_priority_set is a bitmask of the CPUs with a priority request pending.
 */
static volatile cpuset_t xc_priority_set_store;
static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
static xc_data_t xc_priority_data;

/*
 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 * operations don't accept volatile bit vectors - which is a bit silly.
 */
#define	XC_BT_SET(vector, b)	BT_ATOMIC_SET((ulong_t *)(vector), (b))
#define	XC_BT_CLEAR(vector, b)	BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))

/*
 * Decrement a CPU's work count.
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}

/*
 * Increment a CPU's work count and return the old value. A return value
 * of zero tells the caller it posted the first pending work item and
 * must notify the target CPU.
 */
static int
xc_increment(struct machcpu *mcpu)
{
	int old;

	do {
		old = mcpu->xc_work_cnt;
	} while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
	return (old);
}

/*
 * Put a message into a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen.
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
	xc_msg_t *old_head;

	/*
	 * FREE messages should only ever be getting inserted into
	 * the xc_master CPU's xc_free queue.
	 */
	ASSERT(msg->xc_command != XC_MSG_FREE ||
	    cpu[msg->xc_master] == NULL || /* possible only during init */
	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		msg->xc_next = old_head;
	} while (atomic_cas_ptr(queue, old_head, msg) != old_head);
}

/*
 * Extract a message from a queue. The extraction is atomic only
 * when just one thread does extractions from the queue.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		if (old_head == NULL)
			return (old_head);
	} while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
	    old_head);
	old_head->xc_next = NULL;
	return (old_head);
}
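
/*
 * A note on the single-extractor requirement above: each xc_msgbox and
 * xc_free queue is normally drained only by its owning CPU, and the
 * xc_waiters list in xc_serv() is local to the servicing CPU.
 * xc_fini_cpu() below drains the free list of a CPU that is already
 * offline, so even there only one extractor runs at a time.
 */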

/*
 * Initialize the machcpu fields used for cross calls
 */
static uint_t xc_initialized = 0;

void
xc_init_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;
	int c;

	/*
	 * Allocate message buffers for the new CPU.
	 */
	for (c = 0; c < max_ncpus; ++c) {
		if (plat_dr_support_cpu()) {
			/*
			 * Allocate a message buffer for every CPU possible
			 * in the system, including our own, and add them to
			 * our xc message queue.
			 */
			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
			msg->xc_command = XC_MSG_FREE;
			msg->xc_master = cpup->cpu_id;
			xc_insert(&cpup->cpu_m.xc_free, msg);
		} else if (cpu[c] != NULL && cpu[c] != cpup) {
			/*
			 * Add a new message buffer to each existing CPU's
			 * free list, as well as one to my list for each of
			 * them. Note: cpu0 is statically inserted into the
			 * cpu[] array, so we need to check that cpu[c] isn't
			 * cpup itself to avoid allocating extra message
			 * buffers for cpu0.
			 */
			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
			msg->xc_command = XC_MSG_FREE;
			msg->xc_master = c;
			xc_insert(&cpu[c]->cpu_m.xc_free, msg);

			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
			msg->xc_command = XC_MSG_FREE;
			msg->xc_master = cpup->cpu_id;
			xc_insert(&cpup->cpu_m.xc_free, msg);
		}
	}

	if (!plat_dr_support_cpu()) {
		/*
		 * Add one for self messages if CPU hotplug is disabled.
		 */
		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		msg->xc_master = cpup->cpu_id;
		xc_insert(&cpup->cpu_m.xc_free, msg);
	}

	if (!xc_initialized)
		xc_initialized = 1;
}

void
xc_fini_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;

	ASSERT((cpup->cpu_flags & CPU_READY) == 0);
	ASSERT(cpup->cpu_m.xc_msgbox == NULL);
	ASSERT(cpup->cpu_m.xc_work_cnt == 0);

	while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
		kmem_free(msg, sizeof (*msg));
	}
}

#define	XC_FLUSH_MAX_WAITS	1000

/* Flush inflight message buffers. */
int
xc_flush_cpu(struct cpu *cpup)
{
	int i;

	ASSERT((cpup->cpu_flags & CPU_READY) == 0);

	/*
	 * Pause all working CPUs, which ensures that there's no CPU in
	 * function xc_common().
	 * This is used to work around a race condition window in xc_common()
	 * between checking the CPU_READY flag and increasing the work item
	 * count.
	 */
	pause_cpus(cpup, NULL);
	start_cpus();

	for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
		if (cpup->cpu_m.xc_work_cnt == 0) {
			break;
		}
		DELAY(1);
	}
	for (; i < XC_FLUSH_MAX_WAITS; i++) {
		if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
			break;
		}
		DELAY(1);
	}

	return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
}

/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled or by having a raised SPL.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	struct machcpu *mcpup = &(CPU->cpu_m);
	xc_msg_t *msg;
	xc_data_t *data;
	xc_msg_t *xc_waiters = NULL;
	uint32_t num_waiting = 0;
	xc_func_t func;
	xc_arg_t a1;
	xc_arg_t a2;
	xc_arg_t a3;
	uint_t rc = DDI_INTR_UNCLAIMED;

	while (mcpup->xc_work_cnt != 0) {
		rc = DDI_INTR_CLAIMED;

		/*
		 * We may have to wait for a message to arrive.
		 */
		for (msg = NULL; msg == NULL;
		    msg = xc_extract(&mcpup->xc_msgbox)) {

			/*
			 * Always check for and handle a priority message.
			 */
			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
				func = xc_priority_data.xc_func;
				a1 = xc_priority_data.xc_a1;
				a2 = xc_priority_data.xc_a2;
				a3 = xc_priority_data.xc_a3;
				XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
				xc_decrement(mcpup);
				func(a1, a2, a3);
				if (mcpup->xc_work_cnt == 0)
					return (rc);
			}

			/*
			 * wait for a message to arrive
			 */
			SMT_PAUSE();
		}

		/*
		 * Process the message.
		 */
		switch (msg->xc_command) {

		/*
		 * ASYNC gives back the message immediately, then we do the
		 * function and return with no more waiting.
		 */
		case XC_MSG_ASYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			func = data->xc_func;
			a1 = data->xc_a1;
			a2 = data->xc_a2;
			a3 = data->xc_a3;
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			if (func != NULL)
				(void) (*func)(a1, a2, a3);
			xc_decrement(mcpup);
			break;

		/*
		 * SYNC messages do the call, then send it back to the master
		 * in WAITING mode
		 */
		case XC_MSG_SYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			msg->xc_command = XC_MSG_WAITING;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			break;

		/*
		 * WAITING messages are collected by the master until all
		 * have arrived. Once all arrive, we release them back to
		 * the slaves
		 */
		case XC_MSG_WAITING:
			xc_insert(&xc_waiters, msg);
			if (++num_waiting < mcpup->xc_wait_cnt)
				break;
			while ((msg = xc_extract(&xc_waiters)) != NULL) {
				msg->xc_command = XC_MSG_RELEASED;
				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
				    msg);
				--num_waiting;
			}
			if (num_waiting != 0)
				panic("wrong number waiting");
			mcpup->xc_wait_cnt = 0;
			break;

		/*
		 * CALL messages do the function and then, like RELEASED,
		 * send the message back to the master as DONE.
		 */
		case XC_MSG_CALL:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			/*FALLTHROUGH*/
		case XC_MSG_RELEASED:
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			xc_decrement(mcpup);
			break;

		/*
		 * DONE means a slave has completely finished up.
		 * Once we collect all the DONE messages, we'll exit
		 * processing too.
		 */
		case XC_MSG_DONE:
			msg->xc_command = XC_MSG_FREE;
			xc_insert(&mcpup->xc_free, msg);
			xc_decrement(mcpup);
			break;

		case XC_MSG_FREE:
			panic("free message 0x%p in msgbox", (void *)msg);
			break;

		default:
			panic("bad message 0x%p in msgbox", (void *)msg);
			break;
		}
	}
	return (rc);
}

/*
 * Initiate cross call processing.
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	uint_t command)
{
	int c;
	struct cpu *cpup;
	xc_msg_t *msg;
	xc_data_t *data;
	int cnt;
	int save_spl;

	if (!xc_initialized) {
		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
		    func != NULL)
			(void) (*func)(arg1, arg2, arg3);
		return;
	}

	save_spl = splr(ipltospl(XC_HI_PIL));

	/*
	 * fill in cross call data
	 */
	data = &CPU->cpu_m.xc_data;
	data->xc_func = func;
	data->xc_a1 = arg1;
	data->xc_a2 = arg2;
	data->xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 */
	CPU->cpu_m.xc_wait_cnt = 0;
	for (c = 0; c < max_ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * Fill out a new message.
		 */
		msg = xc_extract(&CPU->cpu_m.xc_free);
		if (msg == NULL)
			panic("Ran out of free xc_msg_t's");
		msg->xc_command = command;
		if (msg->xc_master != CPU->cpu_id)
			panic("msg %p has wrong xc_master", (void *)msg);
		msg->xc_slave = c;

		/*
		 * Increment my work count for all messages that I'll
		 * transition from DONE to FREE.
		 * Also remember how many XC_MSG_WAITINGs to look for
		 */
		(void) xc_increment(&CPU->cpu_m);
		if (command == XC_MSG_SYNC)
			++CPU->cpu_m.xc_wait_cnt;

		/*
		 * Increment the target CPU work count then insert the message
		 * in the target msgbox. If I post the first bit of work
		 * for the target to do, send an IPI to the target CPU.
		 */
		cnt = xc_increment(&cpup->cpu_m);
		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
		if (cpup != CPU) {
			if (cnt == 0) {
				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
				send_dirint(c, XC_HI_PIL);
				if (xc_collect_enable)
					++xc_total_cnt;
			} else if (xc_collect_enable) {
				++xc_multi_cnt;
			}
		}
	}

	/*
	 * Now drop into the message handler until all work is done
	 */
	(void) xc_serv(NULL, NULL);
	splx(save_spl);
}

/*
 * Push out a priority cross call.
 */
static void
xc_priority_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set)
{
	int i;
	int c;
	struct cpu *cpup;

	/*
	 * Wait briefly for any previous xc_priority to have finished.
	 */
	for (c = 0; c < max_ncpus; ++c) {
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * The value of 40000 here is from old kernel code. It
		 * really should be changed to some time-based value, since
		 * under a hypervisor, there's no guarantee a remote CPU
		 * is even scheduled.
		 */
		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
			SMT_PAUSE();

		/*
		 * Some CPU did not respond to a previous priority request.
		 * It's probably deadlocked with interrupts blocked or some
		 * such problem. We'll just erase the previous request - which
		 * was most likely a kmdb_enter that has already expired - and
		 * plow ahead.
		 */
		if (BT_TEST(xc_priority_set, c)) {
			XC_BT_CLEAR(xc_priority_set, c);
			if (cpup->cpu_m.xc_work_cnt > 0)
				xc_decrement(&cpup->cpu_m);
		}
	}

	/*
	 * fill in cross call data
	 */
	xc_priority_data.xc_func = func;
	xc_priority_data.xc_a1 = arg1;
	xc_priority_data.xc_a2 = arg2;
	xc_priority_data.xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
	 */
	for (c = 0; c < max_ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
		    cpup == CPU)
			continue;
		(void) xc_increment(&cpup->cpu_m);
		XC_BT_SET(xc_priority_set, c);
		send_dirint(c, XC_HI_PIL);
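		/*
		 * The dummy compare-and-swap below rewrites the target's
		 * xc_msgbox in place; the intent (per the comment above) is
		 * that a CPU idling in MONITOR/MWAIT on that cacheline sees
		 * the store and wakes to notice the priority request.
		 */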
		for (i = 0; i < 10; ++i) {
			(void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
		}
	}
}

/*
 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
 * This should only be used for extraordinary operations, like panic(), which
 * need to work, in some fashion, in a not completely functional system.
 * All other uses that want minimal waiting should use xc_call_nowait().
 */
void
xc_priority(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_spl = splr(ipltospl(XC_HI_PIL));
	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;

	IGNORE_KERNEL_PREEMPTION = 1;
	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
	splx(save_spl);
}

/*
 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	cpuset_t set;

	if (!xc_initialized)
		return;

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	CPUSET_ALL_BUT(set, this_cpu);
	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}

/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}