1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * evtchn.c
29 *
30 * Communication via hypervisor event channels.
31 *
32 * Copyright (c) 2002-2005, K A Fraser
33 *
34 * This file may be distributed separately from the Linux kernel, or
35 * incorporated into other software packages, subject to the following license:
36 *
37 * Permission is hereby granted, free of charge, to any person obtaining a copy
38 * of this source file (the "Software"), to deal in the Software without
39 * restriction, including without limitation the rights to use, copy, modify,
40 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
41 * and to permit persons to whom the Software is furnished to do so, subject to
42 * the following conditions:
43 *
44 * The above copyright notice and this permission notice shall be included in
45 * all copies or substantial portions of the Software.
46 *
47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
52 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
53 * IN THE SOFTWARE.
54 */
55
56 /* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */
57
58 /*
59 *
60 * Copyright (c) 2004 Christian Limpach.
61 * All rights reserved.
62 *
63 * Redistribution and use in source and binary forms, with or without
64 * modification, are permitted provided that the following conditions
65 * are met:
66 * 1. Redistributions of source code must retain the above copyright
67 * notice, this list of conditions and the following disclaimer.
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in the
70 * documentation and/or other materials provided with the distribution.
71 * 3. This section intentionally left blank.
72 * 4. The name of the author may not be used to endorse or promote products
73 * derived from this software without specific prior written permission.
74 *
75 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
76 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
77 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
78 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
79 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
80 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
81 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
82 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
83 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
84 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
85 */
86 /*
87 * Section 3 of the above license was updated in response to bug 6379571.
88 */
89
90 #include <sys/types.h>
91 #include <sys/hypervisor.h>
92 #include <sys/machsystm.h>
93 #include <sys/mutex.h>
94 #include <sys/evtchn_impl.h>
95 #include <sys/ddi_impldefs.h>
96 #include <sys/avintr.h>
97 #include <sys/cpuvar.h>
98 #include <sys/smp_impldefs.h>
99 #include <sys/archsystm.h>
100 #include <sys/sysmacros.h>
101 #include <sys/cmn_err.h>
102 #include <sys/promif.h>
103 #include <sys/debug.h>
104 #include <sys/psm.h>
105 #include <sys/privregs.h>
106 #include <sys/trap.h>
107 #include <sys/atomic.h>
108 #include <sys/cpu.h>
109 #include <sys/psw.h>
110 #include <sys/traptrace.h>
111 #include <sys/stack.h>
112 #include <sys/x_call.h>
113 #include <xen/public/physdev.h>
114
115 /*
116 * This file manages our association between hypervisor event channels and
 117  * Solaris's IRQs. This is a one-to-one mapping, with the exception of
 118  * IPI IRQs (one event channel per CPU participating in the IPI), the
 119  * clock VIRQ (which also has an event channel per cpu), and the IRQ for
 120  * /dev/xen/evtchn. The IRQ types are:
121 *
122 * IRQT_VIRQ:
123 * The hypervisor's standard virtual IRQ, used for the clock timer, for
124 * example. This code allows any cpu to bind to one of these, although
125 * some are treated specially (i.e. VIRQ_DEBUG).
126 * Event channel binding is done via EVTCHNOP_bind_virq.
127 *
128 * IRQT_PIRQ:
129 * These associate a physical IRQ with an event channel via
130 * EVTCHNOP_bind_pirq.
131 *
132 * IRQT_IPI:
133 * A cross-call IRQ. Maps to "ncpus" event channels, each of which is
134 * bound to exactly one of the vcpus. We do not currently support
135 * unbinding of IPIs (since Solaris doesn't need it). Uses
136 * EVTCHNOP_bind_ipi.
137 *
138 * IRQT_EVTCHN:
139 * A "normal" binding to an event channel, typically used by the frontend
 140  * drivers to bind to their backend event channel.
141 *
142 * IRQT_DEV_EVTCHN:
143 * This is a one-time IRQ used by /dev/xen/evtchn. Unlike other IRQs, we
144 * have a one-IRQ to many-evtchn mapping. We only track evtchn->irq for
145 * these event channels, which are managed via ec_irq_add/rm_evtchn().
146 * We enforce that IRQT_DEV_EVTCHN's representative evtchn (->ii_evtchn)
147 * is zero, and make any calls to irq_evtchn() an error, to prevent
148 * accidentally attempting to use the illegal evtchn 0.
149 *
150 * Suspend/resume
151 *
152 * During a suspend/resume cycle, we need to tear down the event channels.
153 * All other mapping data is kept. The drivers will remove their own event
154 * channels via xendev on receiving a DDI_SUSPEND. This leaves us with
155 * the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
156 * below.
157 *
158 * CPU binding
159 *
160 * When an event channel is bound to a CPU, we set a bit in a mask present
161 * in the machcpu (evt_affinity) to indicate that this CPU can accept this
162 * event channel. For both IPIs and VIRQs, this binding is fixed at
163 * allocation time and we never modify it. All other event channels are
164 * bound via the PSM either as part of add_avintr(), or interrupt
165 * redistribution (xen_psm_dis/enable_intr()) as a result of CPU
166 * offline/online.
167 *
168 * Locking
169 *
170 * Updates are done holding the ec_lock. The xen_callback_handler()
171 * routine reads the mapping data in a lockless fashion. Additionally
172 * suspend takes ec_lock to prevent update races during a suspend/resume
173 * cycle. The IPI info is also examined without the lock; this is OK
174 * since we only ever change IPI info during initial setup and resume.
175 */
176
177 #define IRQ_IS_CPUPOKE(irq) (ipi_info[XC_CPUPOKE_PIL].mi_irq == (irq))
178
179 #define EVTCHN_MASKED(ev) \
180 (HYPERVISOR_shared_info->evtchn_mask[(ev) >> EVTCHN_SHIFT] & \
181 (1ul << ((ev) & ((1ul << EVTCHN_SHIFT) - 1))))
182
183 static short evtchn_to_irq[NR_EVENT_CHANNELS];
184 static cpuset_t evtchn_cpus[NR_EVENT_CHANNELS];
185 static int evtchn_owner[NR_EVENT_CHANNELS];
186 #ifdef DEBUG
187 static kthread_t *evtchn_owner_thread[NR_EVENT_CHANNELS];
188 #endif
189
190 static irq_info_t irq_info[NR_IRQS];
191 static mec_info_t ipi_info[MAXIPL];
192 static mec_info_t virq_info[NR_VIRQS];
193
194 /*
195 * See the locking description above.
196 */
197 kmutex_t ec_lock;
198
199 /*
200 * Bitmap indicating which PIRQs require the hypervisor to be notified
201 * on unmask.
202 */
203 static unsigned long pirq_needs_eoi[NR_PIRQS / (sizeof (unsigned long) * NBBY)];
204
205 static int ec_debug_irq = INVALID_IRQ;
206 int ec_dev_irq = INVALID_IRQ;
207
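/*
 * Bind the given VIRQ to the given vcpu via EVTCHNOP_bind_virq and return
 * the allocated event channel in *port.  Returns 0 on success or a
 * translated error code on failure.
 */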
208 int
 209 xen_bind_virq(unsigned int virq, processorid_t cpu, int *port)
210 {
211 evtchn_bind_virq_t bind;
212 int err;
213
214 bind.virq = virq;
215 bind.vcpu = cpu;
216 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind)) == 0)
217 *port = bind.port;
218 else
219 err = xen_xlate_errcode(err);
220 return (err);
221 }
222
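/*
 * Bind a local event channel to the given remote domain and remote port
 * via EVTCHNOP_bind_interdomain; the local port is returned in *port.
 * Returns 0 on success or a translated error code on failure.
 */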
223 int
 224 xen_bind_interdomain(int domid, int remote_port, int *port)
225 {
226 evtchn_bind_interdomain_t bind;
227 int err;
228
229 bind.remote_dom = domid;
230 bind.remote_port = remote_port;
231 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
232 &bind)) == 0)
233 *port = bind.local_port;
234 else
235 err = xen_xlate_errcode(err);
236 return (err);
237 }
238
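/*
 * Allocate a new, unbound event channel which the given remote domain may
 * later bind to.  The new port is returned, masked, in *evtchnp.
 */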
239 int
 240 xen_alloc_unbound_evtchn(int domid, int *evtchnp)
241 {
242 evtchn_alloc_unbound_t alloc;
243 int err;
244
245 alloc.dom = DOMID_SELF;
246 alloc.remote_dom = domid;
247
248 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
249 &alloc)) == 0) {
250 *evtchnp = alloc.port;
251 /* ensure evtchn is masked till we're ready to use it */
252 (void) ec_mask_evtchn(*evtchnp);
253 } else {
254 err = xen_xlate_errcode(err);
255 }
256
257 return (err);
258 }
259
260 static int
 261 xen_close_evtchn(int evtchn)
262 {
263 evtchn_close_t close;
264 int err;
265
266 close.port = evtchn;
267 err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
268 if (err)
269 err = xen_xlate_errcode(err);
270 return (err);
271 }
272
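/*
 * Allocate an IPI event channel bound to the given vcpu via
 * EVTCHNOP_bind_ipi and return its port; panics on failure.
 */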
273 static int
 274 xen_bind_ipi(processorid_t cpu)
275 {
276 evtchn_bind_ipi_t bind;
277
278 ASSERT(MUTEX_HELD(&ec_lock));
279
280 bind.vcpu = cpu;
281 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind) != 0)
282 panic("xen_bind_ipi() failed");
283 return (bind.port);
284 }
285
 286 /* Send future instances of this interrupt to the specified vcpu. */
287 static void
 288 xen_bind_vcpu(int evtchn, int cpu)
289 {
290 evtchn_bind_vcpu_t bind;
291
292 ASSERT(MUTEX_HELD(&ec_lock));
293
294 bind.port = evtchn;
295 bind.vcpu = cpu;
296 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind) != 0)
297 panic("xen_bind_vcpu() failed");
298 }
299
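/*
 * Bind the given physical IRQ to an event channel via EVTCHNOP_bind_pirq,
 * allowing the PIRQ to be shared, and return the port; panics on failure.
 */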
300 static int
 301 xen_bind_pirq(int pirq)
302 {
303 evtchn_bind_pirq_t bind;
304 int ret;
305
306 bind.pirq = pirq;
307 bind.flags = BIND_PIRQ__WILL_SHARE;
308 if ((ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind)) != 0)
309 panic("xen_bind_pirq() failed (err %d)", ret);
310 return (bind.port);
311 }
312
313 /* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */
314 static void
 315 xen_evtchn_unmask(int evtchn)
316 {
317 evtchn_unmask_t unmask;
318
319 unmask.port = evtchn;
320 if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0)
321 panic("xen_evtchn_unmask() failed");
322 }
323
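/*
 * Propagate the evtchn_cpus[] binding for this event channel into each
 * CPU's evt_affinity mask, so that only the bound CPUs will service it.
 */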
324 static void
 325 update_evtchn_affinity(int evtchn)
326 {
327 cpu_t *cp;
328 struct xen_evt_data *cpe;
329
330 ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);
331 ASSERT(MUTEX_HELD(&ec_lock));
332
333 /*
334 * Use lockless search of cpu_list, similar to mutex_vector_enter().
335 */
336 kpreempt_disable();
337 cp = cpu_list;
338 do {
339 cpe = cp->cpu_m.mcpu_evt_pend;
340 if (CPU_IN_SET(evtchn_cpus[evtchn], cp->cpu_id))
341 SET_EVTCHN_BIT(evtchn, cpe->evt_affinity);
342 else
343 CLEAR_EVTCHN_BIT(evtchn, cpe->evt_affinity);
344 } while ((cp = cp->cpu_next) != cpu_list);
345 kpreempt_enable();
346 }
347
348 static void
 349 bind_evtchn_to_cpuset(int evtchn, cpuset_t cpus)
350 {
351 ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);
352
353 CPUSET_ZERO(evtchn_cpus[evtchn]);
354 CPUSET_OR(evtchn_cpus[evtchn], cpus);
355 update_evtchn_affinity(evtchn);
356 }
357
358 static void
 359 clear_evtchn_affinity(int evtchn)
360 {
361 CPUSET_ZERO(evtchn_cpus[evtchn]);
362 update_evtchn_affinity(evtchn);
363 }
364
365 static void
 366 alloc_irq_evtchn(int irq, int index, int evtchn, int cpu)
367 {
368 irq_info_t *irqp = &irq_info[irq];
369
370 switch (irqp->ii_type) {
371 case IRQT_IPI:
372 ipi_info[index].mi_evtchns[cpu] = evtchn;
373 irqp->ii_u.index = index;
374 break;
375 case IRQT_VIRQ:
376 virq_info[index].mi_evtchns[cpu] = evtchn;
377 irqp->ii_u.index = index;
378 break;
379 default:
380 irqp->ii_u.evtchn = evtchn;
381 break;
382 }
383
384 evtchn_to_irq[evtchn] = irq;
385
386 /*
387 * If a CPU is not specified, we expect to bind it to a CPU later via
388 * the PSM.
389 */
390 if (cpu != -1) {
391 cpuset_t tcpus;
392 CPUSET_ONLY(tcpus, cpu);
393 bind_evtchn_to_cpuset(evtchn, tcpus);
394 }
395 }
396
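/*
 * Allocate a free IRQ slot of the given type and associate it with the
 * given event channel (and CPU, where one is specified).  Panics if no
 * IRQ is available.
 */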
397 static int
 398 alloc_irq(int type, int index, int evtchn, int cpu)
399 {
400 int irq;
401 irq_info_t *irqp;
402
403 ASSERT(MUTEX_HELD(&ec_lock));
404 ASSERT(type != IRQT_IPI || cpu != -1);
405
406 for (irq = 0; irq < NR_IRQS; irq++) {
407 if (irq_info[irq].ii_type == IRQT_UNBOUND)
408 break;
409 }
410
411 if (irq == NR_IRQS)
412 panic("No available IRQ to bind to: increase NR_IRQS!\n");
413
414 irqp = &irq_info[irq];
415
416 irqp->ii_type = type;
417 /*
418 * Set irq/has_handler field to zero which means handler not installed
419 */
420 irqp->ii_u2.has_handler = 0;
421
422 alloc_irq_evtchn(irq, index, evtchn, cpu);
423 return (irq);
424 }
425
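/*
 * Return the event channel behind the given IRQ.  For IPI and VIRQ IRQs
 * this is the current CPU's event channel; must not be called for the
 * /dev/xen/evtchn IRQ, which has no single representative channel.
 */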
426 static int
 427 irq_evtchn(irq_info_t *irqp)
428 {
429 int evtchn;
430
431 ASSERT(irqp->ii_type != IRQT_DEV_EVTCHN);
432
433 switch (irqp->ii_type) {
434 case IRQT_IPI:
435 ASSERT(irqp->ii_u.index != 0);
436 evtchn = ipi_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
437 break;
438 case IRQT_VIRQ:
439 evtchn = virq_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
440 break;
441 default:
442 evtchn = irqp->ii_u.evtchn;
443 break;
444 }
445
446 return (evtchn);
447 }
448
449 int
 450 ec_is_edge_pirq(int irq)
451 {
452 return (irq_info[irq].ii_type == IRQT_PIRQ &&
453 !TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0]));
454 }
455
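/*
 * Close an event channel and clear the local state associated with it.
 */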
456 static void
 457 unbind_evtchn(ushort_t *evtchnp)
458 {
459 int err;
460
461 ASSERT(MUTEX_HELD(&ec_lock));
462
463 ASSERT(*evtchnp != 0);
464
465 err = xen_close_evtchn(*evtchnp);
466 ASSERT(err == 0);
467 clear_evtchn_affinity(*evtchnp);
468 evtchn_to_irq[*evtchnp] = INVALID_IRQ;
469 *evtchnp = 0;
470 }
471
472 static void
 473 pirq_unmask_notify(int pirq)
474 {
475 struct physdev_eoi eoi;
476
477 if (TEST_EVTCHN_BIT(pirq, &pirq_needs_eoi[0])) {
478 eoi.irq = pirq;
479 (void) HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
480 }
481 }
482
483 static void
 484 pirq_query_unmask(int pirq)
485 {
486 struct physdev_irq_status_query irq_status;
487
488 irq_status.irq = pirq;
489 (void) HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
490 CLEAR_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
491 if (irq_status.flags & XENIRQSTAT_needs_eoi)
492 SET_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
493 }
494
495 static void
 496 end_pirq(int irq)
497 {
498 int evtchn = irq_evtchn(&irq_info[irq]);
499
500 /*
 501  * If it is an edge-triggered interrupt, we have already unmasked it.
502 */
503 if (TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0])) {
504 ec_unmask_evtchn(evtchn);
505 pirq_unmask_notify(IRQ_TO_PIRQ(irq));
506 }
507 }
508
509 /*
510 * Bind an event channel to a vcpu
511 */
512 void
 513 ec_bind_vcpu(int evtchn, int cpu)
514 {
515 mutex_enter(&ec_lock);
516 xen_bind_vcpu(evtchn, cpu);
517 mutex_exit(&ec_lock);
518 }
519
520 /*
521 * Set up a physical device irq to be associated with an event channel.
522 */
523 void
 524 ec_setup_pirq(int irq, int ipl, cpuset_t *cpusp)
525 {
526 int evtchn;
527 irq_info_t *irqp = &irq_info[irq];
528
529 /*
 530  * Test if this PIRQ is already bound to an evtchn, which means it is
 531  * a shared IRQ; in that case we don't want to bind again or redo the
 532  * initial setup that has already been done for this irq on a previous
 533  * trip through this code.
534 */
535 if (irqp->ii_u.evtchn == INVALID_EVTCHN) {
536 evtchn = xen_bind_pirq(irq);
537
538 pirq_query_unmask(IRQ_TO_PIRQ(irq));
539
540 irqp->ii_type = IRQT_PIRQ;
541 irqp->ii_u.evtchn = evtchn;
542
543 evtchn_to_irq[evtchn] = irq;
544 irqp->ii_u2.ipl = ipl;
545 ec_set_irq_affinity(irq, *cpusp);
546 ec_enable_irq(irq);
547 pirq_unmask_notify(IRQ_TO_PIRQ(irq));
548 } else {
549 ASSERT(irqp->ii_u2.ipl != 0);
550 cmn_err(CE_NOTE, "!IRQ%d is shared", irq);
551 if (ipl > irqp->ii_u2.ipl)
552 irqp->ii_u2.ipl = ipl;
553 *cpusp = evtchn_cpus[irqp->ii_u.evtchn];
554 }
555 }
556
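/*
 * Tear down the event channel(s) behind the given IRQ and reset its
 * irq_info entry.  PIRQ slots keep their IRQT_PIRQ type so the static
 * physical IRQ reservation is preserved.
 */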
557 void
 558 ec_unbind_irq(int irq)
559 {
560 irq_info_t *irqp = &irq_info[irq];
561 mec_info_t *virqp;
562 int drop_lock = 0;
563 int type, i;
564
565 /*
566 * Nasty, but we need this during suspend.
567 */
568 if (mutex_owner(&ec_lock) != curthread) {
569 mutex_enter(&ec_lock);
570 drop_lock = 1;
571 }
572
573 type = irqp->ii_type;
574
575 ASSERT((type == IRQT_EVTCHN) || (type == IRQT_PIRQ) ||
576 (type == IRQT_VIRQ));
577
578 if ((type == IRQT_EVTCHN) || (type == IRQT_PIRQ)) {
579 /* There's only one event channel associated with this irq */
580 unbind_evtchn(&irqp->ii_u.evtchn);
581 } else if (type == IRQT_VIRQ) {
582 /*
 583  * Each cpu on the system can have its own event channel
584 * associated with a virq. Unbind them all.
585 */
586 virqp = &virq_info[irqp->ii_u.index];
587 for (i = 0; i < NCPU; i++) {
588 if (virqp->mi_evtchns[i] != 0)
589 unbind_evtchn(&virqp->mi_evtchns[i]);
590 }
591 /* Mark the virq structure as invalid. */
592 virqp->mi_irq = INVALID_IRQ;
593 }
594
595 bzero(irqp, sizeof (*irqp));
596 /* Re-reserve PIRQ. */
597 if (type == IRQT_PIRQ)
598 irqp->ii_type = IRQT_PIRQ;
599
600 if (drop_lock)
601 mutex_exit(&ec_lock);
602 }
603
604 /*
605 * Rebind an event channel for delivery to a CPU.
606 */
607 void
 608 ec_set_irq_affinity(int irq, cpuset_t dest)
609 {
610 int evtchn, tcpu;
611 irq_info_t *irqp = &irq_info[irq];
612
613 mutex_enter(&ec_lock);
614
615 ASSERT(irq < NR_IRQS);
616 ASSERT(irqp->ii_type != IRQT_UNBOUND);
617
618 /*
619 * Binding is done at allocation time for these types, so we should
620 * never modify them.
621 */
622 if (irqp->ii_type == IRQT_IPI || irqp->ii_type == IRQT_VIRQ ||
623 irqp->ii_type == IRQT_DEV_EVTCHN) {
624 mutex_exit(&ec_lock);
625 return;
626 }
627
628 CPUSET_FIND(dest, tcpu);
629 ASSERT(tcpu != CPUSET_NOTINSET);
630
631 evtchn = irq_evtchn(irqp);
632
633 xen_bind_vcpu(evtchn, tcpu);
634
635 bind_evtchn_to_cpuset(evtchn, dest);
636
637 mutex_exit(&ec_lock);
638
639 /*
640 * Now send the new target processor a NOP IPI.
641 * It will check for any pending interrupts, and so service any that
642 * got delivered to the wrong processor by mistake.
643 */
644 if (ncpus > 1)
645 poke_cpu(tcpu);
646 }
647
648 int
 649 ec_set_irq_priority(int irq, int pri)
650 {
651 irq_info_t *irqp;
652
653 if (irq >= NR_IRQS)
654 return (-1);
655
656 irqp = &irq_info[irq];
657
658 if (irqp->ii_type == IRQT_UNBOUND)
659 return (-1);
660
661 irqp->ii_u2.ipl = pri;
662
663 return (0);
664 }
665
666 void
 667 ec_clear_irq_priority(int irq)
668 {
669 irq_info_t *irqp = &irq_info[irq];
670
671 ASSERT(irq < NR_IRQS);
672 ASSERT(irqp->ii_type != IRQT_UNBOUND);
673
674 irqp->ii_u2.ipl = 0;
675 }
676
677 int
 678 ec_bind_evtchn_to_irq(int evtchn)
679 {
680 mutex_enter(&ec_lock);
681
682 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);
683
684 (void) alloc_irq(IRQT_EVTCHN, 0, evtchn, -1);
685
686 mutex_exit(&ec_lock);
687 return (evtchn_to_irq[evtchn]);
688 }
689
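/*
 * Bind the given VIRQ on the given CPU to an event channel and return the
 * IRQ shared by all per-CPU instances of that VIRQ, allocating the IRQ on
 * the first call.
 */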
690 int
 691 ec_bind_virq_to_irq(int virq, int cpu)
692 {
693 int err;
694 int evtchn;
695 mec_info_t *virqp;
696
697 virqp = &virq_info[virq];
698 mutex_enter(&ec_lock);
699
700 err = xen_bind_virq(virq, cpu, &evtchn);
701 ASSERT(err == 0);
702
703 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);
704
705 if (virqp->mi_irq == INVALID_IRQ) {
706 virqp->mi_irq = alloc_irq(IRQT_VIRQ, virq, evtchn, cpu);
707 } else {
708 alloc_irq_evtchn(virqp->mi_irq, virq, evtchn, cpu);
709 }
710
711 mutex_exit(&ec_lock);
712
713 return (virqp->mi_irq);
714 }
715
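/*
 * Bind the IPI for the given priority level to an event channel on the
 * given CPU and return the shared IPI IRQ, allocating it on the first
 * call.  The new event channel is unmasked before returning.
 */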
716 int
 717 ec_bind_ipi_to_irq(int ipl, int cpu)
718 {
719 int evtchn;
720 ulong_t flags;
721 mec_info_t *ipip;
722
723 mutex_enter(&ec_lock);
724
725 ipip = &ipi_info[ipl];
726
727 evtchn = xen_bind_ipi(cpu);
728
729 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);
730
731 if (ipip->mi_irq == INVALID_IRQ) {
732 ipip->mi_irq = alloc_irq(IRQT_IPI, ipl, evtchn, cpu);
733 } else {
734 alloc_irq_evtchn(ipip->mi_irq, ipl, evtchn, cpu);
735 }
736
737 /*
738 * Unmask the new evtchn so that it can be seen by the target cpu
739 */
740 flags = intr_clear();
741 ec_unmask_evtchn(evtchn);
742 intr_restore(flags);
743
744 mutex_exit(&ec_lock);
745 return (ipip->mi_irq);
746 }
747
748 /*
749 * When bringing up a CPU, bind to all the IPIs that CPU0 bound.
750 */
751 void
 752 ec_bind_cpu_ipis(int cpu)
753 {
754 int i;
755
756 for (i = 0; i < MAXIPL; i++) {
757 mec_info_t *ipip = &ipi_info[i];
758 if (ipip->mi_irq == INVALID_IRQ)
759 continue;
760
761 (void) ec_bind_ipi_to_irq(i, cpu);
762 }
763 }
764
765 /*
766 * Can this IRQ be rebound to another CPU?
767 */
768 int
 769 ec_irq_rebindable(int irq)
770 {
771 irq_info_t *irqp = &irq_info[irq];
772
773 if (irqp->ii_u.evtchn == 0)
774 return (0);
775
776 return (irqp->ii_type == IRQT_EVTCHN || irqp->ii_type == IRQT_PIRQ);
777 }
778
779 /*
780 * Should this IRQ be unbound from this CPU (which is being offlined) to
781 * another?
782 */
783 int
 784 ec_irq_needs_rebind(int irq, int cpu)
785 {
786 irq_info_t *irqp = &irq_info[irq];
787
788 return (ec_irq_rebindable(irq) &&
789 CPU_IN_SET(evtchn_cpus[irqp->ii_u.evtchn], cpu));
790 }
791
792 void
 793 ec_send_ipi(int ipl, int cpu)
794 {
795 mec_info_t *ipip = &ipi_info[ipl];
796
797 ASSERT(ipip->mi_irq != INVALID_IRQ);
798
799 ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
800 }
801
802 void
 803 ec_try_ipi(int ipl, int cpu)
804 {
805 mec_info_t *ipip = &ipi_info[ipl];
806
807 if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
808 return;
809
810 ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
811 }
812
813 void
 814 ec_irq_add_evtchn(int irq, int evtchn)
815 {
816 mutex_enter(&ec_lock);
817
818 /*
819 * See description of IRQT_DEV_EVTCHN above.
820 */
821 ASSERT(irq == ec_dev_irq);
822
823 alloc_irq_evtchn(irq, 0, evtchn, 0);
824 /*
825 * We enforce that the representative event channel for IRQT_DEV_EVTCHN
826 * is zero, so PSM operations on it have no effect.
827 */
828 irq_info[irq].ii_u.evtchn = 0;
829 mutex_exit(&ec_lock);
830 }
831
832 void
 833 ec_irq_rm_evtchn(int irq, int evtchn)
834 {
835 ushort_t ec = evtchn;
836
837 mutex_enter(&ec_lock);
838 ASSERT(irq == ec_dev_irq);
839 unbind_evtchn(&ec);
840 mutex_exit(&ec_lock);
841 }
842
843 /*
 844  * Allocate a /dev/xen/evtchn IRQ. See the big comment at the top
845 * for an explanation.
846 */
847 int
 848 ec_dev_alloc_irq(void)
849 {
850 int i;
851 irq_info_t *irqp;
852
853 for (i = 0; i < NR_IRQS; i++) {
854 if (irq_info[i].ii_type == IRQT_UNBOUND)
855 break;
856 }
857
858 ASSERT(i != NR_IRQS);
859
860 irqp = &irq_info[i];
861 irqp->ii_type = IRQT_DEV_EVTCHN;
862 irqp->ii_u2.ipl = IPL_EVTCHN;
863 /*
864 * Force the evtchn to zero for the special evtchn device irq
865 */
866 irqp->ii_u.evtchn = 0;
867 return (i);
868 }
869
870 void
 871 ec_enable_irq(unsigned int irq)
872 {
873 ulong_t flag;
874 irq_info_t *irqp = &irq_info[irq];
875
876 if (irqp->ii_type == IRQT_DEV_EVTCHN)
877 return;
878
879 flag = intr_clear();
880 ec_unmask_evtchn(irq_evtchn(irqp));
881 intr_restore(flag);
882 }
883
884 void
 885 ec_disable_irq(unsigned int irq)
886 {
887 irq_info_t *irqp = &irq_info[irq];
888
889 if (irqp->ii_type == IRQT_DEV_EVTCHN)
890 return;
891
892 /*
 893  * Spin until we are the one to mask the evtchn.
 894  * This ensures no one else can be servicing this evtchn.
895 */
896 while (!ec_mask_evtchn(irq_evtchn(irqp)))
897 SMT_PAUSE();
898 }
899
900 static int
 901 ec_evtchn_pending(uint_t ev)
902 {
903 uint_t evi;
904 shared_info_t *si = HYPERVISOR_shared_info;
905
906 evi = ev >> EVTCHN_SHIFT;
907 ev &= (1ul << EVTCHN_SHIFT) - 1;
908 return ((si->evtchn_pending[evi] & (1ul << ev)) != 0);
909 }
910
911 int
 912 ec_pending_irq(unsigned int irq)
913 {
914 int evtchn = irq_evtchn(&irq_info[irq]);
915
916 return (ec_evtchn_pending(evtchn));
917 }
918
919 void
 920 ec_clear_irq(int irq)
921 {
922 irq_info_t *irqp = &irq_info[irq];
923 int evtchn;
924
925 if (irqp->ii_type == IRQT_DEV_EVTCHN)
926 return;
927
928 ASSERT(irqp->ii_type != IRQT_UNBOUND);
929
930 evtchn = irq_evtchn(irqp);
931
932 ASSERT(EVTCHN_MASKED(evtchn));
933 ec_clear_evtchn(evtchn);
934 }
935
936 void
 937 ec_unmask_irq(int irq)
938 {
939 ulong_t flags;
940 irq_info_t *irqp = &irq_info[irq];
941
942 flags = intr_clear();
943 switch (irqp->ii_type) {
944 case IRQT_PIRQ:
945 end_pirq(irq);
946 break;
947 case IRQT_DEV_EVTCHN:
948 break;
949 default:
950 ec_unmask_evtchn(irq_evtchn(irqp));
951 break;
952 }
953 intr_restore(flags);
954 }
955
956 void
 957 ec_try_unmask_irq(int irq)
958 {
959 ulong_t flags;
960 irq_info_t *irqp = &irq_info[irq];
961 int evtchn;
962
963 flags = intr_clear();
964 switch (irqp->ii_type) {
965 case IRQT_PIRQ:
966 end_pirq(irq);
967 break;
968 case IRQT_DEV_EVTCHN:
969 break;
970 default:
971 if ((evtchn = irq_evtchn(irqp)) != 0)
972 ec_unmask_evtchn(evtchn);
973 break;
974 }
975 intr_restore(flags);
976 }
977
978 /*
979 * Poll until an event channel is ready or 'check_func' returns true. This can
980 * only be used in a situation where interrupts are masked, otherwise we have a
981 * classic time-of-check vs. time-of-use race.
982 */
983 void
 984 ec_wait_on_evtchn(int evtchn, int (*check_func)(void *), void *arg)
985 {
986 if (DOMAIN_IS_INITDOMAIN(xen_info)) {
987 while (!check_func(arg))
988 (void) HYPERVISOR_yield();
989 return;
990 }
991
992 ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);
993
994 for (;;) {
995 evtchn_port_t ports[1];
996
997 ports[0] = evtchn;
998
999 ec_clear_evtchn(evtchn);
1000
1001 if (check_func(arg))
1002 return;
1003
1004 (void) HYPERVISOR_poll(ports, 1, 0);
1005 }
1006 }
1007
1008 void
 1009 ec_wait_on_ipi(int ipl, int (*check_func)(void *), void *arg)
1010 {
1011 mec_info_t *ipip = &ipi_info[ipl];
1012
1013 if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
1014 return;
1015
1016 ec_wait_on_evtchn(ipip->mi_evtchns[CPU->cpu_id], check_func, arg);
1017 }
1018
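/*
 * Tear down the IPI and VIRQ event channels prior to suspend.  Driver
 * event channels should already have been removed via xendev; if any
 * remain, we deliberately crash rather than suspend with stale bindings.
 */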
1019 void
 1020 ec_suspend(void)
1021 {
1022 irq_info_t *irqp;
1023 ushort_t *evtchnp;
1024 int i;
1025 int c;
1026
1027 ASSERT(MUTEX_HELD(&ec_lock));
1028
1029 for (i = 0; i < MAXIPL; i++) {
1030 if (ipi_info[i].mi_irq == INVALID_IRQ)
1031 continue;
1032
1033 for (c = 0; c < NCPU; c++) {
1034 if (cpu[c] == NULL)
1035 continue;
1036
1037 if (CPU_IN_SET(cpu_suspend_lost_set, c))
1038 continue;
1039
1040 evtchnp = &ipi_info[i].mi_evtchns[c];
1041 ASSERT(*evtchnp != 0);
1042 unbind_evtchn(evtchnp);
1043 }
1044 }
1045
1046 for (i = 0; i < NR_VIRQS; i++) {
1047 if (virq_info[i].mi_irq == INVALID_IRQ)
1048 continue;
1049
1050 /*
1051 * If we're sharing a single event channel across all CPUs, we
1052 * should only unbind once.
1053 */
1054 if (virq_info[i].mi_shared) {
1055 evtchnp = &virq_info[i].mi_evtchns[0];
1056 unbind_evtchn(evtchnp);
1057 for (c = 1; c < NCPU; c++)
1058 virq_info[i].mi_evtchns[c] = 0;
1059 } else {
1060 for (c = 0; c < NCPU; c++) {
1061 if (cpu[c] == NULL)
1062 continue;
1063
1064 evtchnp = &virq_info[i].mi_evtchns[c];
1065 if (*evtchnp != 0)
1066 unbind_evtchn(evtchnp);
1067 }
1068 }
1069 }
1070
1071 for (i = 0; i < NR_IRQS; i++) {
1072 irqp = &irq_info[i];
1073
1074 switch (irqp->ii_type) {
1075 case IRQT_EVTCHN:
1076 case IRQT_DEV_EVTCHN:
1077 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
1078 break;
1079 case IRQT_PIRQ:
1080 if (irqp->ii_u.evtchn != 0)
1081 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
1082 break;
1083 default:
1084 break;
1085 }
1086 }
1087 }
1088
1089 /*
 1090  * The debug irq is special: we have only one evtchn and irq, but we allow
 1091  * all cpus to service it. It's marked as shared and we propagate the event
 1092  * channel into all CPUs by hand.
1093 */
1094 static void
 1095 share_virq(mec_info_t *virqp)
1096 {
1097 int evtchn = virqp->mi_evtchns[0];
1098 cpuset_t tset;
1099 int i;
1100
1101 ASSERT(evtchn != 0);
1102
1103 virqp->mi_shared = 1;
1104
1105 for (i = 1; i < NCPU; i++)
1106 virqp->mi_evtchns[i] = evtchn;
1107 CPUSET_ALL(tset);
1108 bind_evtchn_to_cpuset(evtchn, tset);
1109 }
1110
1111 static void
 1112 virq_resume(int virq)
1113 {
1114 mec_info_t *virqp = &virq_info[virq];
1115 int evtchn;
1116 int i, err;
1117
1118 for (i = 0; i < NCPU; i++) {
1119 cpuset_t tcpus;
1120
1121 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
1122 continue;
1123
1124 err = xen_bind_virq(virq, i, &evtchn);
1125 ASSERT(err == 0);
1126
1127 virqp->mi_evtchns[i] = evtchn;
1128 evtchn_to_irq[evtchn] = virqp->mi_irq;
1129 CPUSET_ONLY(tcpus, i);
1130 bind_evtchn_to_cpuset(evtchn, tcpus);
1131 ec_unmask_evtchn(evtchn);
1132 /*
 1133  * Only the timer VIRQ is bound to all cpus.
1134 */
1135 if (virq != VIRQ_TIMER)
1136 break;
1137 }
1138
1139 if (virqp->mi_shared)
1140 share_virq(virqp);
1141 }
1142
1143 static void
 1144 ipi_resume(int ipl)
1145 {
1146 mec_info_t *ipip = &ipi_info[ipl];
1147 int i;
1148
1149 for (i = 0; i < NCPU; i++) {
1150 cpuset_t tcpus;
1151 int evtchn;
1152
1153 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
1154 continue;
1155
1156 evtchn = xen_bind_ipi(i);
1157 ipip->mi_evtchns[i] = evtchn;
1158 evtchn_to_irq[evtchn] = ipip->mi_irq;
1159 CPUSET_ONLY(tcpus, i);
1160 bind_evtchn_to_cpuset(evtchn, tcpus);
1161 ec_unmask_evtchn(evtchn);
1162 }
1163 }
1164
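/*
 * Re-establish the IPI and VIRQ event channel bindings after a resume;
 * the new event channel space starts out fully masked.
 */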
1165 void
 1166 ec_resume(void)
1167 {
1168 int i;
1169
1170 /* New event-channel space is not 'live' yet. */
1171 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1172 (void) ec_mask_evtchn(i);
1173
1174 for (i = 0; i < MAXIPL; i++) {
1175 if (ipi_info[i].mi_irq == INVALID_IRQ)
1176 continue;
1177 ipi_resume(i);
1178 }
1179
1180 for (i = 0; i < NR_VIRQS; i++) {
1181 if (virq_info[i].mi_irq == INVALID_IRQ)
1182 continue;
1183 virq_resume(i);
1184 }
1185 }
1186
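/*
 * One-time initialization of the event channel layer: set up ec_lock,
 * mask every event channel, invalidate all evtchn/IRQ mappings, and
 * reserve the statically-bound PIRQ range.
 */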
1187 int
 1188 ec_init(void)
1189 {
1190 int i;
1191 mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7));
1192
1193 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1194 CPUSET_ZERO(evtchn_cpus[i]);
1195 evtchn_to_irq[i] = INVALID_IRQ;
1196 (void) ec_mask_evtchn(i);
1197 }
1198
1199 for (i = 0; i < MAXIPL; i++)
1200 ipi_info[i].mi_irq = INVALID_IRQ;
1201
1202 for (i = 0; i < NR_VIRQS; i++)
1203 virq_info[i].mi_irq = INVALID_IRQ;
1204
1205 /*
 1206  * Phys IRQ space is statically bound (1:1 mapping); grab the IRQs
 1207  * now.
1208 */
1209 for (i = PIRQ_BASE; i < NR_PIRQS; i++) {
1210 irq_info[PIRQ_TO_IRQ(i)].ii_type = IRQT_PIRQ;
1211 }
1212
1213 return (0);
1214 }
1215
1216 void
 1217 ec_init_debug_irq()
1218 {
1219 int irq;
1220
1221 irq = ec_bind_virq_to_irq(VIRQ_DEBUG, 0);
1222 (void) add_avintr(NULL, IPL_DEBUG, (avfunc)xen_debug_handler,
1223 "debug", irq, NULL, NULL, NULL, NULL);
1224
1225 mutex_enter(&ec_lock);
1226 share_virq(&virq_info[irq_info[irq].ii_u.index]);
1227 mutex_exit(&ec_lock);
1228 ec_debug_irq = irq;
1229 }
1230
1231 #define UNBLOCKED_EVENTS(si, ix, cpe, cpu_id) \
1232 ((si)->evtchn_pending[ix] & ~(si)->evtchn_mask[ix] & \
1233 (cpe)->evt_affinity[ix])
1234
1235
1236 /*
1237 * This is the entry point for processing events from xen
1238 *
1239 * (See the commentary associated with the shared_info_st structure
1240 * in hypervisor-if.h)
1241 *
1242 * Since the event channel mechanism doesn't really implement the
1243 * concept of priority like hardware interrupt controllers, we simulate
1244 * that in software here using the cpu priority field and the pending
1245 * interrupts field. Events/interrupts that are not able to be serviced
1246 * now because they are at a lower priority than the current cpu priority
1247 * cause a level bit to be recorded in the pending interrupts word. When
1248 * the priority is lowered (either by spl or interrupt exit code) the pending
1249 * levels are checked and an upcall is scheduled if there are events/interrupts
1250 * that have become deliverable.
1251 */
1252 void
 1253 xen_callback_handler(struct regs *rp, trap_trace_rec_t *ttp)
1254 {
1255 ulong_t pending_sels, pe, selbit;
1256 int i, j, port, pri, curpri, irq, sipri;
1257 uint16_t pending_ints, sip;
1258 struct cpu *cpu = CPU;
1259 volatile shared_info_t *si = HYPERVISOR_shared_info;
1260 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
1261 volatile struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;
1262 volatile uint16_t *cpu_ipp = &cpu->cpu_m.mcpu_intr_pending;
1263 extern void dosoftint(struct regs *);
1264
1265 ASSERT(rp->r_trapno == T_AST && rp->r_err == 0);
1266 ASSERT(&si->vcpu_info[cpu->cpu_id] == vci);
1267 ASSERT_STACK_ALIGNED();
1268
1269 vci->evtchn_upcall_pending = 0;
1270
1271 /*
1272 * To expedite scanning of pending notifications, any 0->1
1273 * pending transition on an unmasked channel causes a
1274 * corresponding bit in evtchn_pending_sel to be set.
1275 * Each bit in the selector covers a 32-bit word in
1276 * the evtchn_pending[] array.
1277 */
1278 membar_enter();
1279 do {
1280 pending_sels = vci->evtchn_pending_sel;
1281 } while (atomic_cas_ulong((volatile ulong_t *)&vci->evtchn_pending_sel,
1282 pending_sels, 0) != pending_sels);
1283
1284 pending_ints = *cpu_ipp;
1285 while ((i = ffs(pending_sels)) != 0) {
1286 i--;
1287 selbit = 1ul << i;
1288 pending_sels &= ~selbit;
1289
1290 membar_enter();
1291 while ((pe = UNBLOCKED_EVENTS(si, i, cpe, cpu->cpu_id)) != 0) {
1292 j = ffs(pe) - 1;
1293 pe &= ~(1ul << j);
1294
1295 port = (i << EVTCHN_SHIFT) + j;
1296
1297 irq = evtchn_to_irq[port];
1298
1299 /*
 1300  * If no irq is set, just ignore the event.
 1301  * (On e.g. netbsd they call evtchn_device_upcall(port).)
 1302  * We require the evtchn driver to install a handler,
 1303  * so there will be an irq associated with user mode
1304 * evtchns.
1305 */
1306 if (irq == INVALID_IRQ) {
1307 ec_clear_evtchn(port);
1308 continue;
1309 }
1310
1311 /*
1312 * If there's no handler, it could be a poke, so just
1313 * accept the event and continue.
1314 */
1315 if (!irq_info[irq].ii_u2.has_handler) {
1316 #ifdef TRAPTRACE
1317 ttp->ttr_ipl = 0xff;
1318 if (IRQ_IS_CPUPOKE(irq)) {
1319 ttp->ttr_ipl = XC_CPUPOKE_PIL;
1320 ttp->ttr_marker = TT_INTERRUPT;
1321 }
1322 ttp->ttr_pri = cpu->cpu_pri;
1323 ttp->ttr_spl = cpu->cpu_base_spl;
1324 ttp->ttr_vector = 0xff;
1325 #endif /* TRAPTRACE */
1326 if (ec_mask_evtchn(port)) {
1327 ec_clear_evtchn(port);
1328 ec_unmask_evtchn(port);
1329 continue;
1330 }
1331 }
1332
1333 pri = irq_info[irq].ii_u2.ipl;
1334
1335 /*
1336 * If we are the cpu that successfully masks
1337 * the event, then record it as a pending event
1338 * for this cpu to service
1339 */
1340 if (ec_mask_evtchn(port)) {
1341 if (ec_evtchn_pending(port)) {
1342 cpe->pending_sel[pri] |= selbit;
1343 cpe->pending_evts[pri][i] |= (1ul << j);
1344 pending_ints |= 1 << pri;
1345 /*
1346 * We have recorded a pending interrupt
1347 * for this cpu. If it is an edge
1348 * triggered interrupt then we go ahead
1349 * and clear the pending and mask bits
1350 * from the shared info to avoid having
1351 * the hypervisor see the pending event
1352 * again and possibly disabling the
1353 * interrupt. This should also help
1354 * keep us from missing an interrupt.
1355 */
1356 if (ec_is_edge_pirq(irq)) {
1357 ec_clear_evtchn(port);
1358 ec_unmask_evtchn(port);
1359 }
1360 } else {
1361 /*
1362 * another cpu serviced this event
1363 * before us, clear the mask.
1364 */
1365 ec_unmask_evtchn(port);
1366 }
1367 }
1368 }
1369 }
1370 *cpu_ipp = pending_ints;
1371 if (pending_ints == 0)
1372 return;
1373 /*
1374 * We have gathered all the pending events/interrupts,
1375 * go service all the ones we can from highest priority to lowest.
1376 * Note: This loop may not actually complete and service all
1377 * pending interrupts since one of the interrupt threads may
1378 * block and the pinned thread runs. In that case, when we
1379 * exit the interrupt thread that blocked we will check for
1380 * any unserviced interrupts and re-post an upcall to process
1381 * any unserviced pending events.
1382 */
1383 restart:
1384 curpri = cpu->cpu_pri;
1385 pri = bsrw_insn(*cpu_ipp);
1386 while (pri > curpri) {
1387 while ((pending_sels = cpe->pending_sel[pri]) != 0) {
1388 i = ffs(pending_sels) - 1;
1389 while ((pe = cpe->pending_evts[pri][i]) != 0) {
1390 j = ffs(pe) - 1;
1391 port = (i << EVTCHN_SHIFT) + j;
1392 pe &= ~(1ul << j);
1393 cpe->pending_evts[pri][i] = pe;
1394 if (pe == 0) {
1395 /*
1396 * Must reload pending selector bits
1397 * here as they could have changed on
1398 * a previous trip around the inner loop
 1399  * while interrupts were enabled
 1400  * in an interrupt service routine.
1401 */
1402 pending_sels = cpe->pending_sel[pri];
1403 pending_sels &= ~(1ul << i);
1404 cpe->pending_sel[pri] = pending_sels;
1405 if (pending_sels == 0)
1406 *cpu_ipp &= ~(1 << pri);
1407 }
1408 irq = evtchn_to_irq[port];
1409 if (irq == INVALID_IRQ) {
1410 /*
1411 * No longer a handler for this event
1412 * channel. Clear the event and
1413 * ignore it, unmask the event.
1414 */
1415 ec_clear_evtchn(port);
1416 ec_unmask_evtchn(port);
1417 continue;
1418 }
1419 if (irq == ec_dev_irq) {
1420 ASSERT(cpu->cpu_m.mcpu_ec_mbox == 0);
1421 cpu->cpu_m.mcpu_ec_mbox = port;
1422 }
1423 /*
1424 * Set up the regs struct to
1425 * look like a normal hardware int
1426 * and do normal interrupt handling.
1427 */
1428 rp->r_trapno = irq;
1429 do_interrupt(rp, ttp);
1430 /*
1431 * Check for cpu priority change
1432 * Can happen if int thread blocks
1433 */
1434 if (cpu->cpu_pri != curpri)
1435 goto restart;
1436 }
1437 }
1438 /*
1439 * Dispatch any soft interrupts that are
1440 * higher priority than any hard ones remaining.
1441 */
1442 pri = bsrw_insn(*cpu_ipp);
1443 sip = (uint16_t)cpu->cpu_softinfo.st_pending;
1444 if (sip != 0) {
1445 sipri = bsrw_insn(sip);
1446 if (sipri > pri && sipri > cpu->cpu_pri) {
1447 dosoftint(rp);
1448 /*
1449 * Check for cpu priority change
1450 * Can happen if softint thread blocks
1451 */
1452 if (cpu->cpu_pri != curpri)
1453 goto restart;
1454 }
1455 }
1456 }
1457 /*
1458 * Deliver any pending soft interrupts.
1459 */
1460 if (cpu->cpu_softinfo.st_pending)
1461 dosoftint(rp);
1462 }
1463
1464
1465 void
 1466 ec_unmask_evtchn(unsigned int ev)
1467 {
1468 uint_t evi, evb;
1469 volatile shared_info_t *si = HYPERVISOR_shared_info;
1470 volatile vcpu_info_t *vci = CPU->cpu_m.mcpu_vcpu_info;
1471 volatile ulong_t *ulp;
1472
1473 ASSERT(!interrupts_enabled());
1474 /*
 1475  * Check if we need to take the slow path.
1476 */
1477 if (!CPU_IN_SET(evtchn_cpus[ev], CPU->cpu_id)) {
1478 xen_evtchn_unmask(ev);
1479 return;
1480 }
1481 evi = ev >> EVTCHN_SHIFT;
1482 evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
1483 ulp = (volatile ulong_t *)&si->evtchn_mask[evi];
1484 atomic_and_ulong(ulp, ~(1ul << evb));
1485 /*
1486 * The following is basically the equivalent of
1487 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
1488 * interrupt edge' if the channel is masked.
1489 * XXPV - slight race if upcall was about to be set, we may get
1490 * an extra upcall.
1491 */
1492 membar_enter();
1493 if (si->evtchn_pending[evi] & (1ul << evb)) {
1494 membar_consumer();
1495 ulp = (volatile ulong_t *)&vci->evtchn_pending_sel;
1496 if (!(*ulp & (1ul << evi))) {
1497 atomic_or_ulong(ulp, (1ul << evi));
1498 }
1499 vci->evtchn_upcall_pending = 1;
1500 }
1501 }
1502
1503 /*
 1504  * Set a bit in an evtchn mask word; return true if we are the cpu that
1505 * set the bit.
1506 */
1507 int
 1508 ec_mask_evtchn(unsigned int ev)
1509 {
1510 uint_t evi, evb;
1511 ulong_t new, old, bit;
1512 volatile shared_info_t *si = HYPERVISOR_shared_info;
1513 volatile ulong_t *maskp;
1514 int masked;
1515
1516 kpreempt_disable();
1517 evi = ev >> EVTCHN_SHIFT;
1518 evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
1519 bit = 1ul << evb;
1520 maskp = (volatile ulong_t *)&si->evtchn_mask[evi];
1521 do {
1522 old = si->evtchn_mask[evi];
1523 new = old | bit;
1524 } while (atomic_cas_ulong(maskp, old, new) != old);
1525 masked = (old & bit) == 0;
1526 if (masked) {
1527 evtchn_owner[ev] = CPU->cpu_id;
1528 #ifdef DEBUG
1529 evtchn_owner_thread[ev] = curthread;
1530 #endif
1531 }
1532 kpreempt_enable();
1533 return (masked);
1534 }
1535
1536 void
 1537 ec_clear_evtchn(unsigned int ev)
1538 {
1539 uint_t evi;
1540 shared_info_t *si = HYPERVISOR_shared_info;
1541 volatile ulong_t *pendp;
1542
1543 evi = ev >> EVTCHN_SHIFT;
1544 ev &= (1ul << EVTCHN_SHIFT) - 1;
1545 pendp = (volatile ulong_t *)&si->evtchn_pending[evi];
1546 atomic_and_ulong(pendp, ~(1ul << ev));
1547 }
1548
1549 void
 1550 ec_notify_via_evtchn(unsigned int port)
1551 {
1552 evtchn_send_t send;
1553
1554 ASSERT(port != INVALID_EVTCHN);
1555
1556 send.port = port;
1557 (void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
1558 }
1559
1560 int
 1561 ec_block_irq(int irq)
1562 {
1563 irq_info_t *irqp = &irq_info[irq];
1564 int evtchn;
1565
1566
1567 evtchn = irq_evtchn(irqp);
1568 (void) ec_mask_evtchn(evtchn);
1569 return (evtchn_owner[evtchn]);
1570 }
1571
1572 /*
 1573  * Make an event that is pending for delivery on the current cpu "go away"
1574 * without servicing the interrupt.
1575 */
1576 void
 1577 ec_unpend_irq(int irq)
1578 {
1579 irq_info_t *irqp = &irq_info[irq];
1580 int pri = irqp->ii_u2.ipl;
1581 ulong_t flags;
1582 uint_t evtchn, evi, bit;
1583 unsigned long pe, pending_sels;
1584 struct xen_evt_data *cpe;
1585
1586 /*
1587 * The evtchn must be masked
1588 */
1589 evtchn = irq_evtchn(irqp);
1590 ASSERT(EVTCHN_MASKED(evtchn));
1591 evi = evtchn >> EVTCHN_SHIFT;
1592 bit = evtchn & (1ul << EVTCHN_SHIFT) - 1;
1593 flags = intr_clear();
1594 cpe = CPU->cpu_m.mcpu_evt_pend;
1595 pe = cpe->pending_evts[pri][evi] & ~(1ul << bit);
1596 cpe->pending_evts[pri][evi] = pe;
1597 if (pe == 0) {
1598 pending_sels = cpe->pending_sel[pri];
1599 pending_sels &= ~(1ul << evi);
1600 cpe->pending_sel[pri] = pending_sels;
1601 if (pending_sels == 0)
1602 CPU->cpu_m.mcpu_intr_pending &= ~(1 << pri);
1603 }
1604 intr_restore(flags);
1605 }
1606