xref: /titanic_44/usr/src/uts/i86xpv/io/psm/xpv_psm.c (revision a31148363f598def767ac48c5d82e1572e44b935)
1843e1988Sjohnlev /*
2843e1988Sjohnlev  * CDDL HEADER START
3843e1988Sjohnlev  *
4843e1988Sjohnlev  * The contents of this file are subject to the terms of the
5843e1988Sjohnlev  * Common Development and Distribution License (the "License").
6843e1988Sjohnlev  * You may not use this file except in compliance with the License.
7843e1988Sjohnlev  *
8843e1988Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev  * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev  * See the License for the specific language governing permissions
11843e1988Sjohnlev  * and limitations under the License.
12843e1988Sjohnlev  *
13843e1988Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev  *
19843e1988Sjohnlev  * CDDL HEADER END
20843e1988Sjohnlev  */
21843e1988Sjohnlev 
22843e1988Sjohnlev /*
2363ea9ad2SEvan Yan  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24843e1988Sjohnlev  * Use is subject to license terms.
25843e1988Sjohnlev  */
26843e1988Sjohnlev 
27*a3114836SGerry Liu #define	PSMI_1_7
28843e1988Sjohnlev 
29843e1988Sjohnlev #include <sys/mutex.h>
30843e1988Sjohnlev #include <sys/types.h>
31843e1988Sjohnlev #include <sys/time.h>
32843e1988Sjohnlev #include <sys/clock.h>
33843e1988Sjohnlev #include <sys/machlock.h>
34843e1988Sjohnlev #include <sys/smp_impldefs.h>
35843e1988Sjohnlev #include <sys/uadmin.h>
36843e1988Sjohnlev #include <sys/promif.h>
37843e1988Sjohnlev #include <sys/psm.h>
38843e1988Sjohnlev #include <sys/psm_common.h>
39843e1988Sjohnlev #include <sys/atomic.h>
40843e1988Sjohnlev #include <sys/apic.h>
41843e1988Sjohnlev #include <sys/archsystm.h>
42843e1988Sjohnlev #include <sys/mach_intr.h>
43843e1988Sjohnlev #include <sys/hypervisor.h>
44843e1988Sjohnlev #include <sys/evtchn_impl.h>
45843e1988Sjohnlev #include <sys/modctl.h>
46843e1988Sjohnlev #include <sys/trap.h>
47843e1988Sjohnlev #include <sys/panic.h>
48349b53ddSStuart Maybee #include <sys/sysmacros.h>
49349b53ddSStuart Maybee #include <sys/pci_intr_lib.h>
50349b53ddSStuart Maybee #include <vm/hat_i86.h>
51843e1988Sjohnlev 
52843e1988Sjohnlev #include <xen/public/vcpu.h>
53843e1988Sjohnlev #include <xen/public/physdev.h>
54843e1988Sjohnlev 
55843e1988Sjohnlev 
56843e1988Sjohnlev /*
57843e1988Sjohnlev  * Global Data
58843e1988Sjohnlev  */
59843e1988Sjohnlev 
60843e1988Sjohnlev int xen_psm_verbose = 0;
61843e1988Sjohnlev 
62b6917abeSmishra /* As of now we don't support x2apic in xVM */
63843e1988Sjohnlev volatile uint32_t *apicadr = NULL;	/* dummy, so common code will link */
64843e1988Sjohnlev int apic_error = 0;
65843e1988Sjohnlev int apic_verbose = 0;
66843e1988Sjohnlev cpuset_t apic_cpumask;
67843e1988Sjohnlev int apic_forceload = 0;
68843e1988Sjohnlev uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
69843e1988Sjohnlev 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
70843e1988Sjohnlev };
71843e1988Sjohnlev uchar_t apic_ipltopri[MAXIPL + 1];
72843e1988Sjohnlev uchar_t apic_ipls[APIC_AVAIL_VECTOR];
73843e1988Sjohnlev uint_t apic_picinit_called;
74843e1988Sjohnlev apic_cpus_info_t *apic_cpus;
75843e1988Sjohnlev int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
76843e1988Sjohnlev /* use to make sure only one cpu handles the nmi */
77843e1988Sjohnlev static lock_t xen_psm_nmi_lock;
78843e1988Sjohnlev int xen_psm_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
79843e1988Sjohnlev int xen_psm_panic_on_nmi = 0;
80843e1988Sjohnlev int xen_psm_num_nmis = 0;
81843e1988Sjohnlev 
82843e1988Sjohnlev cpuset_t xen_psm_cpus_online;	/* online cpus */
83843e1988Sjohnlev int xen_psm_ncpus = 1;		/* cpu count */
84843e1988Sjohnlev int xen_psm_next_bind_cpu;	/* next cpu to bind an interrupt to */
85843e1988Sjohnlev 
8677979b9bSStuart Maybee int xen_support_msi = 0;
87843e1988Sjohnlev 
88843e1988Sjohnlev static int xen_clock_irq = INVALID_IRQ;
89843e1988Sjohnlev 
/* flag definitions for xen_psm_verbose */
#define	XEN_PSM_VERBOSE_IRQ_FLAG		0x00000001
#define	XEN_PSM_VERBOSE_POWEROFF_FLAG		0x00000002
#define	XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000004

/*
 * Conditional diagnostic macros.  "fmt" is a complete, parenthesised
 * argument list, e.g. XEN_PSM_VERBOSE_IRQ((CE_CONT, "irq %d\n", irq));
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement: it can be used as the unbraced body of an if/else without
 * swallowing the else or producing a stray empty statement.  The caller
 * supplies the terminating semicolon.
 */
#define	XEN_PSM_VERBOSE_IRQ(fmt) \
	do { \
		if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \
			cmn_err fmt; \
	} while (0)

#define	XEN_PSM_VERBOSE_POWEROFF(fmt) \
	do { \
		if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \
			prom_printf fmt; \
	} while (0)
102843e1988Sjohnlev 
103843e1988Sjohnlev /*
104843e1988Sjohnlev  * Dummy apic array to point common routines at that want to do some apic
105843e1988Sjohnlev  * manipulation.  Xen doesn't allow guest apic access so we point at these
106843e1988Sjohnlev  * memory locations to fake out those who want to do apic fiddling.
107843e1988Sjohnlev  */
108843e1988Sjohnlev uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1];
109843e1988Sjohnlev 
110843e1988Sjohnlev static struct psm_info xen_psm_info;
111843e1988Sjohnlev static void xen_psm_setspl(int);
112843e1988Sjohnlev 
113349b53ddSStuart Maybee int
114349b53ddSStuart Maybee apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
115349b53ddSStuart Maybee     int behavior);
116349b53ddSStuart Maybee int
117349b53ddSStuart Maybee apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
118349b53ddSStuart Maybee     int behavior);
119843e1988Sjohnlev 
120843e1988Sjohnlev /*
121843e1988Sjohnlev  * Local support routines
122843e1988Sjohnlev  */
123843e1988Sjohnlev 
124843e1988Sjohnlev /*
125843e1988Sjohnlev  * Select vcpu to bind xen virtual device interrupt to.
126843e1988Sjohnlev  */
127843e1988Sjohnlev /*ARGSUSED*/
128843e1988Sjohnlev int
xen_psm_bind_intr(int irq)129843e1988Sjohnlev xen_psm_bind_intr(int irq)
130843e1988Sjohnlev {
131349b53ddSStuart Maybee 	int bind_cpu;
132843e1988Sjohnlev 	apic_irq_t *irqptr;
133843e1988Sjohnlev 
134349b53ddSStuart Maybee 	bind_cpu = IRQ_UNBOUND;
135843e1988Sjohnlev 	if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY)
136349b53ddSStuart Maybee 		return (bind_cpu);
137843e1988Sjohnlev 	if (irq <= APIC_MAX_VECTOR)
138843e1988Sjohnlev 		irqptr = apic_irq_table[irq];
139843e1988Sjohnlev 	else
140843e1988Sjohnlev 		irqptr = NULL;
141349b53ddSStuart Maybee 	if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND))
142349b53ddSStuart Maybee 		bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND;
143349b53ddSStuart Maybee 	if (bind_cpu != IRQ_UNBOUND) {
144349b53ddSStuart Maybee 		if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu))
145843e1988Sjohnlev 			bind_cpu = 0;
146843e1988Sjohnlev 		goto done;
147843e1988Sjohnlev 	}
148843e1988Sjohnlev 	if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
149843e1988Sjohnlev 		do {
150843e1988Sjohnlev 			bind_cpu = xen_psm_next_bind_cpu++;
151843e1988Sjohnlev 			if (xen_psm_next_bind_cpu >= xen_psm_ncpus)
152843e1988Sjohnlev 				xen_psm_next_bind_cpu = 0;
153843e1988Sjohnlev 		} while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu));
154843e1988Sjohnlev 	} else {
155843e1988Sjohnlev 		bind_cpu = 0;
156843e1988Sjohnlev 	}
157843e1988Sjohnlev done:
158843e1988Sjohnlev 	return (bind_cpu);
159843e1988Sjohnlev }
160843e1988Sjohnlev 
161843e1988Sjohnlev /*
162843e1988Sjohnlev  * Autoconfiguration Routines
163843e1988Sjohnlev  */
164843e1988Sjohnlev 
165843e1988Sjohnlev static int
xen_psm_probe(void)166843e1988Sjohnlev xen_psm_probe(void)
167843e1988Sjohnlev {
168843e1988Sjohnlev 	int ret = PSM_SUCCESS;
169843e1988Sjohnlev 
170843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info))
171843e1988Sjohnlev 		ret = apic_probe_common(xen_psm_info.p_mach_idstring);
172843e1988Sjohnlev 	return (ret);
173843e1988Sjohnlev }
174843e1988Sjohnlev 
175843e1988Sjohnlev static void
xen_psm_softinit(void)176843e1988Sjohnlev xen_psm_softinit(void)
177843e1988Sjohnlev {
178843e1988Sjohnlev 	/* LINTED logical expression always true: op "||" */
179843e1988Sjohnlev 	ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t));
180b9bc7f78Ssmaybe 	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0);
181843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
182843e1988Sjohnlev 		apic_init_common();
183843e1988Sjohnlev 	}
184843e1988Sjohnlev }
185843e1988Sjohnlev 
186843e1988Sjohnlev #define	XEN_NSEC_PER_TICK	10 /* XXX - assume we have a 100 Mhz clock */
187843e1988Sjohnlev 
188843e1988Sjohnlev /*ARGSUSED*/
189843e1988Sjohnlev static int
xen_psm_clkinit(int hertz)190843e1988Sjohnlev xen_psm_clkinit(int hertz)
191843e1988Sjohnlev {
192843e1988Sjohnlev 	extern enum tod_fault_type tod_fault(enum tod_fault_type, int);
193843e1988Sjohnlev 	extern int dosynctodr;
194843e1988Sjohnlev 
195843e1988Sjohnlev 	/*
196843e1988Sjohnlev 	 * domU cannot set the TOD hardware, fault the TOD clock now to
197843e1988Sjohnlev 	 * indicate that and turn off attempts to sync TOD hardware
198843e1988Sjohnlev 	 * with the hires timer.
199843e1988Sjohnlev 	 */
200843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
201843e1988Sjohnlev 		mutex_enter(&tod_lock);
202843e1988Sjohnlev 		(void) tod_fault(TOD_RDONLY, 0);
203843e1988Sjohnlev 		dosynctodr = 0;
204843e1988Sjohnlev 		mutex_exit(&tod_lock);
205843e1988Sjohnlev 	}
206843e1988Sjohnlev 	/*
207843e1988Sjohnlev 	 * The hypervisor provides a timer based on the local APIC timer.
208843e1988Sjohnlev 	 * The interface supports requests of nanosecond resolution.
209843e1988Sjohnlev 	 * A common frequency of the apic clock is 100 Mhz which
210843e1988Sjohnlev 	 * gives a resolution of 10 nsec per tick.  What we would really like
211843e1988Sjohnlev 	 * is a way to get the ns per tick value from xen.
212843e1988Sjohnlev 	 * XXPV - This is an assumption that needs checking and may change
213843e1988Sjohnlev 	 */
214843e1988Sjohnlev 	return (XEN_NSEC_PER_TICK);
215843e1988Sjohnlev }
216843e1988Sjohnlev 
/*
 * High resolution time initialization: sets the gethrtime_hires flag.
 */
static void
xen_psm_hrtimeinit(void)
{
	extern int gethrtime_hires;

	gethrtime_hires = 1;
}
223843e1988Sjohnlev 
224843e1988Sjohnlev /* xen_psm NMI handler */
225843e1988Sjohnlev /*ARGSUSED*/
226843e1988Sjohnlev static void
xen_psm_nmi_intr(caddr_t arg,struct regs * rp)227843e1988Sjohnlev xen_psm_nmi_intr(caddr_t arg, struct regs *rp)
228843e1988Sjohnlev {
229843e1988Sjohnlev 	xen_psm_num_nmis++;
230843e1988Sjohnlev 
231843e1988Sjohnlev 	if (!lock_try(&xen_psm_nmi_lock))
232843e1988Sjohnlev 		return;
233843e1988Sjohnlev 
234843e1988Sjohnlev 	if (xen_psm_kmdb_on_nmi && psm_debugger()) {
235843e1988Sjohnlev 		debug_enter("NMI received: entering kmdb\n");
236843e1988Sjohnlev 	} else if (xen_psm_panic_on_nmi) {
237843e1988Sjohnlev 		/* Keep panic from entering kmdb. */
238843e1988Sjohnlev 		nopanicdebug = 1;
239843e1988Sjohnlev 		panic("NMI received\n");
240843e1988Sjohnlev 	} else {
241843e1988Sjohnlev 		/*
242843e1988Sjohnlev 		 * prom_printf is the best shot we have of something which is
243843e1988Sjohnlev 		 * problem free from high level/NMI type of interrupts
244843e1988Sjohnlev 		 */
245843e1988Sjohnlev 		prom_printf("NMI received\n");
246843e1988Sjohnlev 	}
247843e1988Sjohnlev 
248843e1988Sjohnlev 	lock_clear(&xen_psm_nmi_lock);
249843e1988Sjohnlev }
250843e1988Sjohnlev 
251843e1988Sjohnlev static void
xen_psm_picinit()252843e1988Sjohnlev xen_psm_picinit()
253843e1988Sjohnlev {
254843e1988Sjohnlev 	int cpu, irqno;
255843e1988Sjohnlev 	cpuset_t cpus;
256843e1988Sjohnlev 
257843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
258843e1988Sjohnlev 		/* set a flag so we know we have run xen_psm_picinit() */
259843e1988Sjohnlev 		apic_picinit_called = 1;
260843e1988Sjohnlev 		LOCK_INIT_CLEAR(&apic_ioapic_lock);
261843e1988Sjohnlev 
262843e1988Sjohnlev 		/* XXPV - do we need to do this? */
263843e1988Sjohnlev 		picsetup();	 /* initialise the 8259 */
264843e1988Sjohnlev 
265843e1988Sjohnlev 		/* enable apic mode if imcr present */
266843e1988Sjohnlev 		/* XXPV - do we need to do this either? */
267843e1988Sjohnlev 		if (apic_imcrp) {
268843e1988Sjohnlev 			outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
269843e1988Sjohnlev 			outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
270843e1988Sjohnlev 		}
271843e1988Sjohnlev 
272843e1988Sjohnlev 		ioapic_init_intr(IOAPIC_NOMASK);
273843e1988Sjohnlev 		/*
274843e1988Sjohnlev 		 * We never called xen_psm_addspl() when the SCI
275843e1988Sjohnlev 		 * interrupt was added because that happened before the
276843e1988Sjohnlev 		 * PSM module was loaded.  Fix that up here by doing
277843e1988Sjohnlev 		 * any missed operations (e.g. bind to CPU)
278843e1988Sjohnlev 		 */
279843e1988Sjohnlev 		if ((irqno = apic_sci_vect) > 0) {
280843e1988Sjohnlev 			if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
281843e1988Sjohnlev 				CPUSET_ZERO(cpus);
282843e1988Sjohnlev 				CPUSET_OR(cpus, xen_psm_cpus_online);
283843e1988Sjohnlev 			} else {
284843e1988Sjohnlev 				CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
285843e1988Sjohnlev 			}
286843e1988Sjohnlev 			ec_set_irq_affinity(irqno, cpus);
287b9bc7f78Ssmaybe 			apic_irq_table[irqno]->airq_temp_cpu =
288b9bc7f78Ssmaybe 			    (uchar_t)(cpu & ~IRQ_USER_BOUND);
289843e1988Sjohnlev 			ec_enable_irq(irqno);
290843e1988Sjohnlev 		}
291843e1988Sjohnlev 	}
292843e1988Sjohnlev 
293843e1988Sjohnlev 	/* add nmi handler - least priority nmi handler */
294843e1988Sjohnlev 	LOCK_INIT_CLEAR(&xen_psm_nmi_lock);
295843e1988Sjohnlev 
296843e1988Sjohnlev 	if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr,
297b9bc7f78Ssmaybe 	    "xVM_psm NMI handler", (caddr_t)NULL))
298b9bc7f78Ssmaybe 		cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler");
299843e1988Sjohnlev }
300843e1988Sjohnlev 
301843e1988Sjohnlev 
302843e1988Sjohnlev /*
303843e1988Sjohnlev  * generates an interprocessor interrupt to another CPU
304843e1988Sjohnlev  */
305843e1988Sjohnlev static void
xen_psm_send_ipi(int cpun,int ipl)306843e1988Sjohnlev xen_psm_send_ipi(int cpun, int ipl)
307843e1988Sjohnlev {
308843e1988Sjohnlev 	ulong_t flag = intr_clear();
309843e1988Sjohnlev 
310843e1988Sjohnlev 	ec_send_ipi(ipl, cpun);
311843e1988Sjohnlev 	intr_restore(flag);
312843e1988Sjohnlev }
313843e1988Sjohnlev 
314843e1988Sjohnlev /*ARGSUSED*/
315843e1988Sjohnlev static int
xen_psm_addspl(int irqno,int ipl,int min_ipl,int max_ipl)316843e1988Sjohnlev xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
317843e1988Sjohnlev {
318843e1988Sjohnlev 	int cpu, ret;
319843e1988Sjohnlev 	cpuset_t cpus;
320843e1988Sjohnlev 
321843e1988Sjohnlev 	/*
322843e1988Sjohnlev 	 * We are called at splhi() so we can't call anything that might end
323843e1988Sjohnlev 	 * up trying to context switch.
324843e1988Sjohnlev 	 */
325843e1988Sjohnlev 	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
326843e1988Sjohnlev 	    DOMAIN_IS_INITDOMAIN(xen_info)) {
327843e1988Sjohnlev 		/*
328843e1988Sjohnlev 		 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq()
329843e1988Sjohnlev 		 */
330843e1988Sjohnlev 		ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
331843e1988Sjohnlev 	} else {
332843e1988Sjohnlev 		/*
333843e1988Sjohnlev 		 * Set priority/affinity/enable for non PIRQs
334843e1988Sjohnlev 		 */
335843e1988Sjohnlev 		ret = ec_set_irq_priority(irqno, ipl);
336843e1988Sjohnlev 		ASSERT(ret == 0);
337843e1988Sjohnlev 		if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
338843e1988Sjohnlev 			CPUSET_ZERO(cpus);
339843e1988Sjohnlev 			CPUSET_OR(cpus, xen_psm_cpus_online);
340843e1988Sjohnlev 		} else {
341843e1988Sjohnlev 			CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
342843e1988Sjohnlev 		}
343843e1988Sjohnlev 		ec_set_irq_affinity(irqno, cpus);
344843e1988Sjohnlev 		ec_enable_irq(irqno);
345843e1988Sjohnlev 	}
346843e1988Sjohnlev 	return (ret);
347843e1988Sjohnlev }
348843e1988Sjohnlev 
349843e1988Sjohnlev /*
350843e1988Sjohnlev  * Acquire ownership of this irq on this cpu
351843e1988Sjohnlev  */
352843e1988Sjohnlev void
xen_psm_acquire_irq(int irq)353843e1988Sjohnlev xen_psm_acquire_irq(int irq)
354843e1988Sjohnlev {
355843e1988Sjohnlev 	ulong_t flags;
356843e1988Sjohnlev 	int cpuid;
357843e1988Sjohnlev 
358843e1988Sjohnlev 	/*
359843e1988Sjohnlev 	 * If the irq is currently being serviced by another cpu
360843e1988Sjohnlev 	 * we busy-wait for the other cpu to finish.  Take any
361843e1988Sjohnlev 	 * pending interrupts before retrying.
362843e1988Sjohnlev 	 */
363843e1988Sjohnlev 	do {
364843e1988Sjohnlev 		flags = intr_clear();
365843e1988Sjohnlev 		cpuid = ec_block_irq(irq);
366843e1988Sjohnlev 		intr_restore(flags);
367843e1988Sjohnlev 	} while (cpuid != CPU->cpu_id);
368843e1988Sjohnlev }
369843e1988Sjohnlev 
370843e1988Sjohnlev /*ARGSUSED*/
371843e1988Sjohnlev static int
xen_psm_delspl(int irqno,int ipl,int min_ipl,int max_ipl)372843e1988Sjohnlev xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
373843e1988Sjohnlev {
374843e1988Sjohnlev 	apic_irq_t *irqptr;
375843e1988Sjohnlev 	int err = PSM_SUCCESS;
376843e1988Sjohnlev 
377843e1988Sjohnlev 	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
378843e1988Sjohnlev 	    DOMAIN_IS_INITDOMAIN(xen_info)) {
379843e1988Sjohnlev 		irqptr = apic_irq_table[irqno];
380843e1988Sjohnlev 		/*
381843e1988Sjohnlev 		 * unbind if no more sharers of this irq/evtchn
382843e1988Sjohnlev 		 */
383843e1988Sjohnlev 		if (irqptr->airq_share == 1) {
384843e1988Sjohnlev 			xen_psm_acquire_irq(irqno);
385843e1988Sjohnlev 			ec_unbind_irq(irqno);
386843e1988Sjohnlev 		}
387843e1988Sjohnlev 		err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);
388843e1988Sjohnlev 		/*
389843e1988Sjohnlev 		 * If still in use reset priority
390843e1988Sjohnlev 		 */
391843e1988Sjohnlev 		if (!err && irqptr->airq_share != 0) {
392843e1988Sjohnlev 			err = ec_set_irq_priority(irqno, max_ipl);
393843e1988Sjohnlev 			return (err);
394843e1988Sjohnlev 		}
395843e1988Sjohnlev 	} else {
396843e1988Sjohnlev 		xen_psm_acquire_irq(irqno);
397843e1988Sjohnlev 		ec_unbind_irq(irqno);
398843e1988Sjohnlev 	}
399843e1988Sjohnlev 	return (err);
400843e1988Sjohnlev }
401843e1988Sjohnlev 
402843e1988Sjohnlev static processorid_t
xen_psm_get_next_processorid(processorid_t id)403843e1988Sjohnlev xen_psm_get_next_processorid(processorid_t id)
404843e1988Sjohnlev {
405843e1988Sjohnlev 	if (id == -1)
406843e1988Sjohnlev 		return (0);
407843e1988Sjohnlev 
408843e1988Sjohnlev 	for (id++; id < NCPU; id++) {
409843e1988Sjohnlev 		switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) {
410843e1988Sjohnlev 		case 0:		/* yeah, that one's there */
411843e1988Sjohnlev 			return (id);
412843e1988Sjohnlev 		default:
413843e1988Sjohnlev 		case X_EINVAL:	/* out of range */
414843e1988Sjohnlev 			return (-1);
415843e1988Sjohnlev 		case X_ENOENT:	/* not present in the domain */
416843e1988Sjohnlev 			/*
417843e1988Sjohnlev 			 * It's not clear that we -need- to keep looking
418843e1988Sjohnlev 			 * at this point, if, e.g., we can guarantee
419843e1988Sjohnlev 			 * the hypervisor always keeps a contiguous range
420843e1988Sjohnlev 			 * of vcpus around this is equivalent to "out of range".
421843e1988Sjohnlev 			 *
422843e1988Sjohnlev 			 * But it would be sad to miss a vcpu we're
423843e1988Sjohnlev 			 * supposed to be using ..
424843e1988Sjohnlev 			 */
425843e1988Sjohnlev 			break;
426843e1988Sjohnlev 		}
427843e1988Sjohnlev 	}
428843e1988Sjohnlev 
429843e1988Sjohnlev 	return (-1);
430843e1988Sjohnlev }
431843e1988Sjohnlev 
432843e1988Sjohnlev /*
433843e1988Sjohnlev  * XXPV - undo the start cpu op change; return to ignoring this value
434843e1988Sjohnlev  *	- also tweak error handling in main startup loop
435843e1988Sjohnlev  */
436843e1988Sjohnlev /*ARGSUSED*/
437843e1988Sjohnlev static int
xen_psm_cpu_start(processorid_t id,caddr_t arg)438843e1988Sjohnlev xen_psm_cpu_start(processorid_t id, caddr_t arg)
439843e1988Sjohnlev {
440843e1988Sjohnlev 	int ret;
441843e1988Sjohnlev 
442843e1988Sjohnlev 	ASSERT(id > 0);
443b9bc7f78Ssmaybe 	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id);
444843e1988Sjohnlev 	ec_bind_cpu_ipis(id);
445843e1988Sjohnlev 	(void) ec_bind_virq_to_irq(VIRQ_TIMER, id);
446843e1988Sjohnlev 	if ((ret = xen_vcpu_up(id)) == 0)
447843e1988Sjohnlev 		xen_psm_ncpus++;
448843e1988Sjohnlev 	else
449843e1988Sjohnlev 		ret = EINVAL;
450843e1988Sjohnlev 	return (ret);
451843e1988Sjohnlev }
452843e1988Sjohnlev 
/*
 * Allocate an irq for inter cpu signaling.
 *
 * Returns the irq that ec_bind_ipi_to_irq() associates with an ipi at
 * priority "ipl" on cpu 0; "type" is not used.
 */
/*ARGSUSED*/
static int
xen_psm_get_ipivect(int ipl, int type)
{
	int irq;

	irq = ec_bind_ipi_to_irq(ipl, 0);
	return (irq);
}
462843e1988Sjohnlev 
463843e1988Sjohnlev /*ARGSUSED*/
464843e1988Sjohnlev static int
xen_psm_get_clockirq(int ipl)465843e1988Sjohnlev xen_psm_get_clockirq(int ipl)
466843e1988Sjohnlev {
467843e1988Sjohnlev 	if (xen_clock_irq != INVALID_IRQ)
468843e1988Sjohnlev 		return (xen_clock_irq);
469843e1988Sjohnlev 
470843e1988Sjohnlev 	xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0);
471843e1988Sjohnlev 	return (xen_clock_irq);
472843e1988Sjohnlev }
473843e1988Sjohnlev 
474843e1988Sjohnlev /*ARGSUSED*/
475843e1988Sjohnlev static void
xen_psm_shutdown(int cmd,int fcn)476843e1988Sjohnlev xen_psm_shutdown(int cmd, int fcn)
477843e1988Sjohnlev {
478843e1988Sjohnlev 	XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn));
479843e1988Sjohnlev 
480843e1988Sjohnlev 	switch (cmd) {
481843e1988Sjohnlev 	case A_SHUTDOWN:
482843e1988Sjohnlev 		switch (fcn) {
483843e1988Sjohnlev 		case AD_BOOT:
484843e1988Sjohnlev 		case AD_IBOOT:
485843e1988Sjohnlev 			(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
486843e1988Sjohnlev 			break;
487843e1988Sjohnlev 		case AD_POWEROFF:
488843e1988Sjohnlev 			/* fall through if domU or if poweroff fails */
489843e1988Sjohnlev 			if (DOMAIN_IS_INITDOMAIN(xen_info))
490843e1988Sjohnlev 				if (apic_enable_acpi)
491843e1988Sjohnlev 					(void) acpi_poweroff();
492843e1988Sjohnlev 			/* FALLTHRU */
493843e1988Sjohnlev 		case AD_HALT:
494843e1988Sjohnlev 		default:
495843e1988Sjohnlev 			(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
496843e1988Sjohnlev 			break;
497843e1988Sjohnlev 		}
498843e1988Sjohnlev 		break;
499843e1988Sjohnlev 	case A_REBOOT:
500843e1988Sjohnlev 		(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
501843e1988Sjohnlev 		break;
502843e1988Sjohnlev 	default:
503843e1988Sjohnlev 		return;
504843e1988Sjohnlev 	}
505843e1988Sjohnlev }
506843e1988Sjohnlev 
507843e1988Sjohnlev 
508843e1988Sjohnlev static int
xen_psm_translate_irq(dev_info_t * dip,int irqno)509843e1988Sjohnlev xen_psm_translate_irq(dev_info_t *dip, int irqno)
510843e1988Sjohnlev {
511843e1988Sjohnlev 	if (dip == NULL) {
512843e1988Sjohnlev 		XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d"
513843e1988Sjohnlev 		    " dip = NULL\n", irqno));
514843e1988Sjohnlev 		return (irqno);
515843e1988Sjohnlev 	}
516843e1988Sjohnlev 	return (irqno);
517843e1988Sjohnlev }
518843e1988Sjohnlev 
519843e1988Sjohnlev /*
520843e1988Sjohnlev  * xen_psm_intr_enter() acks the event that triggered the interrupt and
521843e1988Sjohnlev  * returns the new priority level,
522843e1988Sjohnlev  */
523843e1988Sjohnlev /*ARGSUSED*/
524843e1988Sjohnlev static int
xen_psm_intr_enter(int ipl,int * vector)525843e1988Sjohnlev xen_psm_intr_enter(int ipl, int *vector)
526843e1988Sjohnlev {
527843e1988Sjohnlev 	int newipl;
528843e1988Sjohnlev 	uint_t intno;
529843e1988Sjohnlev 	cpu_t *cpu = CPU;
530843e1988Sjohnlev 
531843e1988Sjohnlev 	intno = (*vector);
532843e1988Sjohnlev 
533843e1988Sjohnlev 	ASSERT(intno < NR_IRQS);
534843e1988Sjohnlev 	ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);
535843e1988Sjohnlev 
536349b53ddSStuart Maybee 	if (!ec_is_edge_pirq(intno))
537843e1988Sjohnlev 		ec_clear_irq(intno);
538843e1988Sjohnlev 
539843e1988Sjohnlev 	newipl = autovect[intno].avh_hi_pri;
540843e1988Sjohnlev 	if (newipl == 0) {
541843e1988Sjohnlev 		/*
542843e1988Sjohnlev 		 * (newipl == 0) means we have no service routines for this
543843e1988Sjohnlev 		 * vector.  We will treat this as a spurious interrupt.
544843e1988Sjohnlev 		 * We have cleared the pending bit already, clear the event
545843e1988Sjohnlev 		 * mask and return a spurious interrupt.  This case can happen
546843e1988Sjohnlev 		 * when an interrupt delivery is racing with the removal of
547843e1988Sjohnlev 		 * of the service routine for that interrupt.
548843e1988Sjohnlev 		 */
549843e1988Sjohnlev 		ec_unmask_irq(intno);
550843e1988Sjohnlev 		newipl = -1;	/* flag spurious interrupt */
551843e1988Sjohnlev 	} else if (newipl <= cpu->cpu_pri) {
552843e1988Sjohnlev 		/*
553843e1988Sjohnlev 		 * (newipl <= cpu->cpu_pri) means that we must be trying to
554843e1988Sjohnlev 		 * service a vector that was shared with a higher priority
555843e1988Sjohnlev 		 * isr.  The higher priority handler has been removed and
556843e1988Sjohnlev 		 * we need to service this int.  We can't return a lower
557843e1988Sjohnlev 		 * priority than current cpu priority.  Just synthesize a
558843e1988Sjohnlev 		 * priority to return that should be acceptable.
5590bc46f0dSStuart Maybee 		 * It should never happen that we synthesize a priority that
5600bc46f0dSStuart Maybee 		 * moves us from low-priority to high-priority that would make
5610bc46f0dSStuart Maybee 		 * a us incorrectly run on the high priority stack.
562843e1988Sjohnlev 		 */
563843e1988Sjohnlev 		newipl = cpu->cpu_pri + 1;	/* synthetic priority */
5640bc46f0dSStuart Maybee 		ASSERT(newipl != LOCK_LEVEL + 1);
565843e1988Sjohnlev 	}
566843e1988Sjohnlev 	return (newipl);
567843e1988Sjohnlev }
568843e1988Sjohnlev 
569843e1988Sjohnlev 
/*
 * xen_psm_intr_exit() restores the old interrupt priority level after
 * processing an interrupt: it first attempts to unmask the irq, then
 * applies "ipl" through xen_psm_setspl().
 * It is called with interrupts disabled, and does not enable interrupts.
 */
/* ARGSUSED */
static void
xen_psm_intr_exit(int ipl, int vector)
{
	ec_try_unmask_irq(vector);

	xen_psm_setspl(ipl);
}
582843e1988Sjohnlev 
583843e1988Sjohnlev intr_exit_fn_t
psm_intr_exit_fn(void)584843e1988Sjohnlev psm_intr_exit_fn(void)
585843e1988Sjohnlev {
586843e1988Sjohnlev 	return (xen_psm_intr_exit);
587843e1988Sjohnlev }
588843e1988Sjohnlev 
589843e1988Sjohnlev /*
590843e1988Sjohnlev  * Check if new ipl level allows delivery of previously unserviced events
591843e1988Sjohnlev  */
592843e1988Sjohnlev static void
xen_psm_setspl(int ipl)593843e1988Sjohnlev xen_psm_setspl(int ipl)
594843e1988Sjohnlev {
595843e1988Sjohnlev 	struct cpu *cpu = CPU;
596843e1988Sjohnlev 	volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
597843e1988Sjohnlev 	uint16_t pending;
598843e1988Sjohnlev 
599843e1988Sjohnlev 	ASSERT(vci->evtchn_upcall_mask != 0);
600843e1988Sjohnlev 
601843e1988Sjohnlev 	/*
602843e1988Sjohnlev 	 * If new ipl level will enable any pending interrupts, setup so the
603843e1988Sjohnlev 	 * upcoming sti will cause us to get an upcall.
604843e1988Sjohnlev 	 */
605843e1988Sjohnlev 	pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1);
606843e1988Sjohnlev 	if (pending) {
607843e1988Sjohnlev 		int i;
608843e1988Sjohnlev 		ulong_t pending_sels = 0;
609843e1988Sjohnlev 		volatile ulong_t *selp;
610843e1988Sjohnlev 		struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;
611843e1988Sjohnlev 
612843e1988Sjohnlev 		for (i = bsrw_insn(pending); i > ipl; i--)
613843e1988Sjohnlev 			pending_sels |= cpe->pending_sel[i];
614843e1988Sjohnlev 		ASSERT(pending_sels);
615843e1988Sjohnlev 		selp = (volatile ulong_t *)&vci->evtchn_pending_sel;
616843e1988Sjohnlev 		atomic_or_ulong(selp, pending_sels);
617843e1988Sjohnlev 		vci->evtchn_upcall_pending = 1;
618843e1988Sjohnlev 	}
619843e1988Sjohnlev }
620843e1988Sjohnlev 
621843e1988Sjohnlev /*
622843e1988Sjohnlev  * This function provides external interface to the nexus for all
623843e1988Sjohnlev  * functionality related to the new DDI interrupt framework.
624843e1988Sjohnlev  *
625843e1988Sjohnlev  * Input:
626843e1988Sjohnlev  * dip     - pointer to the dev_info structure of the requested device
627843e1988Sjohnlev  * hdlp    - pointer to the internal interrupt handle structure for the
628843e1988Sjohnlev  *	     requested interrupt
629843e1988Sjohnlev  * intr_op - opcode for this call
630843e1988Sjohnlev  * result  - pointer to the integer that will hold the result to be
631843e1988Sjohnlev  *	     passed back if return value is PSM_SUCCESS
632843e1988Sjohnlev  *
633843e1988Sjohnlev  * Output:
634843e1988Sjohnlev  * return value is either PSM_SUCCESS or PSM_FAILURE
635843e1988Sjohnlev  */
636843e1988Sjohnlev int
xen_intr_ops(dev_info_t * dip,ddi_intr_handle_impl_t * hdlp,psm_intr_op_t intr_op,int * result)637843e1988Sjohnlev xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
638843e1988Sjohnlev     psm_intr_op_t intr_op, int *result)
639843e1988Sjohnlev {
640843e1988Sjohnlev 	int		cap;
641843e1988Sjohnlev 	int		err;
642843e1988Sjohnlev 	int		new_priority;
643843e1988Sjohnlev 	apic_irq_t	*irqp;
644843e1988Sjohnlev 	struct intrspec *ispec;
645843e1988Sjohnlev 
646843e1988Sjohnlev 	DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p "
647843e1988Sjohnlev 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
648843e1988Sjohnlev 
649843e1988Sjohnlev 	switch (intr_op) {
650843e1988Sjohnlev 	case PSM_INTR_OP_CHECK_MSI:
651349b53ddSStuart Maybee 		/*
652349b53ddSStuart Maybee 		 * Till PCI passthru is supported, only dom0 has MSI/MSIX
653349b53ddSStuart Maybee 		 */
654843e1988Sjohnlev 		if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
655843e1988Sjohnlev 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
656843e1988Sjohnlev 			    DDI_INTR_TYPE_MSIX);
657843e1988Sjohnlev 			break;
658843e1988Sjohnlev 		}
659843e1988Sjohnlev 		/*
660843e1988Sjohnlev 		 * Check MSI/X is supported or not at APIC level and
661843e1988Sjohnlev 		 * masked off the MSI/X bits in hdlp->ih_type if not
662843e1988Sjohnlev 		 * supported before return.  If MSI/X is supported,
663843e1988Sjohnlev 		 * leave the ih_type unchanged and return.
664843e1988Sjohnlev 		 *
665843e1988Sjohnlev 		 * hdlp->ih_type passed in from the nexus has all the
666843e1988Sjohnlev 		 * interrupt types supported by the device.
667843e1988Sjohnlev 		 */
668843e1988Sjohnlev 		if (xen_support_msi == 0) {
669843e1988Sjohnlev 			/*
670843e1988Sjohnlev 			 * if xen_support_msi is not set, call
671843e1988Sjohnlev 			 * apic_check_msi_support() to check whether msi
672843e1988Sjohnlev 			 * is supported first
673843e1988Sjohnlev 			 */
674843e1988Sjohnlev 			if (apic_check_msi_support() == PSM_SUCCESS)
675843e1988Sjohnlev 				xen_support_msi = 1;
676843e1988Sjohnlev 			else
677843e1988Sjohnlev 				xen_support_msi = -1;
678843e1988Sjohnlev 		}
679843e1988Sjohnlev 		if (xen_support_msi == 1)
680843e1988Sjohnlev 			*result = hdlp->ih_type;
681843e1988Sjohnlev 		else
682843e1988Sjohnlev 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
683843e1988Sjohnlev 			    DDI_INTR_TYPE_MSIX);
684843e1988Sjohnlev 		break;
685843e1988Sjohnlev 	case PSM_INTR_OP_ALLOC_VECTORS:
686349b53ddSStuart Maybee 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
687349b53ddSStuart Maybee 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
688349b53ddSStuart Maybee 			    hdlp->ih_scratch1, hdlp->ih_pri,
689349b53ddSStuart Maybee 			    (int)(uintptr_t)hdlp->ih_scratch2);
690349b53ddSStuart Maybee 		else
691349b53ddSStuart Maybee 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
692349b53ddSStuart Maybee 			    hdlp->ih_scratch1, hdlp->ih_pri,
693843e1988Sjohnlev 			    (int)(uintptr_t)hdlp->ih_scratch2);
694843e1988Sjohnlev 		break;
695843e1988Sjohnlev 	case PSM_INTR_OP_FREE_VECTORS:
696843e1988Sjohnlev 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
697843e1988Sjohnlev 		    hdlp->ih_pri, hdlp->ih_type);
698843e1988Sjohnlev 		break;
699843e1988Sjohnlev 	case PSM_INTR_OP_NAVAIL_VECTORS:
700843e1988Sjohnlev 		/*
701843e1988Sjohnlev 		 * XXPV - maybe we should make this be:
702843e1988Sjohnlev 		 * min(APIC_VECTOR_PER_IPL, count of all avail vectors);
703843e1988Sjohnlev 		 */
704843e1988Sjohnlev 		if (DOMAIN_IS_INITDOMAIN(xen_info))
705843e1988Sjohnlev 			*result = APIC_VECTOR_PER_IPL;
706843e1988Sjohnlev 		else
707843e1988Sjohnlev 			*result = 1;
708843e1988Sjohnlev 		break;
709843e1988Sjohnlev 	case PSM_INTR_OP_XLATE_VECTOR:
710843e1988Sjohnlev 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
711843e1988Sjohnlev 		if (ispec->intrspec_vec >= PIRQ_BASE &&
712843e1988Sjohnlev 		    ispec->intrspec_vec < NR_PIRQS &&
713843e1988Sjohnlev 		    DOMAIN_IS_INITDOMAIN(xen_info)) {
714843e1988Sjohnlev 			*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
715843e1988Sjohnlev 		} else {
716843e1988Sjohnlev 			*result = ispec->intrspec_vec;
717843e1988Sjohnlev 		}
718843e1988Sjohnlev 		break;
719843e1988Sjohnlev 	case PSM_INTR_OP_GET_PENDING:
720843e1988Sjohnlev 		/* XXPV - is this enough for dom0 or do we need to ref ioapic */
721843e1988Sjohnlev 		*result = ec_pending_irq(hdlp->ih_vector);
722843e1988Sjohnlev 		break;
723843e1988Sjohnlev 	case PSM_INTR_OP_CLEAR_MASK:
724843e1988Sjohnlev 		/* XXPV - is this enough for dom0 or do we need to set ioapic */
725843e1988Sjohnlev 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
726843e1988Sjohnlev 			return (PSM_FAILURE);
727843e1988Sjohnlev 		ec_enable_irq(hdlp->ih_vector);
728843e1988Sjohnlev 		break;
729843e1988Sjohnlev 	case PSM_INTR_OP_SET_MASK:
730843e1988Sjohnlev 		/* XXPV - is this enough for dom0 or do we need to set ioapic */
731843e1988Sjohnlev 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
732843e1988Sjohnlev 			return (PSM_FAILURE);
733843e1988Sjohnlev 		ec_disable_irq(hdlp->ih_vector);
734843e1988Sjohnlev 		break;
735843e1988Sjohnlev 	case PSM_INTR_OP_GET_CAP:
736843e1988Sjohnlev 		cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE;
737843e1988Sjohnlev 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
738843e1988Sjohnlev 			cap |= DDI_INTR_FLAG_MASKABLE;
739843e1988Sjohnlev 		*result = cap;
740843e1988Sjohnlev 		break;
741843e1988Sjohnlev 	case PSM_INTR_OP_GET_SHARED:
742843e1988Sjohnlev 		if (DOMAIN_IS_INITDOMAIN(xen_info)) {
743843e1988Sjohnlev 			if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
744843e1988Sjohnlev 				return (PSM_FAILURE);
74596f82fefSSophia Li 			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
746843e1988Sjohnlev 			if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type))
747843e1988Sjohnlev 			    == NULL)
748843e1988Sjohnlev 				return (PSM_FAILURE);
74996f82fefSSophia Li 			*result = (irqp->airq_share > 1) ? 1: 0;
750843e1988Sjohnlev 		} else {
751843e1988Sjohnlev 			return (PSM_FAILURE);
752843e1988Sjohnlev 		}
753843e1988Sjohnlev 		break;
754843e1988Sjohnlev 	case PSM_INTR_OP_SET_PRI:
755843e1988Sjohnlev 		new_priority = *(int *)result;
756843e1988Sjohnlev 		err = ec_set_irq_priority(hdlp->ih_vector, new_priority);
757843e1988Sjohnlev 		if (err != 0)
758843e1988Sjohnlev 			return (PSM_FAILURE);
759843e1988Sjohnlev 		break;
760843e1988Sjohnlev 	case PSM_INTR_OP_GET_INTR:
761843e1988Sjohnlev 		if (!DOMAIN_IS_INITDOMAIN(xen_info))
762843e1988Sjohnlev 			return (PSM_FAILURE);
763843e1988Sjohnlev 		/*
764843e1988Sjohnlev 		 * The interrupt handle given here has been allocated
765843e1988Sjohnlev 		 * specifically for this command, and ih_private carries
766843e1988Sjohnlev 		 * a pointer to a apic_get_intr_t.
767843e1988Sjohnlev 		 */
768843e1988Sjohnlev 		if (apic_get_vector_intr_info(
769843e1988Sjohnlev 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
770843e1988Sjohnlev 			return (PSM_FAILURE);
771843e1988Sjohnlev 		break;
772843e1988Sjohnlev 	case PSM_INTR_OP_SET_CAP:
773843e1988Sjohnlev 		/* FALLTHRU */
774843e1988Sjohnlev 	default:
775843e1988Sjohnlev 		return (PSM_FAILURE);
776843e1988Sjohnlev 	}
777843e1988Sjohnlev 	return (PSM_SUCCESS);
778843e1988Sjohnlev }
779843e1988Sjohnlev 
780843e1988Sjohnlev static void
xen_psm_rebind_irq(int irq)781843e1988Sjohnlev xen_psm_rebind_irq(int irq)
782843e1988Sjohnlev {
783843e1988Sjohnlev 	cpuset_t ncpu;
784843e1988Sjohnlev 	processorid_t newcpu;
785b9bc7f78Ssmaybe 	apic_irq_t *irqptr;
786843e1988Sjohnlev 
787843e1988Sjohnlev 	newcpu = xen_psm_bind_intr(irq);
788843e1988Sjohnlev 	if (newcpu == IRQ_UNBOUND) {
789843e1988Sjohnlev 		CPUSET_ZERO(ncpu);
790843e1988Sjohnlev 		CPUSET_OR(ncpu, xen_psm_cpus_online);
791843e1988Sjohnlev 	} else {
792843e1988Sjohnlev 		CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND);
793843e1988Sjohnlev 	}
794843e1988Sjohnlev 	ec_set_irq_affinity(irq, ncpu);
795a43153bfSsmaybe 	if (irq <= APIC_MAX_VECTOR) {
796b9bc7f78Ssmaybe 		irqptr = apic_irq_table[irq];
797b9bc7f78Ssmaybe 		ASSERT(irqptr != NULL);
798b9bc7f78Ssmaybe 		irqptr->airq_temp_cpu = (uchar_t)newcpu;
799843e1988Sjohnlev 	}
800a43153bfSsmaybe }
801843e1988Sjohnlev 
802843e1988Sjohnlev /*
803843e1988Sjohnlev  * Disable all device interrupts for the given cpu.
804843e1988Sjohnlev  * High priority interrupts are not disabled and will still be serviced.
805843e1988Sjohnlev  */
806843e1988Sjohnlev static int
xen_psm_disable_intr(processorid_t cpun)807843e1988Sjohnlev xen_psm_disable_intr(processorid_t cpun)
808843e1988Sjohnlev {
809843e1988Sjohnlev 	int irq;
810843e1988Sjohnlev 
811843e1988Sjohnlev 	/*
812843e1988Sjohnlev 	 * Can't offline VCPU 0 on this hypervisor.  There's no reason
813843e1988Sjohnlev 	 * anyone would want to given that the CPUs are virtual. Also note
814843e1988Sjohnlev 	 * that the hypervisor requires suspend/resume to be on VCPU 0.
815843e1988Sjohnlev 	 */
816843e1988Sjohnlev 	if (cpun == 0)
817843e1988Sjohnlev 		return (PSM_FAILURE);
818843e1988Sjohnlev 
819b9bc7f78Ssmaybe 	CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun);
820843e1988Sjohnlev 	for (irq = 0; irq < NR_IRQS; irq++) {
821843e1988Sjohnlev 		if (!ec_irq_needs_rebind(irq, cpun))
822843e1988Sjohnlev 			continue;
823843e1988Sjohnlev 		xen_psm_rebind_irq(irq);
824843e1988Sjohnlev 	}
825843e1988Sjohnlev 	return (PSM_SUCCESS);
826843e1988Sjohnlev }
827843e1988Sjohnlev 
828843e1988Sjohnlev static void
xen_psm_enable_intr(processorid_t cpun)829843e1988Sjohnlev xen_psm_enable_intr(processorid_t cpun)
830843e1988Sjohnlev {
831843e1988Sjohnlev 	int irq;
832843e1988Sjohnlev 
833843e1988Sjohnlev 	if (cpun == 0)
834843e1988Sjohnlev 		return;
835843e1988Sjohnlev 
836b9bc7f78Ssmaybe 	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun);
837843e1988Sjohnlev 
838843e1988Sjohnlev 	/*
839843e1988Sjohnlev 	 * Rebalance device interrupts among online processors
840843e1988Sjohnlev 	 */
841843e1988Sjohnlev 	for (irq = 0; irq < NR_IRQS; irq++) {
842843e1988Sjohnlev 		if (!ec_irq_rebindable(irq))
843843e1988Sjohnlev 			continue;
844843e1988Sjohnlev 		xen_psm_rebind_irq(irq);
845843e1988Sjohnlev 	}
846afbc4541Ssherrym 
847afbc4541Ssherrym 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
848afbc4541Ssherrym 		apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
849afbc4541Ssherrym 	}
850843e1988Sjohnlev }
851843e1988Sjohnlev 
852b9bc7f78Ssmaybe static int
xen_psm_post_cpu_start()853b9bc7f78Ssmaybe xen_psm_post_cpu_start()
854b9bc7f78Ssmaybe {
855b9bc7f78Ssmaybe 	processorid_t cpun;
856b9bc7f78Ssmaybe 
857b9bc7f78Ssmaybe 	cpun = psm_get_cpu_id();
858410c4fb9Ssmaybe 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
859643e2e74Sbholler 		/*
860643e2e74Sbholler 		 * Non-virtualized environments can call psm_post_cpu_start
861643e2e74Sbholler 		 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set.
862643e2e74Sbholler 		 * xen_psm_post_cpu_start() is only called from boot.
863643e2e74Sbholler 		 */
864643e2e74Sbholler 		apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
865b9bc7f78Ssmaybe 	}
866b9bc7f78Ssmaybe 	return (PSM_SUCCESS);
867b9bc7f78Ssmaybe }
868b9bc7f78Ssmaybe 
869843e1988Sjohnlev /*
870843e1988Sjohnlev  * This function will reprogram the timer.
871843e1988Sjohnlev  *
872843e1988Sjohnlev  * When in oneshot mode the argument is the absolute time in future at which to
873843e1988Sjohnlev  * generate the interrupt.
874843e1988Sjohnlev  *
875843e1988Sjohnlev  * When in periodic mode, the argument is the interval at which the
876843e1988Sjohnlev  * interrupts should be generated. There is no need to support the periodic
877843e1988Sjohnlev  * mode timer change at this time.
878843e1988Sjohnlev  *
879843e1988Sjohnlev  * Note that we must be careful to convert from hrtime to Xen system time (see
880843e1988Sjohnlev  * xpv_timestamp.c).
881843e1988Sjohnlev  */
882843e1988Sjohnlev static void
xen_psm_timer_reprogram(hrtime_t timer_req)883843e1988Sjohnlev xen_psm_timer_reprogram(hrtime_t timer_req)
884843e1988Sjohnlev {
885843e1988Sjohnlev 	hrtime_t now, timer_new, time_delta, xen_time;
886843e1988Sjohnlev 	ulong_t flags;
887843e1988Sjohnlev 
888843e1988Sjohnlev 	flags = intr_clear();
889843e1988Sjohnlev 	/*
890843e1988Sjohnlev 	 * We should be called from high PIL context (CBE_HIGH_PIL),
891843e1988Sjohnlev 	 * so kpreempt is disabled.
892843e1988Sjohnlev 	 */
893843e1988Sjohnlev 
894843e1988Sjohnlev 	now = xpv_gethrtime();
895843e1988Sjohnlev 	xen_time = xpv_getsystime();
896843e1988Sjohnlev 	if (timer_req <= now) {
897843e1988Sjohnlev 		/*
898843e1988Sjohnlev 		 * requested to generate an interrupt in the past
899843e1988Sjohnlev 		 * generate an interrupt as soon as possible
900843e1988Sjohnlev 		 */
901843e1988Sjohnlev 		time_delta = XEN_NSEC_PER_TICK;
902843e1988Sjohnlev 	} else
903843e1988Sjohnlev 		time_delta = timer_req - now;
904843e1988Sjohnlev 
905843e1988Sjohnlev 	timer_new = xen_time + time_delta;
906843e1988Sjohnlev 	if (HYPERVISOR_set_timer_op(timer_new) != 0)
907843e1988Sjohnlev 		panic("can't set hypervisor timer?");
908843e1988Sjohnlev 	intr_restore(flags);
909843e1988Sjohnlev }
910843e1988Sjohnlev 
911843e1988Sjohnlev /*
912843e1988Sjohnlev  * This function will enable timer interrupts.
913843e1988Sjohnlev  */
914843e1988Sjohnlev static void
xen_psm_timer_enable(void)915843e1988Sjohnlev xen_psm_timer_enable(void)
916843e1988Sjohnlev {
917843e1988Sjohnlev 	ec_unmask_irq(xen_clock_irq);
918843e1988Sjohnlev }
919843e1988Sjohnlev 
920843e1988Sjohnlev /*
921843e1988Sjohnlev  * This function will disable timer interrupts on the current cpu.
922843e1988Sjohnlev  */
923843e1988Sjohnlev static void
xen_psm_timer_disable(void)924843e1988Sjohnlev xen_psm_timer_disable(void)
925843e1988Sjohnlev {
926843e1988Sjohnlev 	(void) ec_block_irq(xen_clock_irq);
927843e1988Sjohnlev 	/*
928843e1988Sjohnlev 	 * If the clock irq is pending on this cpu then we need to
929843e1988Sjohnlev 	 * clear the pending interrupt.
930843e1988Sjohnlev 	 */
931843e1988Sjohnlev 	ec_unpend_irq(xen_clock_irq);
932843e1988Sjohnlev }
933843e1988Sjohnlev 
934843e1988Sjohnlev /*
935843e1988Sjohnlev  *
936843e1988Sjohnlev  * The following functions are in the platform specific file so that they
937843e1988Sjohnlev  * can be different functions depending on whether we are running on
938843e1988Sjohnlev  * bare metal or a hypervisor.
939843e1988Sjohnlev  */
940843e1988Sjohnlev 
941843e1988Sjohnlev /*
942843e1988Sjohnlev  * Allocate a free vector for irq at ipl.
943843e1988Sjohnlev  */
944843e1988Sjohnlev /* ARGSUSED */
945843e1988Sjohnlev uchar_t
apic_allocate_vector(int ipl,int irq,int pri)946843e1988Sjohnlev apic_allocate_vector(int ipl, int irq, int pri)
947843e1988Sjohnlev {
948843e1988Sjohnlev 	physdev_irq_t irq_op;
949843e1988Sjohnlev 	uchar_t vector;
950349b53ddSStuart Maybee 	int rc;
951843e1988Sjohnlev 
952843e1988Sjohnlev 	irq_op.irq = irq;
953843e1988Sjohnlev 
954349b53ddSStuart Maybee 	if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
955349b53ddSStuart Maybee 	    != 0)
956349b53ddSStuart Maybee 		panic("Hypervisor alloc vector failed err: %d", -rc);
957843e1988Sjohnlev 	vector = irq_op.vector;
958843e1988Sjohnlev 	/*
959843e1988Sjohnlev 	 * No need to worry about vector colliding with our reserved vectors
960843e1988Sjohnlev 	 * e.g. T_FASTTRAP, xen can differentiate between hardware and software
961843e1988Sjohnlev 	 * generated traps and handle them properly.
962843e1988Sjohnlev 	 */
963843e1988Sjohnlev 	apic_vector_to_irq[vector] = (uchar_t)irq;
964843e1988Sjohnlev 	return (vector);
965843e1988Sjohnlev }
966843e1988Sjohnlev 
967843e1988Sjohnlev /* Mark vector as not being used by any irq */
968843e1988Sjohnlev void
apic_free_vector(uchar_t vector)969843e1988Sjohnlev apic_free_vector(uchar_t vector)
970843e1988Sjohnlev {
971843e1988Sjohnlev 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
972843e1988Sjohnlev }
973843e1988Sjohnlev 
974843e1988Sjohnlev /*
975349b53ddSStuart Maybee  * This function returns the no. of vectors available for the pri.
976349b53ddSStuart Maybee  * dip is not used at this moment.  If we really don't need that,
977349b53ddSStuart Maybee  * it will be removed.  Since priority is not limited by hardware
978349b53ddSStuart Maybee  * when running on the hypervisor we simply return the maximum no.
979349b53ddSStuart Maybee  * of available contiguous vectors.
980349b53ddSStuart Maybee  */
981349b53ddSStuart Maybee /*ARGSUSED*/
982349b53ddSStuart Maybee int
apic_navail_vector(dev_info_t * dip,int pri)983349b53ddSStuart Maybee apic_navail_vector(dev_info_t *dip, int pri)
984349b53ddSStuart Maybee {
985349b53ddSStuart Maybee 	int	lowest, highest, i, navail, count;
986349b53ddSStuart Maybee 
987349b53ddSStuart Maybee 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
988349b53ddSStuart Maybee 	    (void *)dip, pri));
989349b53ddSStuart Maybee 
990349b53ddSStuart Maybee 	highest = APIC_MAX_VECTOR;
991349b53ddSStuart Maybee 	lowest = APIC_BASE_VECT;
992349b53ddSStuart Maybee 	navail = count = 0;
993349b53ddSStuart Maybee 
994349b53ddSStuart Maybee 	/* It has to be contiguous */
995349b53ddSStuart Maybee 	for (i = lowest; i < highest; i++) {
996349b53ddSStuart Maybee 		count = 0;
997349b53ddSStuart Maybee 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
998349b53ddSStuart Maybee 		    (i < highest)) {
999349b53ddSStuart Maybee 			count++;
1000349b53ddSStuart Maybee 			i++;
1001349b53ddSStuart Maybee 		}
1002349b53ddSStuart Maybee 		if (count > navail)
1003349b53ddSStuart Maybee 			navail = count;
1004349b53ddSStuart Maybee 	}
1005349b53ddSStuart Maybee 	return (navail);
1006349b53ddSStuart Maybee }
1007349b53ddSStuart Maybee 
1008349b53ddSStuart Maybee static physdev_manage_pci_t *managed_devlist;
1009349b53ddSStuart Maybee static int mdev_cnt;
1010349b53ddSStuart Maybee static int mdev_size = 128;
1011349b53ddSStuart Maybee static uchar_t	msi_vector_to_pirq[APIC_MAX_VECTOR+1];
1012349b53ddSStuart Maybee 
1013349b53ddSStuart Maybee /*
1014349b53ddSStuart Maybee  * Add devfn on given bus to devices managed by hypervisor
1015843e1988Sjohnlev  */
1016843e1988Sjohnlev static int
xen_manage_device(uint8_t bus,uint8_t devfn)1017349b53ddSStuart Maybee xen_manage_device(uint8_t bus, uint8_t devfn)
1018349b53ddSStuart Maybee {
1019349b53ddSStuart Maybee 	physdev_manage_pci_t manage_pci, *newlist;
1020349b53ddSStuart Maybee 	int rc, i, oldsize;
1021349b53ddSStuart Maybee 
1022349b53ddSStuart Maybee 	/*
1023349b53ddSStuart Maybee 	 * Check if bus/devfn already managed.  If so just return success.
1024349b53ddSStuart Maybee 	 */
1025349b53ddSStuart Maybee 	if (managed_devlist == NULL) {
1026349b53ddSStuart Maybee 		managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) *
1027349b53ddSStuart Maybee 		    mdev_size, KM_NOSLEEP);
1028349b53ddSStuart Maybee 		if (managed_devlist == NULL) {
1029349b53ddSStuart Maybee 			cmn_err(CE_WARN,
1030349b53ddSStuart Maybee 			    "Can't alloc space for managed device list");
1031349b53ddSStuart Maybee 			return (0);
1032349b53ddSStuart Maybee 		}
1033349b53ddSStuart Maybee 	};
1034349b53ddSStuart Maybee 	for (i = 0; i < mdev_cnt; i++) {
1035349b53ddSStuart Maybee 		if (managed_devlist[i].bus == bus &&
1036349b53ddSStuart Maybee 		    managed_devlist[i].devfn == devfn)
1037349b53ddSStuart Maybee 			return (1); /* device already managed */
1038349b53ddSStuart Maybee 	}
1039349b53ddSStuart Maybee 	manage_pci.bus = bus;
1040349b53ddSStuart Maybee 	manage_pci.devfn = devfn;
1041349b53ddSStuart Maybee 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
1042349b53ddSStuart Maybee 	if (rc < 0) {
1043349b53ddSStuart Maybee 		cmn_err(CE_WARN,
1044349b53ddSStuart Maybee 		    "hypervisor add pci device call failed bus:0x%x"
1045349b53ddSStuart Maybee 		    " devfn:0x%x", bus, devfn);
1046349b53ddSStuart Maybee 		return (0);
1047349b53ddSStuart Maybee 	}
1048349b53ddSStuart Maybee 	/*
1049349b53ddSStuart Maybee 	 * Add device to the managed device list
1050349b53ddSStuart Maybee 	 */
1051349b53ddSStuart Maybee 	if (i == mdev_size) {
1052349b53ddSStuart Maybee 		/*
1053349b53ddSStuart Maybee 		 * grow the managed device list
1054349b53ddSStuart Maybee 		 */
1055349b53ddSStuart Maybee 		oldsize = mdev_size * sizeof (physdev_manage_pci_t);
1056349b53ddSStuart Maybee 		mdev_size *= 2;
1057349b53ddSStuart Maybee 		newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size,
1058349b53ddSStuart Maybee 		    KM_NOSLEEP);
1059349b53ddSStuart Maybee 		if (newlist == NULL) {
1060349b53ddSStuart Maybee 			cmn_err(CE_WARN, "Can't grow managed device list");
1061349b53ddSStuart Maybee 			return (0);
1062349b53ddSStuart Maybee 		}
1063349b53ddSStuart Maybee 		bcopy(managed_devlist, newlist, oldsize);
1064349b53ddSStuart Maybee 		kmem_free(managed_devlist, oldsize);
1065349b53ddSStuart Maybee 		managed_devlist = newlist;
1066349b53ddSStuart Maybee 	}
1067349b53ddSStuart Maybee 	managed_devlist[i].bus = bus;
1068349b53ddSStuart Maybee 	managed_devlist[i].devfn = devfn;
1069349b53ddSStuart Maybee 	mdev_cnt++;
1070349b53ddSStuart Maybee 	return (1);
1071349b53ddSStuart Maybee }
1072349b53ddSStuart Maybee 
1073349b53ddSStuart Maybee /*
1074349b53ddSStuart Maybee  * allocate an apic irq struct for an MSI interrupt
1075349b53ddSStuart Maybee  */
1076349b53ddSStuart Maybee static int
msi_allocate_irq(int irq)1077349b53ddSStuart Maybee msi_allocate_irq(int irq)
1078349b53ddSStuart Maybee {
1079349b53ddSStuart Maybee 	apic_irq_t *irqptr = apic_irq_table[irq];
1080349b53ddSStuart Maybee 
1081349b53ddSStuart Maybee 	if (irqptr == NULL) {
1082349b53ddSStuart Maybee 		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
1083349b53ddSStuart Maybee 		if (irqptr == NULL) {
1084349b53ddSStuart Maybee 			cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ");
1085349b53ddSStuart Maybee 			return (-1);
1086349b53ddSStuart Maybee 		}
1087349b53ddSStuart Maybee 		apic_irq_table[irq] = irqptr;
1088349b53ddSStuart Maybee 	} else {
1089349b53ddSStuart Maybee 		if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0)
1090349b53ddSStuart Maybee 			irqptr->airq_mps_intr_index = FREE_INDEX;
1091349b53ddSStuart Maybee 		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
1092349b53ddSStuart Maybee 			cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use");
1093349b53ddSStuart Maybee 			return (-1);
1094349b53ddSStuart Maybee 		}
1095349b53ddSStuart Maybee 	}
1096349b53ddSStuart Maybee 	irqptr->airq_mps_intr_index = FREE_INDEX;
1097349b53ddSStuart Maybee 	return (irq);
1098349b53ddSStuart Maybee }
1099349b53ddSStuart Maybee 
1100349b53ddSStuart Maybee /*
1101349b53ddSStuart Maybee  * read MSI/MSIX vector out of config space
1102349b53ddSStuart Maybee  */
1103349b53ddSStuart Maybee static uchar_t
xpv_psm_get_msi_vector(dev_info_t * dip,int type,int entry)1104349b53ddSStuart Maybee xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry)
1105349b53ddSStuart Maybee {
1106349b53ddSStuart Maybee 	uint64_t		msi_data = 0;
1107349b53ddSStuart Maybee 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1108349b53ddSStuart Maybee 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
1109349b53ddSStuart Maybee 	ushort_t		msi_ctrl;
1110349b53ddSStuart Maybee 	uchar_t			vector;
1111349b53ddSStuart Maybee 
1112349b53ddSStuart Maybee 	ASSERT((handle != NULL) && (cap_ptr != 0));
1113349b53ddSStuart Maybee 	if (type == DDI_INTR_TYPE_MSI) {
1114349b53ddSStuart Maybee 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1115349b53ddSStuart Maybee 		/*
1116349b53ddSStuart Maybee 		 * Get vector
1117349b53ddSStuart Maybee 		 */
1118349b53ddSStuart Maybee 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
1119349b53ddSStuart Maybee 			msi_data = pci_config_get16(handle,
1120349b53ddSStuart Maybee 			    cap_ptr + PCI_MSI_64BIT_DATA);
1121349b53ddSStuart Maybee 		} else {
1122349b53ddSStuart Maybee 			msi_data = pci_config_get16(handle,
1123349b53ddSStuart Maybee 			    cap_ptr + PCI_MSI_32BIT_DATA);
1124349b53ddSStuart Maybee 		}
1125c5d6fa0cSFrank Van Der Linden 		vector = (msi_data & 0xff) + entry;
1126349b53ddSStuart Maybee 	} else if (type == DDI_INTR_TYPE_MSIX) {
1127349b53ddSStuart Maybee 		uintptr_t	off;
1128349b53ddSStuart Maybee 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
1129349b53ddSStuart Maybee 
1130349b53ddSStuart Maybee 		/* Offset into the given entry in the MSI-X table */
1131349b53ddSStuart Maybee 		off = (uintptr_t)msix_p->msix_tbl_addr +
1132349b53ddSStuart Maybee 		    (entry  * PCI_MSIX_VECTOR_SIZE);
1133349b53ddSStuart Maybee 
1134349b53ddSStuart Maybee 		msi_data = ddi_get32(msix_p->msix_tbl_hdl,
1135349b53ddSStuart Maybee 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET));
1136349b53ddSStuart Maybee 		vector = msi_data & 0xff;
1137c5d6fa0cSFrank Van Der Linden 	}
1138349b53ddSStuart Maybee 	return (vector);
1139349b53ddSStuart Maybee }
1140349b53ddSStuart Maybee 
1141349b53ddSStuart Maybee 
1142349b53ddSStuart Maybee static void
get_busdevfn(dev_info_t * dip,int * busp,int * devfnp)1143349b53ddSStuart Maybee get_busdevfn(dev_info_t *dip, int *busp, int *devfnp)
1144349b53ddSStuart Maybee {
1145349b53ddSStuart Maybee 	pci_regspec_t *regspec;
1146349b53ddSStuart Maybee 	int reglen;
1147349b53ddSStuart Maybee 
1148349b53ddSStuart Maybee 	/*
1149349b53ddSStuart Maybee 	 * Get device reg spec, first word has PCI bus and
1150349b53ddSStuart Maybee 	 * device/function info we need.
1151349b53ddSStuart Maybee 	 */
1152349b53ddSStuart Maybee 	if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg",
1153349b53ddSStuart Maybee 	    (caddr_t)&regspec, &reglen) != DDI_SUCCESS) {
1154349b53ddSStuart Maybee 		cmn_err(CE_WARN,
1155349b53ddSStuart Maybee 		    "get_busdevfn() failed to get regspec.");
1156349b53ddSStuart Maybee 		return;
1157349b53ddSStuart Maybee 	}
1158349b53ddSStuart Maybee 	/*
1159349b53ddSStuart Maybee 	 * get PCI bus # from reg spec for device
1160349b53ddSStuart Maybee 	 */
1161349b53ddSStuart Maybee 	*busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi);
1162349b53ddSStuart Maybee 	/*
1163349b53ddSStuart Maybee 	 * get combined device/function from reg spec for device.
1164349b53ddSStuart Maybee 	 */
1165349b53ddSStuart Maybee 	*devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >>
1166349b53ddSStuart Maybee 	    PCI_REG_FUNC_SHIFT;
1167349b53ddSStuart Maybee 
1168349b53ddSStuart Maybee 	kmem_free(regspec, reglen);
1169349b53ddSStuart Maybee }
1170349b53ddSStuart Maybee 
1171349b53ddSStuart Maybee /*
1172349b53ddSStuart Maybee  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
1173349b53ddSStuart Maybee  */
1174349b53ddSStuart Maybee int
apic_alloc_msi_vectors(dev_info_t * dip,int inum,int count,int pri,int behavior)1175349b53ddSStuart Maybee apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
1176843e1988Sjohnlev     int behavior)
1177843e1988Sjohnlev {
1178349b53ddSStuart Maybee 	int	rcount, i, rc, irqno;
1179843e1988Sjohnlev 	uchar_t	vector, cpu;
1180843e1988Sjohnlev 	major_t	major;
1181843e1988Sjohnlev 	apic_irq_t	*irqptr;
1182349b53ddSStuart Maybee 	physdev_map_pirq_t map_irq;
1183349b53ddSStuart Maybee 	int busnum, devfn;
1184843e1988Sjohnlev 
1185349b53ddSStuart Maybee 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
1186843e1988Sjohnlev 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
1187349b53ddSStuart Maybee 	    (void *)dip, inum, pri, count, behavior));
1188843e1988Sjohnlev 
1189843e1988Sjohnlev 	if (count > 1) {
1190843e1988Sjohnlev 		if (behavior == DDI_INTR_ALLOC_STRICT &&
119163ea9ad2SEvan Yan 		    apic_multi_msi_enable == 0)
1192843e1988Sjohnlev 			return (0);
1193843e1988Sjohnlev 		if (apic_multi_msi_enable == 0)
1194843e1988Sjohnlev 			count = 1;
1195843e1988Sjohnlev 	}
1196843e1988Sjohnlev 
1197349b53ddSStuart Maybee 	if ((rcount = apic_navail_vector(dip, pri)) > count)
1198843e1988Sjohnlev 		rcount = count;
1199349b53ddSStuart Maybee 	else if (rcount == 0 || (rcount < count &&
1200349b53ddSStuart Maybee 	    behavior == DDI_INTR_ALLOC_STRICT))
1201349b53ddSStuart Maybee 		return (0);
1202349b53ddSStuart Maybee 
1203349b53ddSStuart Maybee 	/* if not ISP2, then round it down */
1204349b53ddSStuart Maybee 	if (!ISP2(rcount))
1205349b53ddSStuart Maybee 		rcount = 1 << (highbit(rcount) - 1);
1206349b53ddSStuart Maybee 
1207349b53ddSStuart Maybee 	/*
1208349b53ddSStuart Maybee 	 * get PCI bus #  and devfn from reg spec for device
1209349b53ddSStuart Maybee 	 */
1210349b53ddSStuart Maybee 	get_busdevfn(dip, &busnum, &devfn);
1211349b53ddSStuart Maybee 
1212349b53ddSStuart Maybee 	/*
1213349b53ddSStuart Maybee 	 * Tell xen about this pci device
1214349b53ddSStuart Maybee 	 */
1215349b53ddSStuart Maybee 	if (!xen_manage_device(busnum, devfn))
1216349b53ddSStuart Maybee 		return (0);
1217843e1988Sjohnlev 
1218843e1988Sjohnlev 	mutex_enter(&airq_mutex);
1219843e1988Sjohnlev 
1220349b53ddSStuart Maybee 	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
1221843e1988Sjohnlev 	for (i = 0; i < rcount; i++) {
1222349b53ddSStuart Maybee 		/*
1223349b53ddSStuart Maybee 		 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq
1224349b53ddSStuart Maybee 		 */
1225349b53ddSStuart Maybee 		map_irq.domid = DOMID_SELF;
1226349b53ddSStuart Maybee 		map_irq.type = MAP_PIRQ_TYPE_MSI;
1227c5d6fa0cSFrank Van Der Linden 		map_irq.index = -rcount; /* hypervisor auto allocates vectors */
1228349b53ddSStuart Maybee 		map_irq.pirq = -1;
1229349b53ddSStuart Maybee 		map_irq.bus = busnum;
1230349b53ddSStuart Maybee 		map_irq.devfn = devfn;
1231c5d6fa0cSFrank Van Der Linden 		map_irq.entry_nr = i;
1232349b53ddSStuart Maybee 		map_irq.table_base = 0;
1233349b53ddSStuart Maybee 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1234349b53ddSStuart Maybee 		irqno = map_irq.pirq;
1235349b53ddSStuart Maybee 		if (rc < 0) {
1236843e1988Sjohnlev 			mutex_exit(&airq_mutex);
1237349b53ddSStuart Maybee 			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
1238c5d6fa0cSFrank Van Der Linden 			return (i);
1239843e1988Sjohnlev 		}
1240349b53ddSStuart Maybee 		if (irqno < 0) {
1241349b53ddSStuart Maybee 			mutex_exit(&airq_mutex);
1242349b53ddSStuart Maybee 			cmn_err(CE_NOTE,
1243349b53ddSStuart Maybee 			    "!hypervisor not configured for MSI support");
1244349b53ddSStuart Maybee 			xen_support_msi = -1;
1245349b53ddSStuart Maybee 			return (0);
1246349b53ddSStuart Maybee 		}
1247c5d6fa0cSFrank Van Der Linden 
1248349b53ddSStuart Maybee 		/*
1249349b53ddSStuart Maybee 		 * Find out what vector the hypervisor assigned
1250349b53ddSStuart Maybee 		 */
1251c5d6fa0cSFrank Van Der Linden 		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, i);
1252c5d6fa0cSFrank Van Der Linden 
1253c5d6fa0cSFrank Van Der Linden 		if (msi_allocate_irq(irqno) < 0) {
1254c5d6fa0cSFrank Van Der Linden 			mutex_exit(&airq_mutex);
1255c5d6fa0cSFrank Van Der Linden 			return (i);
1256c5d6fa0cSFrank Van Der Linden 		}
1257843e1988Sjohnlev 		apic_max_device_irq = max(irqno, apic_max_device_irq);
1258843e1988Sjohnlev 		apic_min_device_irq = min(irqno, apic_min_device_irq);
1259843e1988Sjohnlev 		irqptr = apic_irq_table[irqno];
1260349b53ddSStuart Maybee 		ASSERT(irqptr != NULL);
1261843e1988Sjohnlev #ifdef	DEBUG
1262843e1988Sjohnlev 		if (apic_vector_to_irq[vector] != APIC_RESV_IRQ)
1263349b53ddSStuart Maybee 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
1264843e1988Sjohnlev 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
1265843e1988Sjohnlev #endif
1266349b53ddSStuart Maybee 		apic_vector_to_irq[vector] = (uchar_t)irqno;
1267349b53ddSStuart Maybee 		msi_vector_to_pirq[vector] = (uchar_t)irqno;
1268843e1988Sjohnlev 
1269843e1988Sjohnlev 		irqptr->airq_vector = vector;
1270843e1988Sjohnlev 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
1271843e1988Sjohnlev 		irqptr->airq_intin_no = (uchar_t)rcount;
1272843e1988Sjohnlev 		irqptr->airq_ipl = pri;
1273843e1988Sjohnlev 		irqptr->airq_origirq = (uchar_t)(inum + i);
1274843e1988Sjohnlev 		irqptr->airq_share_id = 0;
1275843e1988Sjohnlev 		irqptr->airq_mps_intr_index = MSI_INDEX;
1276843e1988Sjohnlev 		irqptr->airq_dip = dip;
1277843e1988Sjohnlev 		irqptr->airq_major = major;
1278349b53ddSStuart Maybee 		if (i == 0) /* they all bind to the same cpu */
1279349b53ddSStuart Maybee 			cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno);
1280843e1988Sjohnlev 		else
1281843e1988Sjohnlev 			irqptr->airq_cpu = cpu;
1282349b53ddSStuart Maybee 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
1283843e1988Sjohnlev 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
1284843e1988Sjohnlev 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
1285843e1988Sjohnlev 		    irqptr->airq_origirq, pri));
1286843e1988Sjohnlev 	}
1287843e1988Sjohnlev 	mutex_exit(&airq_mutex);
1288843e1988Sjohnlev 	return (rcount);
1289843e1988Sjohnlev }
1290843e1988Sjohnlev 
1291843e1988Sjohnlev /*
1292349b53ddSStuart Maybee  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
1293349b53ddSStuart Maybee  */
1294349b53ddSStuart Maybee int
apic_alloc_msix_vectors(dev_info_t * dip,int inum,int count,int pri,int behavior)1295349b53ddSStuart Maybee apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
1296349b53ddSStuart Maybee     int behavior)
1297349b53ddSStuart Maybee {
1298349b53ddSStuart Maybee 	int	rcount, i, rc;
1299349b53ddSStuart Maybee 	major_t	major;
1300349b53ddSStuart Maybee 	physdev_map_pirq_t map_irq;
1301349b53ddSStuart Maybee 	int busnum, devfn;
1302349b53ddSStuart Maybee 	ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);
1303349b53ddSStuart Maybee 	uint64_t table_base;
1304349b53ddSStuart Maybee 	pfn_t pfnum;
1305349b53ddSStuart Maybee 
1306349b53ddSStuart Maybee 	if (msix_p == NULL) {
1307349b53ddSStuart Maybee 		msix_p = pci_msix_init(dip);
1308349b53ddSStuart Maybee 		if (msix_p != NULL) {
1309349b53ddSStuart Maybee 			i_ddi_set_msix(dip, msix_p);
1310349b53ddSStuart Maybee 		} else {
1311349b53ddSStuart Maybee 			cmn_err(CE_WARN, "apic_alloc_msix_vectors()"
1312349b53ddSStuart Maybee 			    " msix_init failed");
1313349b53ddSStuart Maybee 			return (0);
1314349b53ddSStuart Maybee 		}
1315349b53ddSStuart Maybee 	}
1316349b53ddSStuart Maybee 	/*
131777979b9bSStuart Maybee 	 * Hypervisor wants PCI config space address of msix table base
1318349b53ddSStuart Maybee 	 */
1319349b53ddSStuart Maybee 	pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) &
1320349b53ddSStuart Maybee 	    ~PFN_IS_FOREIGN_MFN;
132177979b9bSStuart Maybee 	table_base = (uint64_t)((pfnum << PAGESHIFT) - msix_p->msix_tbl_offset |
1322349b53ddSStuart Maybee 	    ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET));
1323349b53ddSStuart Maybee 	/*
1324349b53ddSStuart Maybee 	 * get PCI bus #  and devfn from reg spec for device
1325349b53ddSStuart Maybee 	 */
1326349b53ddSStuart Maybee 	get_busdevfn(dip, &busnum, &devfn);
1327349b53ddSStuart Maybee 
1328349b53ddSStuart Maybee 	/*
1329349b53ddSStuart Maybee 	 * Tell xen about this pci device
1330349b53ddSStuart Maybee 	 */
1331349b53ddSStuart Maybee 	if (!xen_manage_device(busnum, devfn))
1332349b53ddSStuart Maybee 		return (0);
1333349b53ddSStuart Maybee 	mutex_enter(&airq_mutex);
1334349b53ddSStuart Maybee 
1335349b53ddSStuart Maybee 	if ((rcount = apic_navail_vector(dip, pri)) > count)
1336349b53ddSStuart Maybee 		rcount = count;
1337349b53ddSStuart Maybee 	else if (rcount == 0 || (rcount < count &&
1338349b53ddSStuart Maybee 	    behavior == DDI_INTR_ALLOC_STRICT)) {
1339349b53ddSStuart Maybee 		rcount = 0;
1340349b53ddSStuart Maybee 		goto out;
1341349b53ddSStuart Maybee 	}
1342349b53ddSStuart Maybee 
1343349b53ddSStuart Maybee 	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
1344349b53ddSStuart Maybee 	for (i = 0; i < rcount; i++) {
1345349b53ddSStuart Maybee 		int irqno;
1346349b53ddSStuart Maybee 		uchar_t	vector;
1347349b53ddSStuart Maybee 		apic_irq_t	*irqptr;
1348349b53ddSStuart Maybee 
1349349b53ddSStuart Maybee 		/*
1350349b53ddSStuart Maybee 		 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq
1351349b53ddSStuart Maybee 		 */
1352349b53ddSStuart Maybee 		map_irq.domid = DOMID_SELF;
1353349b53ddSStuart Maybee 		map_irq.type = MAP_PIRQ_TYPE_MSI;
1354349b53ddSStuart Maybee 		map_irq.index = -1; /* hypervisor auto allocates vector */
1355349b53ddSStuart Maybee 		map_irq.pirq = -1;
1356349b53ddSStuart Maybee 		map_irq.bus = busnum;
1357349b53ddSStuart Maybee 		map_irq.devfn = devfn;
1358349b53ddSStuart Maybee 		map_irq.entry_nr = i;
1359349b53ddSStuart Maybee 		map_irq.table_base = table_base;
1360349b53ddSStuart Maybee 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1361349b53ddSStuart Maybee 		irqno = map_irq.pirq;
1362349b53ddSStuart Maybee 		if (rc < 0) {
1363349b53ddSStuart Maybee 			mutex_exit(&airq_mutex);
1364349b53ddSStuart Maybee 			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
1365c5d6fa0cSFrank Van Der Linden 			return (i);
1366349b53ddSStuart Maybee 		}
1367349b53ddSStuart Maybee 		if (irqno < 0) {
1368349b53ddSStuart Maybee 			mutex_exit(&airq_mutex);
1369349b53ddSStuart Maybee 			cmn_err(CE_NOTE,
1370349b53ddSStuart Maybee 			    "!hypervisor not configured for MSI support");
1371349b53ddSStuart Maybee 			xen_support_msi = -1;
1372349b53ddSStuart Maybee 			return (0);
1373349b53ddSStuart Maybee 		}
1374349b53ddSStuart Maybee 		/*
1375349b53ddSStuart Maybee 		 * Find out what vector the hypervisor assigned
1376349b53ddSStuart Maybee 		 */
1377349b53ddSStuart Maybee 		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i);
1378c5d6fa0cSFrank Van Der Linden 
1379349b53ddSStuart Maybee 		if (msi_allocate_irq(irqno) < 0) {
1380349b53ddSStuart Maybee 			mutex_exit(&airq_mutex);
1381c5d6fa0cSFrank Van Der Linden 			return (i);
1382349b53ddSStuart Maybee 		}
1383349b53ddSStuart Maybee 		apic_vector_to_irq[vector] = (uchar_t)irqno;
1384349b53ddSStuart Maybee 		msi_vector_to_pirq[vector] = (uchar_t)irqno;
1385349b53ddSStuart Maybee 		apic_max_device_irq = max(irqno, apic_max_device_irq);
1386349b53ddSStuart Maybee 		apic_min_device_irq = min(irqno, apic_min_device_irq);
1387349b53ddSStuart Maybee 		irqptr = apic_irq_table[irqno];
1388349b53ddSStuart Maybee 		ASSERT(irqptr != NULL);
1389349b53ddSStuart Maybee 		irqptr->airq_vector = (uchar_t)vector;
1390349b53ddSStuart Maybee 		irqptr->airq_ipl = pri;
1391349b53ddSStuart Maybee 		irqptr->airq_origirq = (uchar_t)(inum + i);
1392349b53ddSStuart Maybee 		irqptr->airq_share_id = 0;
1393349b53ddSStuart Maybee 		irqptr->airq_mps_intr_index = MSIX_INDEX;
1394349b53ddSStuart Maybee 		irqptr->airq_dip = dip;
1395349b53ddSStuart Maybee 		irqptr->airq_major = major;
1396349b53ddSStuart Maybee 		irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */
1397349b53ddSStuart Maybee 	}
1398349b53ddSStuart Maybee out:
1399349b53ddSStuart Maybee 	mutex_exit(&airq_mutex);
1400349b53ddSStuart Maybee 	return (rcount);
1401349b53ddSStuart Maybee }
1402349b53ddSStuart Maybee 
1403349b53ddSStuart Maybee 
1404349b53ddSStuart Maybee /*
1405349b53ddSStuart Maybee  * This finds the apic_irq_t associated with the dip, ispec and type.
1406349b53ddSStuart Maybee  * The entry should have already been freed, but it can not have been
1407349b53ddSStuart Maybee  * reused yet since the hypervisor can not have reassigned the pirq since
1408349b53ddSStuart Maybee  * we have not freed that yet.
1409349b53ddSStuart Maybee  */
1410349b53ddSStuart Maybee static apic_irq_t *
msi_find_irq(dev_info_t * dip,struct intrspec * ispec)1411349b53ddSStuart Maybee msi_find_irq(dev_info_t *dip, struct intrspec *ispec)
1412349b53ddSStuart Maybee {
1413349b53ddSStuart Maybee 	apic_irq_t	*irqp;
1414349b53ddSStuart Maybee 	int i;
1415349b53ddSStuart Maybee 
1416349b53ddSStuart Maybee 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
1417349b53ddSStuart Maybee 		if ((irqp = apic_irq_table[i]) == NULL)
1418349b53ddSStuart Maybee 			continue;
1419349b53ddSStuart Maybee 		if ((irqp->airq_dip == dip) &&
1420349b53ddSStuart Maybee 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
1421349b53ddSStuart Maybee 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
1422349b53ddSStuart Maybee 			return (irqp);
1423349b53ddSStuart Maybee 		}
1424349b53ddSStuart Maybee 	}
1425349b53ddSStuart Maybee 	return (NULL);
1426349b53ddSStuart Maybee }
1427349b53ddSStuart Maybee 
1428349b53ddSStuart Maybee void
apic_free_vectors(dev_info_t * dip,int inum,int count,int pri,int type)1429349b53ddSStuart Maybee apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
1430349b53ddSStuart Maybee {
1431349b53ddSStuart Maybee 	int i, rc;
1432349b53ddSStuart Maybee 	physdev_unmap_pirq_t unmap_pirq;
1433349b53ddSStuart Maybee 	apic_irq_t *irqptr;
1434349b53ddSStuart Maybee 	struct intrspec ispec;
1435349b53ddSStuart Maybee 
1436349b53ddSStuart Maybee 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
1437349b53ddSStuart Maybee 	    "count: %x pri: %x type: %x\n",
1438349b53ddSStuart Maybee 	    (void *)dip, inum, count, pri, type));
1439349b53ddSStuart Maybee 
1440349b53ddSStuart Maybee 	/* for MSI/X only */
1441349b53ddSStuart Maybee 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
1442349b53ddSStuart Maybee 		return;
1443349b53ddSStuart Maybee 
1444349b53ddSStuart Maybee 	for (i = 0; i < count; i++) {
1445349b53ddSStuart Maybee 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
1446349b53ddSStuart Maybee 		    "pri=0x%x count=0x%x\n", inum, pri, count));
1447349b53ddSStuart Maybee 		ispec.intrspec_vec = inum + i;
1448349b53ddSStuart Maybee 		ispec.intrspec_pri = pri;
1449349b53ddSStuart Maybee 		if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) {
1450349b53ddSStuart Maybee 			cmn_err(CE_WARN,
1451349b53ddSStuart Maybee 			    "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x",
1452349b53ddSStuart Maybee 			    ddi_get_name(dip), ddi_get_name_addr(dip),
1453349b53ddSStuart Maybee 			    (void *)dip, inum + i, pri);
1454349b53ddSStuart Maybee 			continue;
1455349b53ddSStuart Maybee 		}
1456349b53ddSStuart Maybee 		/*
1457349b53ddSStuart Maybee 		 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq
1458349b53ddSStuart Maybee 		 */
1459349b53ddSStuart Maybee 		unmap_pirq.domid = DOMID_SELF;
1460349b53ddSStuart Maybee 		unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector];
1461349b53ddSStuart Maybee 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq);
1462349b53ddSStuart Maybee 		if (rc < 0) {
1463349b53ddSStuart Maybee 			cmn_err(CE_WARN, "unmap pirq failed");
1464349b53ddSStuart Maybee 			return;
1465349b53ddSStuart Maybee 		}
1466349b53ddSStuart Maybee 		irqptr->airq_mps_intr_index = FREE_INDEX;
1467349b53ddSStuart Maybee 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
1468349b53ddSStuart Maybee 	}
1469349b53ddSStuart Maybee }
1470349b53ddSStuart Maybee 
1471349b53ddSStuart Maybee /*
1472843e1988Sjohnlev  * The hypervisor doesn't permit access to local apics directly
1473843e1988Sjohnlev  */
1474843e1988Sjohnlev /* ARGSUSED */
1475843e1988Sjohnlev uint32_t *
mapin_apic(uint32_t addr,size_t len,int flags)1476843e1988Sjohnlev mapin_apic(uint32_t addr, size_t len, int flags)
1477843e1988Sjohnlev {
1478843e1988Sjohnlev 	/*
1479843e1988Sjohnlev 	 * Return a pointer to a memory area to fake out the
1480843e1988Sjohnlev 	 * probe code that wants to read apic registers.
1481843e1988Sjohnlev 	 * The dummy values will end up being ignored by xen
1482843e1988Sjohnlev 	 * later on when they are used anyway.
1483843e1988Sjohnlev 	 */
1484843e1988Sjohnlev 	xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS;
1485843e1988Sjohnlev 	return (xen_psm_dummy_apic);
1486843e1988Sjohnlev }
1487843e1988Sjohnlev 
/* ARGSUSED */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	/*
	 * The configure code that calls this only needs a non-null answer.
	 * The i86xpv platform never references through the value handed
	 * back, so a fixed placeholder address is enough.
	 */
	uint32_t *placeholder = (uint32_t *)0x1;

	return (placeholder);
}
1498843e1988Sjohnlev 
1499843e1988Sjohnlev /* ARGSUSED */
1500843e1988Sjohnlev void
mapout_apic(caddr_t addr,size_t len)1501843e1988Sjohnlev mapout_apic(caddr_t addr, size_t len)
1502843e1988Sjohnlev {
1503843e1988Sjohnlev }
1504843e1988Sjohnlev 
1505843e1988Sjohnlev /* ARGSUSED */
1506843e1988Sjohnlev void
mapout_ioapic(caddr_t addr,size_t len)1507843e1988Sjohnlev mapout_ioapic(caddr_t addr, size_t len)
1508843e1988Sjohnlev {
1509843e1988Sjohnlev }
1510843e1988Sjohnlev 
1511843e1988Sjohnlev uint32_t
ioapic_read(int apic_ix,uint32_t reg)1512843e1988Sjohnlev ioapic_read(int apic_ix, uint32_t reg)
1513843e1988Sjohnlev {
1514843e1988Sjohnlev 	physdev_apic_t apic;
1515843e1988Sjohnlev 
1516843e1988Sjohnlev 	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
1517843e1988Sjohnlev 	apic.reg = reg;
1518843e1988Sjohnlev 	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic))
1519843e1988Sjohnlev 		panic("read ioapic %d reg %d failed", apic_ix, reg);
1520843e1988Sjohnlev 	return (apic.value);
1521843e1988Sjohnlev }
1522843e1988Sjohnlev 
1523843e1988Sjohnlev void
ioapic_write(int apic_ix,uint32_t reg,uint32_t value)1524843e1988Sjohnlev ioapic_write(int apic_ix, uint32_t reg, uint32_t value)
1525843e1988Sjohnlev {
1526843e1988Sjohnlev 	physdev_apic_t apic;
1527843e1988Sjohnlev 
1528843e1988Sjohnlev 	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
1529843e1988Sjohnlev 	apic.reg = reg;
1530843e1988Sjohnlev 	apic.value = value;
1531843e1988Sjohnlev 	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
1532843e1988Sjohnlev 		panic("write ioapic %d reg %d failed", apic_ix, reg);
1533843e1988Sjohnlev }
1534843e1988Sjohnlev 
1535843e1988Sjohnlev /*
1536b6917abeSmishra  * This function was added as part of x2APIC support in pcplusmp.
1537b6917abeSmishra  */
1538b6917abeSmishra void
ioapic_write_eoi(int apic_ix,uint32_t value)1539b6917abeSmishra ioapic_write_eoi(int apic_ix, uint32_t value)
1540b6917abeSmishra {
1541b6917abeSmishra 	physdev_apic_t apic;
1542b6917abeSmishra 
1543b6917abeSmishra 	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
1544b6917abeSmishra 	apic.reg = APIC_IO_EOI;
1545b6917abeSmishra 	apic.value = value;
1546b6917abeSmishra 	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
1547b6917abeSmishra 		panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix);
1548b6917abeSmishra }
1549b6917abeSmishra 
/*
 * Empty stub.  It was introduced with the x2APIC support in pcplusmp so
 * that xpv_psm does not end up with an undefined symbol.
 */
void
x2apic_update_psm()
{
	/* nothing to do */
}
1558b6917abeSmishra 
/*
 * Empty stub.  It was introduced with the x2APIC support in pcplusmp so
 * that xpv_psm does not end up with an undefined symbol.
 */
void
apic_ret()
{
	/* nothing to do */
}
1567b6917abeSmishra 
1568b6917abeSmishra /*
1569843e1988Sjohnlev  * Call rebind to do the actual programming.
1570843e1988Sjohnlev  */
1571843e1988Sjohnlev int
apic_setup_io_intr(void * p,int irq,boolean_t deferred)1572843e1988Sjohnlev apic_setup_io_intr(void *p, int irq, boolean_t deferred)
1573843e1988Sjohnlev {
1574843e1988Sjohnlev 	apic_irq_t *irqptr;
1575843e1988Sjohnlev 	struct ioapic_reprogram_data *drep = NULL;
1576843e1988Sjohnlev 	int rv, cpu;
1577843e1988Sjohnlev 	cpuset_t cpus;
1578843e1988Sjohnlev 
1579843e1988Sjohnlev 	if (deferred) {
1580843e1988Sjohnlev 		drep = (struct ioapic_reprogram_data *)p;
1581843e1988Sjohnlev 		ASSERT(drep != NULL);
1582843e1988Sjohnlev 		irqptr = drep->irqp;
1583843e1988Sjohnlev 	} else {
1584843e1988Sjohnlev 		irqptr = (apic_irq_t *)p;
1585843e1988Sjohnlev 	}
1586843e1988Sjohnlev 	ASSERT(irqptr != NULL);
1587349b53ddSStuart Maybee 	/*
1588349b53ddSStuart Maybee 	 * Set cpu based on xen idea of online cpu's not apic tables.
1589349b53ddSStuart Maybee 	 * Note that xen ignores/sets to it's own preferred value the
1590349b53ddSStuart Maybee 	 * target cpu field when programming ioapic anyway.
1591349b53ddSStuart Maybee 	 */
1592349b53ddSStuart Maybee 	if (irqptr->airq_mps_intr_index == MSI_INDEX)
1593349b53ddSStuart Maybee 		cpu = irqptr->airq_cpu; /* MSI cpus are already set */
1594349b53ddSStuart Maybee 	else {
1595349b53ddSStuart Maybee 		cpu = xen_psm_bind_intr(irq);
1596349b53ddSStuart Maybee 		irqptr->airq_cpu = cpu;
1597349b53ddSStuart Maybee 	}
1598349b53ddSStuart Maybee 	if (cpu == IRQ_UNBOUND) {
1599349b53ddSStuart Maybee 		CPUSET_ZERO(cpus);
1600349b53ddSStuart Maybee 		CPUSET_OR(cpus, xen_psm_cpus_online);
1601349b53ddSStuart Maybee 	} else {
1602349b53ddSStuart Maybee 		CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
1603349b53ddSStuart Maybee 	}
1604843e1988Sjohnlev 	rv = apic_rebind(irqptr, cpu, drep);
1605843e1988Sjohnlev 	if (rv) {
1606843e1988Sjohnlev 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
1607843e1988Sjohnlev 		cpu = 0;
1608349b53ddSStuart Maybee 		irqptr->airq_cpu = cpu;
1609843e1988Sjohnlev 		rv = apic_rebind(irqptr, cpu, drep);
1610843e1988Sjohnlev 	}
1611843e1988Sjohnlev 	/*
1612843e1988Sjohnlev 	 * If rebind successful bind the irq to an event channel
1613843e1988Sjohnlev 	 */
1614b9bc7f78Ssmaybe 	if (rv == 0) {
1615b9bc7f78Ssmaybe 		ec_setup_pirq(irq, irqptr->airq_ipl, &cpus);
1616b9bc7f78Ssmaybe 		CPUSET_FIND(cpus, cpu);
1617b9bc7f78Ssmaybe 		apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND;
1618b9bc7f78Ssmaybe 	}
1619843e1988Sjohnlev 	return (rv);
1620843e1988Sjohnlev }
1621843e1988Sjohnlev 
1622843e1988Sjohnlev /*
1623843e1988Sjohnlev  * Allocate a new vector for the given irq
1624843e1988Sjohnlev  */
1625843e1988Sjohnlev /* ARGSUSED */
1626843e1988Sjohnlev uchar_t
apic_modify_vector(uchar_t vector,int irq)1627843e1988Sjohnlev apic_modify_vector(uchar_t vector, int irq)
1628843e1988Sjohnlev {
1629843e1988Sjohnlev 	return (apic_allocate_vector(0, irq, 0));
1630843e1988Sjohnlev }
1631843e1988Sjohnlev 
1632843e1988Sjohnlev /*
1633843e1988Sjohnlev  * The rest of the file is just generic psm module boilerplate
1634843e1988Sjohnlev  */
1635843e1988Sjohnlev 
1636843e1988Sjohnlev static struct psm_ops xen_psm_ops = {
1637843e1988Sjohnlev 	xen_psm_probe,				/* psm_probe		*/
1638843e1988Sjohnlev 
1639843e1988Sjohnlev 	xen_psm_softinit,			/* psm_init		*/
1640843e1988Sjohnlev 	xen_psm_picinit,			/* psm_picinit		*/
1641843e1988Sjohnlev 	xen_psm_intr_enter,			/* psm_intr_enter	*/
1642843e1988Sjohnlev 	xen_psm_intr_exit,			/* psm_intr_exit	*/
1643843e1988Sjohnlev 	xen_psm_setspl,				/* psm_setspl		*/
1644843e1988Sjohnlev 	xen_psm_addspl,				/* psm_addspl		*/
1645843e1988Sjohnlev 	xen_psm_delspl,				/* psm_delspl		*/
1646843e1988Sjohnlev 	xen_psm_disable_intr,			/* psm_disable_intr	*/
1647843e1988Sjohnlev 	xen_psm_enable_intr,			/* psm_enable_intr	*/
1648843e1988Sjohnlev 	(int (*)(int))NULL,			/* psm_softlvl_to_irq	*/
1649843e1988Sjohnlev 	(void (*)(int))NULL,			/* psm_set_softintr	*/
1650843e1988Sjohnlev 	(void (*)(processorid_t))NULL,		/* psm_set_idlecpu	*/
1651843e1988Sjohnlev 	(void (*)(processorid_t))NULL,		/* psm_unset_idlecpu	*/
1652843e1988Sjohnlev 
1653843e1988Sjohnlev 	xen_psm_clkinit,			/* psm_clkinit		*/
1654843e1988Sjohnlev 	xen_psm_get_clockirq,			/* psm_get_clockirq	*/
1655843e1988Sjohnlev 	xen_psm_hrtimeinit,			/* psm_hrtimeinit	*/
1656843e1988Sjohnlev 	xpv_gethrtime,				/* psm_gethrtime	*/
1657843e1988Sjohnlev 
1658843e1988Sjohnlev 	xen_psm_get_next_processorid,		/* psm_get_next_processorid */
1659843e1988Sjohnlev 	xen_psm_cpu_start,			/* psm_cpu_start	*/
1660843e1988Sjohnlev 	xen_psm_post_cpu_start,			/* psm_post_cpu_start	*/
1661843e1988Sjohnlev 	xen_psm_shutdown,			/* psm_shutdown		*/
1662843e1988Sjohnlev 	xen_psm_get_ipivect,			/* psm_get_ipivect	*/
1663843e1988Sjohnlev 	xen_psm_send_ipi,			/* psm_send_ipi		*/
1664843e1988Sjohnlev 
1665843e1988Sjohnlev 	xen_psm_translate_irq,			/* psm_translate_irq	*/
1666843e1988Sjohnlev 
1667843e1988Sjohnlev 	(void (*)(int, char *))NULL,		/* psm_notify_error	*/
1668843e1988Sjohnlev 	(void (*)(int msg))NULL,		/* psm_notify_func	*/
1669843e1988Sjohnlev 	xen_psm_timer_reprogram,		/* psm_timer_reprogram	*/
1670843e1988Sjohnlev 	xen_psm_timer_enable,			/* psm_timer_enable	*/
1671843e1988Sjohnlev 	xen_psm_timer_disable,			/* psm_timer_disable	*/
1672843e1988Sjohnlev 	(void (*)(void *arg))NULL,		/* psm_post_cyclic_setup */
1673843e1988Sjohnlev 	(void (*)(int, int))NULL,		/* psm_preshutdown	*/
1674e8ed0869SJohn Beck 	xen_intr_ops,			/* Advanced DDI Interrupt framework */
1675*a3114836SGerry Liu 	(int (*)(psm_state_request_t *))NULL,	/* psm_state		*/
1676*a3114836SGerry Liu 	(int (*)(psm_cpu_request_t *))NULL	/* psm_cpu_ops		*/
1677843e1988Sjohnlev };
1678843e1988Sjohnlev 
1679843e1988Sjohnlev static struct psm_info xen_psm_info = {
1680843e1988Sjohnlev 	PSM_INFO_VER01_5,	/* version				*/
1681cc7a88b5Smrj 	PSM_OWN_EXCLUSIVE,	/* ownership				*/
1682843e1988Sjohnlev 	&xen_psm_ops,		/* operation				*/
1683b9bc7f78Ssmaybe 	"xVM_psm",		/* machine name				*/
1684613b2871SRichard Bean 	"platform module"	/* machine descriptions			*/
1685843e1988Sjohnlev };
1686843e1988Sjohnlev 
1687843e1988Sjohnlev static void *xen_psm_hdlp;
1688843e1988Sjohnlev 
1689843e1988Sjohnlev int
_init(void)1690843e1988Sjohnlev _init(void)
1691843e1988Sjohnlev {
1692843e1988Sjohnlev 	return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info));
1693843e1988Sjohnlev }
1694843e1988Sjohnlev 
1695843e1988Sjohnlev int
_fini(void)1696843e1988Sjohnlev _fini(void)
1697843e1988Sjohnlev {
1698843e1988Sjohnlev 	return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info));
1699843e1988Sjohnlev }
1700843e1988Sjohnlev 
1701843e1988Sjohnlev int
_info(struct modinfo * modinfop)1702843e1988Sjohnlev _info(struct modinfo *modinfop)
1703843e1988Sjohnlev {
1704843e1988Sjohnlev 	return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop));
1705843e1988Sjohnlev }
1706