xref: /freebsd/sys/powerpc/powernv/xive.c (revision 58f351825a371d1a3dd693d6f64a1245ea851a51)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright 2019 Justin Hibbits
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_platform.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>

#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>

#ifdef POWERNV
#include <powerpc/powernv/opal.h>
#endif

#include "pic_if.h"

#define XIVE_PRIORITY	7	/* Arbitrary non-zero priority */
#define MAX_XIVE_IRQS	(1<<24)	/* 24-bit XIRR field */

/* Registers */
#define	XIVE_TM_QW1_OS		0x010	/* Guest OS registers */
#define	XIVE_TM_QW2_HV_POOL	0x020	/* Hypervisor pool registers */
#define	XIVE_TM_QW3_HV		0x030	/* Hypervisor registers */

#define	XIVE_TM_NSR	0x00
#define	XIVE_TM_CPPR	0x01
#define	XIVE_TM_IPB	0x02
#define	XIVE_TM_LSMFB	0x03
#define	XIVE_TM_ACK_CNT	0x04
#define	XIVE_TM_INC	0x05
#define	XIVE_TM_AGE	0x06
#define	XIVE_TM_PIPR	0x07

#define	TM_WORD0	0x0
#define	TM_WORD2	0x8
#define	  TM_QW2W2_VP	  0x80000000

#define	XIVE_TM_SPC_ACK			0x800
#define	  TM_QW3NSR_HE_SHIFT		  14
#define	  TM_QW3_NSR_HE_NONE		  0
#define	  TM_QW3_NSR_HE_POOL		  1
#define	  TM_QW3_NSR_HE_PHYS		  2
#define	  TM_QW3_NSR_HE_LSI		  3
#define	XIVE_TM_SPC_PULL_POOL_CTX	0x828

#define	XIVE_IRQ_LOAD_EOI	0x000
#define	XIVE_IRQ_STORE_EOI	0x400
#define	XIVE_IRQ_PQ_00		0xc00
#define	XIVE_IRQ_PQ_01		0xd00

#define	XIVE_IRQ_VAL_P		0x02
#define	XIVE_IRQ_VAL_Q		0x01

struct xive_softc;
struct xive_irq;

extern void (*powernv_smp_ap_extra_init)(void);

/* Private support */
static void	xive_setup_cpu(void);
static void	xive_smp_cpu_startup(void);
static void	xive_init_irq(struct xive_irq *irqd, u_int irq);
static struct xive_irq	*xive_configure_irq(u_int irq);
static int	xive_provision_page(struct xive_softc *sc);

/* Interfaces */
static int	xive_probe(device_t);
static int	xive_attach(device_t);
static int	xics_probe(device_t);
static int	xics_attach(device_t);

static void	xive_bind(device_t, u_int, cpuset_t, void **);
static void	xive_dispatch(device_t, struct trapframe *);
static void	xive_enable(device_t, u_int, u_int, void **);
static void	xive_eoi(device_t, u_int, void *);
static void	xive_ipi(device_t, u_int);
static void	xive_mask(device_t, u_int, void *);
static void	xive_unmask(device_t, u_int, void *);
static void	xive_translate_code(device_t dev, u_int irq, int code,
		    enum intr_trigger *trig, enum intr_polarity *pol);

static device_method_t  xive_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		xive_probe),
	DEVMETHOD(device_attach,	xive_attach),

	/* PIC interface */
	DEVMETHOD(pic_bind,		xive_bind),
	DEVMETHOD(pic_dispatch,		xive_dispatch),
	DEVMETHOD(pic_enable,		xive_enable),
	DEVMETHOD(pic_eoi,		xive_eoi),
	DEVMETHOD(pic_ipi,		xive_ipi),
	DEVMETHOD(pic_mask,		xive_mask),
	DEVMETHOD(pic_unmask,		xive_unmask),
	DEVMETHOD(pic_translate_code,	xive_translate_code),

	DEVMETHOD_END
};

static device_method_t  xics_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		xics_probe),
	DEVMETHOD(device_attach,	xics_attach),

	DEVMETHOD_END
};

struct xive_softc {
	struct mtx sc_mtx;
	struct resource *sc_mem;
	vm_size_t	sc_prov_page_size;
	uint32_t	sc_offset;
};

struct xive_queue {
	uint32_t	*q_page;
	uint32_t	*q_eoi_page;
	uint32_t	 q_toggle;
	uint32_t	 q_size;
	uint32_t	 q_index;
	uint32_t	 q_mask;
};

struct xive_irq {
	uint32_t	girq;
	uint32_t	lirq;
	uint64_t	vp;
	uint64_t	flags;
#define	OPAL_XIVE_IRQ_EOI_VIA_FW	0x00000020
#define	OPAL_XIVE_IRQ_MASK_VIA_FW	0x00000010
#define	OPAL_XIVE_IRQ_SHIFT_BUG		0x00000008
#define	OPAL_XIVE_IRQ_LSI		0x00000004
#define	OPAL_XIVE_IRQ_STORE_EOI		0x00000002
#define	OPAL_XIVE_IRQ_TRIGGER_PAGE	0x00000001
	uint8_t	prio;
	vm_offset_t	eoi_page;
	vm_offset_t	trig_page;
	vm_size_t	esb_size;
	int		chip;
};

struct xive_cpu {
	uint64_t	vp;
	uint64_t	flags;
	struct xive_irq	ipi_data;
	struct xive_queue	queue; /* We only use a single queue for now. */
	uint64_t	cam;
	uint32_t	chip;
};

static driver_t xive_driver = {
	"xive",
	xive_methods,
	sizeof(struct xive_softc)
};

static driver_t xics_driver = {
	"xivevc",
	xics_methods,
	0
};

static devclass_t xive_devclass;
static devclass_t xics_devclass;

EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, xive_devclass, 0, 0,
    BUS_PASS_INTERRUPT-1);
EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, xics_devclass, 0, 0,
    BUS_PASS_INTERRUPT);

MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory");

DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data);

static int xive_ipi_vector = -1;

/*
 * XIVE Exploitation mode driver.
 *
 * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation
 * mode and "Exploitation mode".  XICS emulation mode is compatible with the
 * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate
 * hypervisor calls and memory accesses.  Exploitation mode gives us raw access
 * to the XIVE MMIO, improving performance significantly.
 *
 * The XIVE controller is a very bizarre interrupt controller.  It uses queues
 * in memory to pass interrupts around, and maps itself into 512GB of physical
 * device address space, giving each interrupt in the system one or more pages
 * of address space.  An IRQ is tied to a virtual processor, which can be a
 * physical CPU thread or a guest CPU thread (LPAR running on a physical
 * thread).  Thus, the controller can route interrupts directly to guest OSes,
 * bypassing processing by the hypervisor and thereby improving performance of
 * the guest OS.
 *
 * An IRQ, in addition to being tied to a virtual processor, has one or two
 * page mappings: an EOI page and an optional trigger page.  The trigger page
 * can be the same as the EOI page.  Level-sensitive interrupts (LSIs) don't
 * have a trigger page, as they're external interrupts controlled by physical
 * lines.  MSIs and IPIs have trigger pages.  An IPI is really just another IRQ
 * in the XIVE, one that is triggered by software.
 *
 * An interesting behavior of the XIVE controller is that the data read or
 * written often doesn't matter; what signifies is the direction of the access
 * (read vs. write) and the address used.  Hence, masking and unmasking an
 * interrupt are done by reading different addresses in the EOI page, and
 * triggering an interrupt consists of writing to the trigger page.
 *
 * Additionally, the MMIO region mapped is CPU-sensitive, just like the
 * per-processor register space (private access) in OpenPIC.  In order for a
 * CPU to receive interrupts it must itself configure its CPPR (Current
 * Processor Priority Register); it cannot be set by any other processor.
 * This necessitates the xive_smp_cpu_startup() function.
 *
 * Queues are pages of memory, sized in powers of two, that are shared with
 * the XIVE.  The XIVE writes each queue entry with an alternating polarity
 * bit, which flips when the queue wraps.
 */

/*
 * Offset-based read/write interfaces.
 */
static uint16_t
xive_read_2(struct xive_softc *sc, bus_size_t offset)
{

	return (bus_read_2(sc->sc_mem, sc->sc_offset + offset));
}

static void
xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val)
{

	bus_write_1(sc->sc_mem, sc->sc_offset + offset, val);
}

/* EOI and Trigger page access interfaces. */
static uint64_t
xive_read_mmap8(vm_offset_t addr)
{
	return (*(volatile uint64_t *)addr);
}

static void
xive_write_mmap8(vm_offset_t addr, uint64_t val)
{
	*(volatile uint64_t *)addr = val;
}

/* Device interfaces. */
static int
xive_probe(device_t dev)
{

	if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe"))
		return (ENXIO);

	device_set_desc(dev, "External Interrupt Virtualization Engine");

	/* Make sure we always win against the xicp driver. */
	return (BUS_PROBE_DEFAULT);
}

static int
xics_probe(device_t dev)
{

	if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc"))
		return (ENXIO);

	device_set_desc(dev, "External Interrupt Virtualization Engine Root");
	return (BUS_PROBE_DEFAULT);
}

static int
xive_attach(device_t dev)
{
	struct xive_softc *sc = device_get_softc(dev);
	struct xive_cpu *xive_cpud;
	phandle_t phandle = ofw_bus_get_node(dev);
	int64_t vp_block;
	int error;
	int rid;
	int i, order;
	uint64_t vp_id;
	int64_t ipi_irq;

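	/* Switch the controller from XICS emulation into exploitation mode. */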
	opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP);

	error = OF_getencprop(phandle, "ibm,xive-provision-page-size",
	    (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size));

	rid = 1;	/* Get the Hypervisor-level register set. */
	sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->sc_mem == NULL) {
		device_printf(dev, "Could not allocate memory resource\n");
		return (ENXIO);
	}
	sc->sc_offset = XIVE_TM_QW3_HV;

	mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF);

	/* Workaround for qemu single-thread powernv */
	if (mp_maxid == 0)
		order = 1;
	else
		order = fls(mp_maxid + (mp_maxid - 1)) - 1;

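	/*
	 * Allocate a block of VPs covering all hardware threads.  OPAL hands
	 * out VP blocks in power-of-two sizes, may be busy, and may ask us to
	 * donate more backing pages before it can satisfy the request.
	 */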
	do {
		vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order);
		if (vp_block == OPAL_BUSY)
			DELAY(10);
		else if (vp_block == OPAL_XIVE_PROVISIONING)
			xive_provision_page(sc);
		else
			break;
	} while (1);

	if (vp_block < 0) {
		device_printf(dev,
		    "Unable to allocate VP block.  OPAL error %d\n",
		    (int)vp_block);
		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem);
		return (ENXIO);
	}

	/*
	 * Set up the VPs.  Try to do as much as we can in attach, to lessen
	 * what's needed at AP spawn time.
	 */
	CPU_FOREACH(i) {
		vp_id = pcpu_find(i)->pc_hwref;

		xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data);
		xive_cpud->vp = vp_id + vp_block;
		opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL,
		    vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip));

		/* Allocate the queue page and populate the queue state data. */
		xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE,
		    M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
		xive_cpud->queue.q_size = 1 << PAGE_SHIFT;
		xive_cpud->queue.q_mask =
		    ((xive_cpud->queue.q_size / sizeof(int)) - 1);
		xive_cpud->queue.q_toggle = 0;
		xive_cpud->queue.q_index = 0;
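		/* Enable the VP, then wire up this CPU's event queue at
		 * XIVE_PRIORITY. */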
		do {
			error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp,
			    OPAL_XIVE_VP_ENABLED, 0);
		} while (error == OPAL_BUSY);
		error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id,
		    XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT,
		    OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED);

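		/* Carve out an IPI source for this CPU from its own chip. */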
		do {
			ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ,
			    xive_cpud->chip);
		} while (ipi_irq == OPAL_BUSY);

		if (ipi_irq < 0) {
			device_printf(dev,
			    "Failed allocating IPI.  OPAL error %d\n",
			    (int)ipi_irq);
		} else {
			xive_init_irq(&xive_cpud->ipi_data, ipi_irq);
			xive_cpud->ipi_data.vp = vp_id;
			xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS;
			opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq,
			    xive_cpud->ipi_data.vp, XIVE_PRIORITY,
			    MAX_XIVE_IRQS);
		}
	}

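	/*
	 * Register as the root PIC, exposing the full 24-bit IRQ space plus
	 * one IPI vector.
	 */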
	powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS,
	    1 /* Number of IPIs */, FALSE);
	root_pic = dev;

	xive_setup_cpu();
	powernv_smp_ap_extra_init = xive_smp_cpu_startup;

	return (0);
}

static int
xics_attach(device_t dev)
{
	phandle_t phandle = ofw_bus_get_node(dev);

	/* The XIVE (root PIC) will handle all our interrupts */
	powerpc_register_pic(root_pic, OF_xref_from_node(phandle),
	    MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE);

	return (0);
}

/*
 * PIC I/F methods.
 */

static void
xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv)
{
	struct xive_irq *irqd;
	int cpu;
	int ncpus, i, error;

	if (*priv == NULL)
		*priv = xive_configure_irq(irq);

	irqd = *priv;

	/*
	 * This doesn't appear to actually support affinity groups, so pick a
	 * random CPU.
	 */
	ncpus = 0;
	CPU_FOREACH(cpu)
		if (CPU_ISSET(cpu, &cpumask))
			ncpus++;

	i = mftb() % ncpus;
	ncpus = 0;
	CPU_FOREACH(cpu) {
		if (!CPU_ISSET(cpu, &cpumask))
			continue;
		if (ncpus == i)
			break;
		ncpus++;
	}

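	/* Drain in-flight events for this IRQ before retargeting it. */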
	opal_call(OPAL_XIVE_SYNC, OPAL_XIVE_SYNC_QUEUE, irq);

	irqd->vp = pcpu_find(cpu)->pc_hwref;
	error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp,
	    XIVE_PRIORITY, irqd->lirq);

	if (error < 0)
		panic("Cannot bind interrupt %d to CPU %d", irq, cpu);

	xive_eoi(dev, irq, irqd);
}

/* Read the next entry in the queue page and update the index. */
static int
xive_read_eq(struct xive_queue *q)
{
	uint32_t i = be32toh(q->q_page[q->q_index]);

	/* Check validity, using current queue polarity. */
	if ((i >> 31) == q->q_toggle)
		return (0);

	q->q_index = (q->q_index + 1) & q->q_mask;

	if (q->q_index == 0)
		q->q_toggle ^= 1;

	return (i & 0x7fffffff);
}

static void
xive_dispatch(device_t dev, struct trapframe *tf)
{
	struct xive_softc *sc;
	struct xive_cpu *xive_cpud;
	uint32_t vector;
	uint16_t ack;
	uint8_t cppr, he;

	sc = device_get_softc(dev);

	xive_cpud = DPCPU_PTR(xive_cpu_data);
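	/*
	 * Ack the pending interrupt, mirror its priority into the CPPR, then
	 * drain the event queue, dispatching every vector found in it.
	 */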
	for (;;) {
		ack = xive_read_2(sc, XIVE_TM_SPC_ACK);
		cppr = (ack & 0xff);

		he = ack >> TM_QW3NSR_HE_SHIFT;

		if (he == TM_QW3_NSR_HE_NONE)
			break;
		else if (__predict_false(he != TM_QW3_NSR_HE_PHYS)) {
			/*
			 * We don't support TM_QW3_NSR_HE_POOL or
			 * TM_QW3_NSR_HE_LSI interrupts.
			 */
			device_printf(dev,
			    "Unexpected interrupt HE type: %d\n", he);
			goto end;
		}

		xive_write_1(sc, XIVE_TM_CPPR, cppr);

		for (;;) {
			vector = xive_read_eq(&xive_cpud->queue);

			if (vector == 0)
				break;

			if (vector == MAX_XIVE_IRQS)
				vector = xive_ipi_vector;

			powerpc_dispatch_intr(vector, tf);
		}
	}
end:
	xive_write_1(sc, XIVE_TM_CPPR, 0xff);
}

static void
xive_enable(device_t dev, u_int irq, u_int vector, void **priv)
{
	struct xive_irq *irqd;
	cell_t status, cpu;

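	/* IPIs were set up at attach time; just record the dispatch vector. */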
	if (irq == MAX_XIVE_IRQS) {
		if (xive_ipi_vector == -1)
			xive_ipi_vector = vector;
		return;
	}
	if (*priv == NULL)
		*priv = xive_configure_irq(irq);

	irqd = *priv;

	/* Bind to this CPU to start */
	cpu = PCPU_GET(hwref);
	irqd->lirq = vector;

	for (;;) {
		status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu,
		    XIVE_PRIORITY, vector);
		if (status != OPAL_BUSY)
			break;
		DELAY(10);
	}

	if (status != 0)
		panic("OPAL_XIVE_SET_IRQ_CONFIG IRQ %d -> cpu %d failed: %d",
		    irq, cpu, status);

	xive_unmask(dev, irq, *priv);
}

static void
xive_eoi(device_t dev, u_int irq, void *priv)
{
	struct xive_irq *rirq;
	struct xive_cpu *cpud;
	uint8_t eoi_val;

	if (irq == MAX_XIVE_IRQS) {
		cpud = DPCPU_PTR(xive_cpu_data);
		rirq = &cpud->ipi_data;
	} else
		rirq = priv;

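	/*
	 * EOI by whichever method the source advertises: a firmware call, a
	 * store EOI, an LSI load EOI, or a PQ-cycle load with a manual
	 * retrigger if an event arrived (Q set) while we were processing.
	 */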
	if (rirq->flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
		opal_call(OPAL_INT_EOI, irq);
	else if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI)
		xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0);
	else if (rirq->flags & OPAL_XIVE_IRQ_LSI)
		xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI);
	else {
		eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
		if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0)
			xive_write_mmap8(rirq->trig_page, 0);
	}
}

static void
xive_ipi(device_t dev, u_int cpu)
{
	struct xive_cpu *xive_cpud;

	xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data);

	if (xive_cpud->ipi_data.trig_page == 0)
		return;
	xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0);
}

static void
xive_mask(device_t dev, u_int irq, void *priv)
{
	struct xive_irq *rirq;

	/* Never mask IPIs */
	if (irq == MAX_XIVE_IRQS)
		return;

	rirq = priv;

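	/* Only LSIs get masked; a PQ_01 load sets the ESB "off" state. */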
	if (!(rirq->flags & OPAL_XIVE_IRQ_LSI))
		return;
	xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01);
}

static void
xive_unmask(device_t dev, u_int irq, void *priv)
{
	struct xive_irq *rirq;

	rirq = priv;

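	/* A PQ_00 load resets the ESB state to 00, re-enabling the source. */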
	xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
}

static void
xive_translate_code(device_t dev, u_int irq, int code,
    enum intr_trigger *trig, enum intr_polarity *pol)
{
	switch (code) {
	case 0:
		/* L to H edge */
		*trig = INTR_TRIGGER_EDGE;
		*pol = INTR_POLARITY_HIGH;
		break;
	case 1:
		/* Active L level */
		*trig = INTR_TRIGGER_LEVEL;
		*pol = INTR_POLARITY_LOW;
		break;
	default:
		*trig = INTR_TRIGGER_CONFORM;
		*pol = INTR_POLARITY_CONFORM;
	}
}

/* Private functions. */
/*
 * Set up the current CPU.  Called by the BSP at driver attachment, and by each
 * AP at wakeup (via xive_smp_cpu_startup()).
 */
static void
xive_setup_cpu(void)
{
	struct xive_softc *sc;
	struct xive_cpu *cpup;
	uint32_t val;

	cpup = DPCPU_PTR(xive_cpu_data);

	sc = device_get_softc(root_pic);

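	/*
	 * If this thread still has a valid pool context, pull it before
	 * installing our own CAM line.
	 */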
	val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2);
	if (val & TM_QW2W2_VP)
		bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX);

	bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff);
	bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2,
	    TM_QW2W2_VP | cpup->cam);

	xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data);
	xive_write_1(sc, XIVE_TM_CPPR, 0xff);
}

/* Populate an IRQ structure, mapping the EOI and trigger pages. */
static void
xive_init_irq(struct xive_irq *irqd, u_int irq)
{
	uint64_t eoi_phys, trig_phys;
	uint32_t esb_shift;

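	/* OPAL returns the source parameters through these physical pointers. */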
	opal_call(OPAL_XIVE_GET_IRQ_INFO, irq,
	    vtophys(&irqd->flags), vtophys(&eoi_phys),
	    vtophys(&trig_phys), vtophys(&esb_shift),
	    vtophys(&irqd->chip));

	irqd->girq = irq;
	irqd->esb_size = 1 << esb_shift;
	irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size);

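	/* The trigger page may alias the EOI page, be separate, or be absent. */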
	if (eoi_phys == trig_phys)
		irqd->trig_page = irqd->eoi_page;
	else if (trig_phys != 0)
		irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys,
		    irqd->esb_size);
	else
		irqd->trig_page = 0;

	opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp),
	    vtophys(&irqd->prio), vtophys(&irqd->lirq));
}

/* Allocate an IRQ struct before populating it. */
static struct xive_irq *
xive_configure_irq(u_int irq)
{
	struct xive_irq *irqd;

	irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK);

	xive_init_irq(irqd, irq);

	return (irqd);
}

/*
 * Part of the OPAL API.  OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages,
 * provisioned through this call.
 */
static int
xive_provision_page(struct xive_softc *sc)
{
	void *prov_page;
	int error;

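	/* Donate naturally-aligned pages until OPAL stops asking for more. */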
	do {
		prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE,
		    M_WAITOK, 0, BUS_SPACE_MAXADDR,
		    sc->sc_prov_page_size, sc->sc_prov_page_size);

		error = opal_call(OPAL_XIVE_DONATE_PAGE, -1,
		    vtophys(prov_page));
	} while (error == OPAL_XIVE_PROVISIONING);

	return (0);
}

/* The XIVE_TM_CPPR register must be set by each thread */
static void
xive_smp_cpu_startup(void)
{

	xive_setup_cpu();
}
768