xref: /freebsd/sys/powerpc/powernv/opal_pci.c (revision d2033021a73db7b8d910c1ffc52f4d1d0def7162)
1 /*-
2  * Copyright (c) 2015-2016 Nathan Whitehorn
3  * Copyright (c) 2017-2018 Semihalf
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/module.h>
31 #include <sys/bus.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/pciio.h>
35 #include <sys/endian.h>
36 #include <sys/rman.h>
37 #include <sys/vmem.h>
38 
39 #include <dev/ofw/openfirm.h>
40 #include <dev/ofw/ofw_pci.h>
41 #include <dev/ofw/ofw_bus.h>
42 #include <dev/ofw/ofw_bus_subr.h>
43 #include <dev/ofw/ofwpci.h>
44 
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
47 
48 #include <machine/bus.h>
49 #include <machine/intr_machdep.h>
50 #include <machine/md_var.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 
55 #include "pcib_if.h"
56 #include "pic_if.h"
57 #include "iommu_if.h"
58 #include "opal.h"
59 
/*
 * TCE (DMA translation) table parameters used by opalpci_attach().
 */
/* Upper bound on table entries; opalpci_attach() panics if it is exceeded. */
#define	OPAL_PCI_TCE_MAX_ENTRIES	(1024*1024UL)
/* Segment size used when "ibm,supported-tce-sizes" cannot be read. */
#define	OPAL_PCI_TCE_DEFAULT_SEG_SIZE	(16*1024*1024UL)
/* Bits OR'ed into every entry of the 1:1 table (presumably read/write). */
#define	OPAL_PCI_TCE_R			(1UL << 0)
#define	OPAL_PCI_TCE_W			(1UL << 1)
/* Value written to PHB register offset 0x210 to invalidate all TCEs. */
#define	PHB3_TCE_KILL_INVAL_ALL		(1UL << 63)
65 
66 /*
67  * Device interface.
68  */
69 static int		opalpci_probe(device_t);
70 static int		opalpci_attach(device_t);
71 
72 /*
73  * pcib interface.
74  */
75 static uint32_t		opalpci_read_config(device_t, u_int, u_int, u_int,
76 			    u_int, int);
77 static void		opalpci_write_config(device_t, u_int, u_int, u_int,
78 			    u_int, u_int32_t, int);
79 static int		opalpci_alloc_msi(device_t dev, device_t child,
80 			    int count, int maxcount, int *irqs);
81 static int		opalpci_release_msi(device_t dev, device_t child,
82 			    int count, int *irqs);
83 static int		opalpci_alloc_msix(device_t dev, device_t child,
84 			    int *irq);
85 static int		opalpci_release_msix(device_t dev, device_t child,
86 			    int irq);
87 static int		opalpci_map_msi(device_t dev, device_t child,
88 			    int irq, uint64_t *addr, uint32_t *data);
89 static int opalpci_route_interrupt(device_t bus, device_t dev, int pin);
90 
91 /*
92  * MSI PIC interface.
93  */
94 static void opalpic_pic_enable(device_t dev, u_int irq, u_int vector, void **);
95 static void opalpic_pic_eoi(device_t dev, u_int irq, void *);
96 
97 /* Bus interface */
98 static bus_dma_tag_t opalpci_get_dma_tag(device_t dev, device_t child);
99 
/*
 * Commands
 */
/*
 * Window type selectors passed to the OPAL MMIO window calls made from
 * opalpci_attach().
 */
#define	OPAL_M32_WINDOW_TYPE		1
#define	OPAL_M64_WINDOW_TYPE		2
#define	OPAL_IO_WINDOW_TYPE		3

/*
 * Scope arguments for OPAL_PCI_RESET.  Only referenced from the disabled
 * (#if 0) reset sequence in opalpci_attach().
 */
#define	OPAL_RESET_PHB_COMPLETE		1
#define	OPAL_RESET_PCI_IODA_TABLE	6

/*
 * Mode argument of OPAL_PCI_PHB_MMIO_ENABLE for an M64 window;
 * opalpci_attach() enables its window in non-split mode.
 */
#define	OPAL_DISABLE_M64		0
#define	OPAL_ENABLE_M64_SPLIT		1
#define	OPAL_ENABLE_M64_NON_SPLIT	2

/* Action argument of OPAL_PCI_EEH_FREEZE_CLEAR; this file always uses ALL. */
#define	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO	1
#define	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA	2
#define	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL	3

/* EEH state for which opalpci_read_config() skips the freeze clear. */
#define	OPAL_EEH_STOPPED_NOT_FROZEN		0

/*
 * Constants
 */
/* Partitionable endpoint that every device on the bus is mapped to. */
#define OPAL_PCI_DEFAULT_PE			1

/* lowaddr of the parent DMA tag created in opalpci_attach(). */
#define OPAL_PCI_BUS_SPACE_LOWADDR_32BIT	0x7FFFFFFFUL
126 
/*
 * Driver methods.
 *
 * Method table for the OPAL host bridge: device probe/attach, the pcib
 * configuration-space and MSI/MSI-X methods, the PIC methods used for MSIs,
 * and the bus methods.  bus_get_cpus and bus_get_domain are served by the
 * ofw_pcibus_* helpers rather than by code in this file.
 */
static device_method_t	opalpci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		opalpci_probe),
	DEVMETHOD(device_attach,	opalpci_attach),

	/* pcib interface */
	DEVMETHOD(pcib_read_config,	opalpci_read_config),
	DEVMETHOD(pcib_write_config,	opalpci_write_config),

	DEVMETHOD(pcib_alloc_msi,	opalpci_alloc_msi),
	DEVMETHOD(pcib_release_msi,	opalpci_release_msi),
	DEVMETHOD(pcib_alloc_msix,	opalpci_alloc_msix),
	DEVMETHOD(pcib_release_msix,	opalpci_release_msix),
	DEVMETHOD(pcib_map_msi,		opalpci_map_msi),
	DEVMETHOD(pcib_route_interrupt,	opalpci_route_interrupt),

	/* PIC interface for MSIs */
	DEVMETHOD(pic_enable,		opalpic_pic_enable),
	DEVMETHOD(pic_eoi,		opalpic_pic_eoi),

	/* Bus interface */
	DEVMETHOD(bus_get_dma_tag,	opalpci_get_dma_tag),
	DEVMETHOD(bus_get_cpus,		ofw_pcibus_get_cpus),
	DEVMETHOD(bus_get_domain,	ofw_pcibus_get_domain),

	DEVMETHOD_END
};
157 
/*
 * Per-PHB driver state.
 *
 * NOTE(review): ofw_sc is kept as the first member; presumably this is
 * required because the class derives from ofw_pcib_driver -- confirm before
 * reordering.
 */
struct opalpci_softc {
	struct ofw_pci_softc ofw_sc;
	uint64_t phb_id;	/* PHB id read from "ibm,opal-phbid" */
	vmem_t *msi_vmem;	/* MSI number allocator; NULL without MSI ranges */
	int msi_base;		/* Base XIVE number */
	int base_msi_irq;	/* Base IRQ assigned by FreeBSD to this PIC */
	uint64_t *tce;		/* TCE table for 1:1 mapping */
	struct resource *r_reg;	/* PHB register resource (memory rid 0) */
};
167 
/*
 * Declare the driver class with ofw_pcib_driver as its base class and
 * register it under ofwbus at the BUS_PASS_BUS pass.
 */
DEFINE_CLASS_1(pcib, opalpci_driver, opalpci_methods,
    sizeof(struct opalpci_softc), ofw_pcib_driver);
EARLY_DRIVER_MODULE(opalpci, ofwbus, opalpci_driver, 0, 0, BUS_PASS_BUS);
171 
172 static int
173 opalpci_probe(device_t dev)
174 {
175 	const char	*type;
176 
177 	if (opal_check() != 0)
178 		return (ENXIO);
179 
180 	type = ofw_bus_get_type(dev);
181 
182 	if (type == NULL || (strcmp(type, "pci") != 0 &&
183 	    strcmp(type, "pciex") != 0))
184 		return (ENXIO);
185 
186 	if (!OF_hasprop(ofw_bus_get_node(dev), "ibm,opal-phbid"))
187 		return (ENXIO);
188 
189 	device_set_desc(dev, "OPAL Host-PCI bridge");
190 	return (BUS_PROBE_GENERIC);
191 }
192 
193 static void
194 pci_phb3_tce_invalidate_entire(struct opalpci_softc *sc)
195 {
196 
197 	mb();
198 	bus_write_8(sc->r_reg, 0x210, PHB3_TCE_KILL_INVAL_ALL);
199 	mb();
200 }
201 
/*
 * Round val up to the nearest power of 2.
 *
 * A value that already is a power of 2 is returned unchanged; 0 and 1 both
 * yield 1.  All arithmetic is done in uint64_t so that results of 2^31 and
 * above are representable.  If no power of 2 >= val fits in 64 bits
 * (val > 2^63) the function returns 0.
 */
static uint64_t
round_pow2(uint64_t val)
{
	uint64_t pow2;

	/* pow2 wraps to 0 once it is shifted past bit 63; stop there. */
	for (pow2 = 1; pow2 != 0 && pow2 < val; pow2 <<= 1)
		continue;

	return (pow2);
}
209 
210 /*
211  * Starting with skiboot 5.10 PCIe nodes have a new property,
212  * "ibm,supported-tce-sizes", to denote the TCE sizes available.  This allows us
213  * to avoid hard-coding the maximum TCE size allowed, and instead provide a sane
214  * default (however, the "sane" default, which works for all targets, is 64k,
215  * limiting us to 64GB if we have 1M entries.
216  */
217 static uint64_t
218 max_tce_size(device_t dev)
219 {
220 	phandle_t node;
221 	cell_t sizes[64]; /* Property is a list of bit-widths, up to 64-bits */
222 	int count;
223 
224 	node = ofw_bus_get_node(dev);
225 
226 	count = OF_getencprop(node, "ibm,supported-tce-sizes",
227 	    sizes, sizeof(sizes));
228 	if (count < (int) sizeof(cell_t))
229 		return OPAL_PCI_TCE_DEFAULT_SEG_SIZE;
230 
231 	count /= sizeof(cell_t);
232 
233 	return (1ULL << sizes[count - 1]);
234 }
235 
236 static int
237 opalpci_attach(device_t dev)
238 {
239 	struct opalpci_softc *sc;
240 	cell_t id[2], m64ranges[2], m64window[6], npe;
241 	phandle_t node;
242 	int i, err;
243 	uint64_t maxmem;
244 	uint64_t entries;
245 	uint64_t tce_size;
246 	uint64_t tce_tbl_size;
247 	int m64bar;
248 	int rid;
249 
250 	sc = device_get_softc(dev);
251 	node = ofw_bus_get_node(dev);
252 
253 	switch (OF_getproplen(node, "ibm,opal-phbid")) {
254 	case 8:
255 		OF_getencprop(node, "ibm,opal-phbid", id, 8);
256 		sc->phb_id = ((uint64_t)id[0] << 32) | id[1];
257 		break;
258 	case 4:
259 		OF_getencprop(node, "ibm,opal-phbid", id, 4);
260 		sc->phb_id = id[0];
261 		break;
262 	default:
263 		device_printf(dev, "PHB ID property had wrong length (%zd)\n",
264 		    OF_getproplen(node, "ibm,opal-phbid"));
265 		return (ENXIO);
266 	}
267 
268 	if (bootverbose)
269 		device_printf(dev, "OPAL ID %#lx\n", sc->phb_id);
270 
271 	rid = 0;
272 	sc->r_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
273 	    &rid, RF_ACTIVE | RF_SHAREABLE);
274 	if (sc->r_reg == NULL) {
275 		device_printf(dev, "Failed to allocate PHB[%jd] registers\n",
276 		    (uintmax_t)sc->phb_id);
277 		return (ENXIO);
278 	}
279 
280 #if 0
281 	/*
282 	 * Reset PCI IODA table
283 	 */
284 	err = opal_call(OPAL_PCI_RESET, sc->phb_id, OPAL_RESET_PCI_IODA_TABLE,
285 	    1);
286 	if (err != 0) {
287 		device_printf(dev, "IODA table reset failed: %d\n", err);
288 		return (ENXIO);
289 	}
290 	err = opal_call(OPAL_PCI_RESET, sc->phb_id, OPAL_RESET_PHB_COMPLETE,
291 	    1);
292 	if (err < 0) {
293 		device_printf(dev, "PHB reset failed: %d\n", err);
294 		return (ENXIO);
295 	}
296 	if (err > 0) {
297 		while ((err = opal_call(OPAL_PCI_POLL, sc->phb_id)) > 0) {
298 			DELAY(1000*(err + 1)); /* Returns expected delay in ms */
299 		}
300 	}
301 	if (err < 0) {
302 		device_printf(dev, "WARNING: PHB IODA reset poll failed: %d\n", err);
303 	}
304 	err = opal_call(OPAL_PCI_RESET, sc->phb_id, OPAL_RESET_PHB_COMPLETE,
305 	    0);
306 	if (err < 0) {
307 		device_printf(dev, "PHB reset failed: %d\n", err);
308 		return (ENXIO);
309 	}
310 	if (err > 0) {
311 		while ((err = opal_call(OPAL_PCI_POLL, sc->phb_id)) > 0) {
312 			DELAY(1000*(err + 1)); /* Returns expected delay in ms */
313 		}
314 	}
315 #endif
316 
317 	/*
318 	 * Map all devices on the bus to partitionable endpoint one until
319 	 * such time as we start wanting to do things like bhyve.
320 	 */
321 	err = opal_call(OPAL_PCI_SET_PE, sc->phb_id, OPAL_PCI_DEFAULT_PE,
322 	    0, OPAL_PCI_BUS_ANY, OPAL_IGNORE_RID_DEVICE_NUMBER,
323 	    OPAL_IGNORE_RID_FUNC_NUMBER, OPAL_MAP_PE);
324 	if (err != 0) {
325 		device_printf(dev, "PE mapping failed: %d\n", err);
326 		return (ENXIO);
327 	}
328 
329 	/*
330 	 * Turn on MMIO, mapped to PE 1
331 	 */
332 	if (OF_getencprop(node, "ibm,opal-num-pes", &npe, 4) != 4)
333 		npe = 1;
334 	for (i = 0; i < npe; i++) {
335 		err = opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, sc->phb_id,
336 		    OPAL_PCI_DEFAULT_PE, OPAL_M32_WINDOW_TYPE, 0, i);
337 		if (err != 0)
338 			device_printf(dev, "MMIO %d map failed: %d\n", i, err);
339 	}
340 
341 	if (OF_getencprop(node, "ibm,opal-available-m64-ranges",
342 	    m64ranges, sizeof(m64ranges)) == sizeof(m64ranges))
343 		m64bar = m64ranges[0];
344 	else
345 	    m64bar = 0;
346 
347 	/* XXX: multiple M64 windows? */
348 	if (OF_getencprop(node, "ibm,opal-m64-window",
349 	    m64window, sizeof(m64window)) == sizeof(m64window)) {
350 		opal_call(OPAL_PCI_PHB_MMIO_ENABLE, sc->phb_id,
351 		    OPAL_M64_WINDOW_TYPE, m64bar, 0);
352 		opal_call(OPAL_PCI_SET_PHB_MEM_WINDOW, sc->phb_id,
353 		    OPAL_M64_WINDOW_TYPE, m64bar /* index */,
354 		    ((uint64_t)m64window[2] << 32) | m64window[3], 0,
355 		    ((uint64_t)m64window[4] << 32) | m64window[5]);
356 		opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, sc->phb_id,
357 		    OPAL_PCI_DEFAULT_PE, OPAL_M64_WINDOW_TYPE,
358 		    m64bar /* index */, 0);
359 		opal_call(OPAL_PCI_PHB_MMIO_ENABLE, sc->phb_id,
360 		    OPAL_M64_WINDOW_TYPE, m64bar, OPAL_ENABLE_M64_NON_SPLIT);
361 	}
362 
363 	/*
364 	 * Enable IOMMU for PE1 - map everything 1:1 using
365 	 * segments of max_tce_size size
366 	 */
367 	tce_size = max_tce_size(dev);
368 	maxmem = roundup2(powerpc_ptob(Maxmem), tce_size);
369 	entries = round_pow2(maxmem / tce_size);
370 	tce_tbl_size = MAX(entries * sizeof(uint64_t), 4096);
371 	if (entries > OPAL_PCI_TCE_MAX_ENTRIES)
372 		panic("POWERNV supports only %jdGB of memory space\n",
373 		    (uintmax_t)((OPAL_PCI_TCE_MAX_ENTRIES * tce_size) >> 30));
374 	if (bootverbose)
375 		device_printf(dev, "Mapping 0-%#jx for DMA\n", (uintmax_t)maxmem);
376 	sc->tce = contigmalloc(tce_tbl_size,
377 	    M_DEVBUF, M_NOWAIT | M_ZERO, 0,
378 	    BUS_SPACE_MAXADDR, tce_tbl_size, 0);
379 	if (sc->tce == NULL)
380 		panic("Failed to allocate TCE memory for PHB %jd\n",
381 		    (uintmax_t)sc->phb_id);
382 
383 	for (i = 0; i < entries; i++)
384 		sc->tce[i] = htobe64((i * tce_size) | OPAL_PCI_TCE_R | OPAL_PCI_TCE_W);
385 
386 	/* Map TCE for every PE. It seems necessary for Power8 */
387 	for (i = 0; i < npe; i++) {
388 		err = opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW, sc->phb_id,
389 		    i, (i << 1),
390 		    1, pmap_kextract((uint64_t)&sc->tce[0]),
391 		    tce_tbl_size, tce_size);
392 		if (err != 0) {
393 			device_printf(dev, "DMA IOMMU mapping failed: %d\n", err);
394 			return (ENXIO);
395 		}
396 
397 		err = opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, sc->phb_id,
398 		    i, (i << 1) + 1,
399 		    (1UL << 59), maxmem);
400 		if (err != 0) {
401 			device_printf(dev, "DMA 64b bypass mapping failed: %d\n", err);
402 			return (ENXIO);
403 		}
404 	}
405 
406 	/*
407 	 * Invalidate all previous TCE entries.
408 	 */
409 	if (ofw_bus_is_compatible(dev, "power8-pciex"))
410 		pci_phb3_tce_invalidate_entire(sc);
411 	else
412 		opal_call(OPAL_PCI_TCE_KILL, sc->phb_id, OPAL_PCI_TCE_KILL_ALL,
413 		    OPAL_PCI_DEFAULT_PE, 0, 0, 0);
414 
415 	/*
416 	 * Get MSI properties
417 	 */
418 	sc->msi_vmem = NULL;
419 	if (OF_getproplen(node, "ibm,opal-msi-ranges") > 0) {
420 		cell_t msi_ranges[2];
421 		OF_getencprop(node, "ibm,opal-msi-ranges",
422 		    msi_ranges, sizeof(msi_ranges));
423 		sc->msi_base = msi_ranges[0];
424 
425 		sc->msi_vmem = vmem_create("OPAL MSI", msi_ranges[0],
426 		    msi_ranges[1], 1, 0, M_BESTFIT | M_WAITOK);
427 
428 		sc->base_msi_irq = powerpc_register_pic(dev,
429 		    OF_xref_from_node(node),
430 		    msi_ranges[0] + msi_ranges[1], 0, FALSE);
431 
432 		if (bootverbose)
433 			device_printf(dev, "Supports %d MSIs starting at %d\n",
434 			    msi_ranges[1], msi_ranges[0]);
435 	}
436 
437 	/* Create the parent DMA tag */
438 	/*
439 	 * Constrain it to POWER8 PHB (ioda2) for now.  It seems to mess up on
440 	 * POWER9 systems.
441 	 */
442 	if (ofw_bus_is_compatible(dev, "ibm,ioda2-phb")) {
443 		err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
444 		    1, 0,				/* alignment, bounds */
445 		    OPAL_PCI_BUS_SPACE_LOWADDR_32BIT,	/* lowaddr */
446 		    BUS_SPACE_MAXADDR_32BIT,		/* highaddr */
447 		    NULL, NULL,				/* filter, filterarg */
448 		    BUS_SPACE_MAXSIZE,			/* maxsize */
449 		    BUS_SPACE_UNRESTRICTED,		/* nsegments */
450 		    BUS_SPACE_MAXSIZE,			/* maxsegsize */
451 		    0,					/* flags */
452 		    NULL, NULL,				/* lockfunc, lockarg */
453 		    &sc->ofw_sc.sc_dmat);
454 		if (err != 0) {
455 			device_printf(dev, "Failed to create DMA tag\n");
456 			return (err);
457 		}
458 	}
459 
460 	/*
461 	 * General OFW PCI attach
462 	 */
463 	err = ofw_pcib_init(dev);
464 	if (err != 0)
465 		return (err);
466 
467 	/*
468 	 * Unfreeze non-config-space PCI operations. Let this fail silently
469 	 * if e.g. there is no current freeze.
470 	 */
471 	opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, sc->phb_id, OPAL_PCI_DEFAULT_PE,
472 	    OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
473 
474 	/*
475 	 * OPAL stores 64-bit BARs in a special property rather than "ranges"
476 	 */
477 	if (OF_getencprop(node, "ibm,opal-m64-window",
478 	    m64window, sizeof(m64window)) == sizeof(m64window)) {
479 		struct ofw_pci_range *rp;
480 
481 		sc->ofw_sc.sc_nrange++;
482 		sc->ofw_sc.sc_range = realloc(sc->ofw_sc.sc_range,
483 		    sc->ofw_sc.sc_nrange * sizeof(sc->ofw_sc.sc_range[0]),
484 		    M_DEVBUF, M_WAITOK);
485 		rp = &sc->ofw_sc.sc_range[sc->ofw_sc.sc_nrange-1];
486 		rp->pci_hi = OFW_PCI_PHYS_HI_SPACE_MEM64 |
487 		    OFW_PCI_PHYS_HI_PREFETCHABLE;
488 		rp->pci = ((uint64_t)m64window[0] << 32) | m64window[1];
489 		rp->host = ((uint64_t)m64window[2] << 32) | m64window[3];
490 		rp->size = ((uint64_t)m64window[4] << 32) | m64window[5];
491 		rman_manage_region(&sc->ofw_sc.sc_mem_rman, rp->pci,
492 		   rp->pci + rp->size - 1);
493 	}
494 
495 	return (ofw_pcib_attach(dev));
496 }
497 
498 static uint32_t
499 opalpci_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg,
500     int width)
501 {
502 	struct opalpci_softc *sc;
503 	uint64_t config_addr;
504 	uint8_t byte, eeh_state;
505 	uint16_t half;
506 	uint32_t word;
507 	int error;
508 	uint16_t err_type;
509 
510 	sc = device_get_softc(dev);
511 
512 	config_addr = (bus << 8) | ((slot & 0x1f) << 3) | (func & 0x7);
513 
514 	switch (width) {
515 	case 1:
516 		error = opal_call(OPAL_PCI_CONFIG_READ_BYTE, sc->phb_id,
517 		    config_addr, reg, vtophys(&byte));
518 		word = byte;
519 		break;
520 	case 2:
521 		error = opal_call(OPAL_PCI_CONFIG_READ_HALF_WORD, sc->phb_id,
522 		    config_addr, reg, vtophys(&half));
523 		word = be16toh(half);
524 		break;
525 	case 4:
526 		error = opal_call(OPAL_PCI_CONFIG_READ_WORD, sc->phb_id,
527 		    config_addr, reg, vtophys(&word));
528 		word = be32toh(word);
529 		break;
530 	default:
531 		error = OPAL_SUCCESS;
532 		word = 0xffffffff;
533 		width = 4;
534 	}
535 
536 	/*
537 	 * Poking config state for non-existant devices can make
538 	 * the host bridge hang up. Clear any errors.
539 	 */
540 
541 	if (error != OPAL_SUCCESS ||
542 	    (word == ((1UL << (8 * width)) - 1))) {
543 		if (error != OPAL_HARDWARE) {
544 			opal_call(OPAL_PCI_EEH_FREEZE_STATUS, sc->phb_id,
545 			    OPAL_PCI_DEFAULT_PE, vtophys(&eeh_state),
546 			    vtophys(&err_type), NULL);
547 			err_type = be16toh(err_type); /* XXX unused */
548 			if (eeh_state != OPAL_EEH_STOPPED_NOT_FROZEN)
549 				opal_call(OPAL_PCI_EEH_FREEZE_CLEAR,
550 				    sc->phb_id, OPAL_PCI_DEFAULT_PE,
551 				    OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
552 		}
553 		if (error != OPAL_SUCCESS)
554 			word = 0xffffffff;
555 	}
556 
557 	return (word);
558 }
559 
560 static void
561 opalpci_write_config(device_t dev, u_int bus, u_int slot, u_int func,
562     u_int reg, uint32_t val, int width)
563 {
564 	struct opalpci_softc *sc;
565 	uint64_t config_addr;
566 	int error = OPAL_SUCCESS;
567 
568 	sc = device_get_softc(dev);
569 
570 	config_addr = (bus << 8) | ((slot & 0x1f) << 3) | (func & 0x7);
571 
572 	switch (width) {
573 	case 1:
574 		error = opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, sc->phb_id,
575 		    config_addr, reg, val);
576 		break;
577 	case 2:
578 		error = opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, sc->phb_id,
579 		    config_addr, reg, val);
580 		break;
581 	case 4:
582 		error = opal_call(OPAL_PCI_CONFIG_WRITE_WORD, sc->phb_id,
583 		    config_addr, reg, val);
584 		break;
585 	}
586 
587 	if (error != OPAL_SUCCESS) {
588 		/*
589 		 * Poking config state for non-existant devices can make
590 		 * the host bridge hang up. Clear any errors.
591 		 */
592 		if (error != OPAL_HARDWARE) {
593 			opal_call(OPAL_PCI_EEH_FREEZE_CLEAR,
594 			    sc->phb_id, OPAL_PCI_DEFAULT_PE,
595 			    OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
596 		}
597 	}
598 }
599 
600 static int
601 opalpci_route_interrupt(device_t bus, device_t dev, int pin)
602 {
603 
604 	return (pin);
605 }
606 
607 static int
608 opalpci_alloc_msi(device_t dev, device_t child, int count, int maxcount,
609     int *irqs)
610 {
611 	struct opalpci_softc *sc;
612 	vmem_addr_t start;
613 	phandle_t xref;
614 	int err, i;
615 
616 	sc = device_get_softc(dev);
617 	if (sc->msi_vmem == NULL)
618 		return (ENODEV);
619 
620 	err = vmem_xalloc(sc->msi_vmem, count, powerof2(count), 0, 0,
621 	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_BESTFIT | M_WAITOK, &start);
622 
623 	if (err)
624 		return (err);
625 
626 	xref = OF_xref_from_node(ofw_bus_get_node(dev));
627 	for (i = 0; i < count; i++)
628 		irqs[i] = MAP_IRQ(xref, start + i);
629 
630 	return (0);
631 }
632 
633 static int
634 opalpci_release_msi(device_t dev, device_t child, int count, int *irqs)
635 {
636 	struct opalpci_softc *sc;
637 
638 	sc = device_get_softc(dev);
639 	if (sc->msi_vmem == NULL)
640 		return (ENODEV);
641 
642 	vmem_xfree(sc->msi_vmem, irqs[0] - sc->base_msi_irq, count);
643 	return (0);
644 }
645 
646 static int
647 opalpci_alloc_msix(device_t dev, device_t child, int *irq)
648 {
649 	return (opalpci_alloc_msi(dev, child, 1, 1, irq));
650 }
651 
652 static int
653 opalpci_release_msix(device_t dev, device_t child, int irq)
654 {
655 	return (opalpci_release_msi(dev, child, 1, &irq));
656 }
657 
658 static int
659 opalpci_map_msi(device_t dev, device_t child, int irq, uint64_t *addr,
660     uint32_t *data)
661 {
662 	struct opalpci_softc *sc;
663 	struct pci_devinfo *dinfo;
664 	int err, xive;
665 
666 	sc = device_get_softc(dev);
667 	if (sc->msi_vmem == NULL)
668 		return (ENODEV);
669 
670 	xive = irq - sc->base_msi_irq - sc->msi_base;
671 	opal_call(OPAL_PCI_SET_XIVE_PE, sc->phb_id, OPAL_PCI_DEFAULT_PE, xive);
672 
673 	dinfo = device_get_ivars(child);
674 	if (dinfo->cfg.msi.msi_alloc > 0 &&
675 	    (dinfo->cfg.msi.msi_ctrl & PCIM_MSICTRL_64BIT) == 0) {
676 		uint32_t msi32;
677 		err = opal_call(OPAL_GET_MSI_32, sc->phb_id,
678 		    OPAL_PCI_DEFAULT_PE, xive, 1, vtophys(&msi32),
679 		    vtophys(data));
680 		*addr = be32toh(msi32);
681 	} else {
682 		err = opal_call(OPAL_GET_MSI_64, sc->phb_id,
683 		    OPAL_PCI_DEFAULT_PE, xive, 1, vtophys(addr), vtophys(data));
684 		*addr = be64toh(*addr);
685 	}
686 	*data = be32toh(*data);
687 
688 	if (bootverbose && err != 0)
689 		device_printf(child, "OPAL MSI mapping error: %d\n", err);
690 
691 	return ((err == 0) ? 0 : ENXIO);
692 }
693 
694 static void
695 opalpic_pic_enable(device_t dev, u_int irq, u_int vector, void **priv)
696 {
697 	struct opalpci_softc *sc = device_get_softc(dev);
698 
699 	PIC_ENABLE(root_pic, irq, vector, priv);
700 	opal_call(OPAL_PCI_MSI_EOI, sc->phb_id, irq, priv);
701 }
702 
703 static void opalpic_pic_eoi(device_t dev, u_int irq, void *priv)
704 {
705 	struct opalpci_softc *sc;
706 
707 	sc = device_get_softc(dev);
708 	opal_call(OPAL_PCI_MSI_EOI, sc->phb_id, irq);
709 
710 	PIC_EOI(root_pic, irq, priv);
711 }
712 
713 static bus_dma_tag_t
714 opalpci_get_dma_tag(device_t dev, device_t child)
715 {
716 	struct opalpci_softc *sc;
717 
718 	sc = device_get_softc(dev);
719 	return (sc->ofw_sc.sc_dmat);
720 }
721