xref: /freebsd/sys/dev/pci/pci.c (revision a907c6914c5879870b2597a63253cea0a5b7bdb8)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
79 #define	PCIR_IS_BIOS(cfg, reg)						\
80 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
81 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
82 
83 static int		pci_has_quirk(uint32_t devid, int quirk);
84 static pci_addr_t	pci_mapbase(uint64_t mapreg);
85 static const char	*pci_maptype(uint64_t mapreg);
86 static int		pci_maprange(uint64_t mapreg);
87 static pci_addr_t	pci_rombase(uint64_t mapreg);
88 static int		pci_romsize(uint64_t testval);
89 static void		pci_fixancient(pcicfgregs *cfg);
90 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91 
92 static int		pci_porten(device_t dev);
93 static int		pci_memen(device_t dev);
94 static void		pci_assign_interrupt(device_t bus, device_t dev,
95 			    int force_route);
96 static int		pci_add_map(device_t bus, device_t dev, int reg,
97 			    struct resource_list *rl, int force, int prefetch);
98 static int		pci_probe(device_t dev);
99 static int		pci_attach(device_t dev);
100 #ifdef PCI_RES_BUS
101 static int		pci_detach(device_t dev);
102 #endif
103 static void		pci_load_vendor_data(void);
104 static int		pci_describe_parse_line(char **ptr, int *vendor,
105 			    int *device, char **desc);
106 static char		*pci_describe_device(device_t dev);
107 static int		pci_modevent(module_t mod, int what, void *arg);
108 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
109 			    pcicfgregs *cfg);
110 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
111 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
112 			    int reg, uint32_t *data);
113 #if 0
114 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
115 			    int reg, uint32_t data);
116 #endif
117 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
118 static void		pci_mask_msix(device_t dev, u_int index);
119 static void		pci_unmask_msix(device_t dev, u_int index);
120 static int		pci_msi_blacklisted(void);
121 static int		pci_msix_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
127 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128 
129 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
130     int b, int s, int f, uint16_t vid, uint16_t did);
131 
/*
 * Method table wiring the generic device and bus interfaces, plus the
 * PCI kobj interface ("pci_if"), to their pci(4) implementations.
 * Entries using bus_generic_* delegate to the default bus behavior.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor description database buffer and size (see pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
218 
/*
 * Quirk table entry.  A device matches when its combined 32-bit
 * device/vendor ID word equals 'devid' and the requested quirk equals
 * 'type'.  arg1/arg2 carry quirk-specific parameters (e.g. the
 * register offset for PCI_QUIRK_MAP_REG).  Matched by pci_has_quirk().
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;
	int	arg2;
};

static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* All-zero terminator: pci_has_quirk() stops at devid == 0. */
	{ 0 }
};
309 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions and its bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set by pci_read_cap() when a PCIe / PCI-X capability is first seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB takeover defaults on only for x86, where legacy BIOS USB exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
389 
390 static int
391 pci_has_quirk(uint32_t devid, int quirk)
392 {
393 	const struct pci_quirk *q;
394 
395 	for (q = &pci_quirks[0]; q->devid; q++) {
396 		if (q->devid == devid && q->type == quirk)
397 			return (1);
398 	}
399 	return (0);
400 }
401 
/*
 * Find a device_t by bus/slot/function in domain 0.  Thin wrapper
 * around pci_find_dbsf(); returns NULL if no such device is known.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
410 
411 /* Find a device_t by domain/bus/slot/function */
412 
413 device_t
414 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
415 {
416 	struct pci_devinfo *dinfo;
417 
418 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
419 		if ((dinfo->cfg.domain == domain) &&
420 		    (dinfo->cfg.bus == bus) &&
421 		    (dinfo->cfg.slot == slot) &&
422 		    (dinfo->cfg.func == func)) {
423 			return (dinfo->cfg.dev);
424 		}
425 	}
426 
427 	return (NULL);
428 }
429 
430 /* Find a device_t by vendor/device ID */
431 
432 device_t
433 pci_find_device(uint16_t vendor, uint16_t device)
434 {
435 	struct pci_devinfo *dinfo;
436 
437 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
438 		if ((dinfo->cfg.vendor == vendor) &&
439 		    (dinfo->cfg.device == device)) {
440 			return (dinfo->cfg.dev);
441 		}
442 	}
443 
444 	return (NULL);
445 }
446 
447 device_t
448 pci_find_class(uint8_t class, uint8_t subclass)
449 {
450 	struct pci_devinfo *dinfo;
451 
452 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
453 		if (dinfo->cfg.baseclass == class &&
454 		    dinfo->cfg.subclass == subclass) {
455 			return (dinfo->cfg.dev);
456 		}
457 	}
458 
459 	return (NULL);
460 }
461 
462 static int
463 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
464 {
465 	va_list ap;
466 	int retval;
467 
468 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
469 	    cfg->func);
470 	va_start(ap, fmt);
471 	retval += vprintf(fmt, ap);
472 	va_end(ap);
473 	return (retval);
474 }
475 
476 /* return base address of memory or port map */
477 
478 static pci_addr_t
479 pci_mapbase(uint64_t mapreg)
480 {
481 
482 	if (PCI_BAR_MEM(mapreg))
483 		return (mapreg & PCIM_BAR_MEM_BASE);
484 	else
485 		return (mapreg & PCIM_BAR_IO_BASE);
486 }
487 
488 /* return map type of memory or port map */
489 
490 static const char *
491 pci_maptype(uint64_t mapreg)
492 {
493 
494 	if (PCI_BAR_IO(mapreg))
495 		return ("I/O Port");
496 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
497 		return ("Prefetchable Memory");
498 	return ("Memory");
499 }
500 
/*
 * Return log2 of the map size decoded from a probed BAR value (the
 * value read back after writing all-ones), i.e. the index of the
 * lowest set bit of the base-address field.  Returns 0 when the
 * decoded base is zero.
 */

int
pci_mapsize(uint64_t testval)
{
	uint64_t decoded;
	int bits;

	decoded = pci_mapbase(testval);
	if (decoded == 0)
		return (0);
	for (bits = 0; (decoded & 1) == 0; decoded >>= 1)
		bits++;
	return (bits);
}
519 
/*
 * Return the base address of a device (expansion) ROM BAR by masking
 * off the enable and reserved bits.
 */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
528 
/*
 * Return log2 of the ROM map size decoded from a probed ROM BAR value:
 * the index of the lowest set bit of the address field, or 0 when the
 * decoded base is zero.
 */

static int
pci_romsize(uint64_t testval)
{
	uint64_t decoded;
	int bits;

	decoded = pci_rombase(testval);
	if (decoded == 0)
		return (0);
	for (bits = 0; (decoded & 1) == 0; decoded >>= 1)
		bits++;
	return (bits);
}
547 
548 /* return log2 of address range supported by map register */
549 
550 static int
551 pci_maprange(uint64_t mapreg)
552 {
553 	int ln2range = 0;
554 
555 	if (PCI_BAR_IO(mapreg))
556 		ln2range = 32;
557 	else
558 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
559 		case PCIM_BAR_MEM_32:
560 			ln2range = 32;
561 			break;
562 		case PCIM_BAR_MEM_1MB:
563 			ln2range = 20;
564 			break;
565 		case PCIM_BAR_MEM_64:
566 			ln2range = 64;
567 			break;
568 		}
569 	return (ln2range);
570 }
571 
572 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
573 
574 static void
575 pci_fixancient(pcicfgregs *cfg)
576 {
577 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
578 		return;
579 
580 	/* PCI to PCI bridges use header type 1 */
581 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
582 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
583 }
584 
/*
 * Extract header-type specific config data into cfg: subsystem IDs,
 * bridge bus-number/latency/control registers as applicable, and the
 * number of BAR map registers (nummaps) for each header layout.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* PCI-PCI bridge: bus numbers and bridge control. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* CardBus bridge: like type 1 but also has subsystem IDs. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
620 
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	/*
	 * A vendor ID of 0xffff means nothing responded at this
	 * bus/slot/function, so only build a devinfo for valid IDs.
	 * NOTE: the REG() macro above is intentionally left defined;
	 * pci_fill_devinfo() below reuses it (#undef follows it).
	 */
	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}
635 
/*
 * Default PCI_ALLOC_DEVINFO() implementation: allocate a zeroed
 * struct pci_devinfo.  M_WAITOK means this cannot return NULL.
 */
struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
	    M_WAITOK | M_ZERO));
}
643 
/*
 * Build a pci_devinfo for the function at domain d, bus b, slot s,
 * function f whose vendor/device IDs (vid/did) the caller already
 * read.  Copies the standard config header into cfg, parses header
 * type data and capabilities, appends the entry to the global
 * pci_devq list, and mirrors key fields into the 'conf' summary.
 * Uses the REG() macro defined in pci_read_device() above.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Latch the multi-function bit, then strip it from hdrtype. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* Walk the capability list only if the status register says so. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror selected fields into the flat 'conf' summary structure. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG
709 
710 static void
711 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
712 {
713 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
714     cfg->ea.ea_location + (n), w)
715 	int num_ent;
716 	int ptr;
717 	int a, b;
718 	uint32_t val;
719 	int ent_size;
720 	uint32_t dw[4];
721 	uint64_t base, max_offset;
722 	struct pci_ea_entry *eae;
723 
724 	if (cfg->ea.ea_location == 0)
725 		return;
726 
727 	STAILQ_INIT(&cfg->ea.ea_entries);
728 
729 	/* Determine the number of entries */
730 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
731 	num_ent &= PCIM_EA_NUM_ENT_MASK;
732 
733 	/* Find the first entry to care of */
734 	ptr = PCIR_EA_FIRST_ENT;
735 
736 	/* Skip DWORD 2 for type 1 functions */
737 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
738 		ptr += 4;
739 
740 	for (a = 0; a < num_ent; a++) {
741 
742 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
743 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
744 
745 		/* Read a number of dwords in the entry */
746 		val = REG(ptr, 4);
747 		ptr += 4;
748 		ent_size = (val & PCIM_EA_ES);
749 
750 		for (b = 0; b < ent_size; b++) {
751 			dw[b] = REG(ptr, 4);
752 			ptr += 4;
753 		}
754 
755 		eae->eae_flags = val;
756 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
757 
758 		base = dw[0] & PCIM_EA_FIELD_MASK;
759 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
760 		b = 2;
761 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
762 			base |= (uint64_t)dw[b] << 32UL;
763 			b++;
764 		}
765 		if (((dw[1] & PCIM_EA_IS_64) != 0)
766 		    && (b < ent_size)) {
767 			max_offset |= (uint64_t)dw[b] << 32UL;
768 			b++;
769 		}
770 
771 		eae->eae_base = base;
772 		eae->eae_max_offset = max_offset;
773 
774 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
775 
776 		if (bootverbose) {
777 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
778 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
779 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
780 		}
781 	}
782 }
783 #undef REG
784 
/*
 * Walk the standard PCI capability list of the device described by
 * cfg and record the location (and key registers) of each capability
 * this driver cares about: power management, HyperTransport, MSI,
 * MSI-X, VPD, subvendor, PCI-X, PCI-express, and Enhanced Allocation.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: capability pointers are 8-bit offsets. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/*
				 * NOTE(review): guards pp_data by the distance
				 * to the next capability; assumes consecutive
				 * layout -- verify against the PM spec.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field holds log2 of the supported messages. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* Table size field is encoded as N-1. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Each of table and PBA is a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
949 
950 /*
951  * PCI Vital Product Data
952  */
953 
954 #define	PCI_VPD_TIMEOUT		1000000
955 
956 static int
957 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
958 {
959 	int count = PCI_VPD_TIMEOUT;
960 
961 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
962 
963 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
964 
965 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
966 		if (--count < 0)
967 			return (ENXIO);
968 		DELAY(1);	/* limit looping */
969 	}
970 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
971 
972 	return (0);
973 }
974 
#if 0
/*
 * Write one aligned 32-bit word of VPD data: load the data register,
 * then post the address with bit 15 set and poll until the device
 * clears it to signal completion.  Currently compiled out — no caller
 * in this file writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Bit 15 set requests a write; the device clears it when done. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
994 
995 #undef PCI_VPD_TIMEOUT
996 
/*
 * Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent dword, shifted as consumed */
	int		bytesinval;	/* bytes of 'val' still unconsumed */
	int		off;		/* next VPD offset to fetch (4-aligned) */
	uint8_t		cksum;		/* running byte sum; 0 at "RV" == valid */
};
1005 
1006 static int
1007 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1008 {
1009 	uint32_t reg;
1010 	uint8_t byte;
1011 
1012 	if (vrs->bytesinval == 0) {
1013 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1014 			return (ENXIO);
1015 		vrs->val = le32toh(reg);
1016 		vrs->off += 4;
1017 		byte = vrs->val & 0xff;
1018 		vrs->bytesinval = 3;
1019 	} else {
1020 		vrs->val = vrs->val >> 8;
1021 		byte = vrs->val & 0xff;
1022 		vrs->bytesinval--;
1023 	}
1024 
1025 	vrs->cksum += byte;
1026 	*data = byte;
1027 	return (0);
1028 }
1029 
/*
 * Parse the device's Vital Product Data area and cache the identifier
 * string, the read-only ("VPD-R") keyword array and the writable
 * ("VPD-W") keyword array in cfg->vpd.  Implemented as a byte-driven
 * state machine fed by vpd_nextbyte(); state < 0 terminates the loop
 * (-1 = normal end or malformed data, -2 = I/O error).  On checksum
 * failure or I/O error the partially built arrays are freed again.
 * vpd_cached is set unconditionally so the parse is attempted once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;		/* current parser state; < 0 terminates */
	int name;		/* tag of the current VPD resource */
	int remain;		/* bytes left in the current resource */
	int i;			/* write index into the current value */
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 unknown, 0 bad, 1 verified */
	int dflen;		/* data length of the current keyword */
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is at most 0x7f dwords; reject overruns. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* 3 header bytes (keyword + length) consumed. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" holds the checksum byte; the running sum of
			 * all bytes through it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Shrink the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip filler bytes until the resource is exhausted. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset of this writable field. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done even on failure so we only try once. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1301 
1302 int
1303 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1304 {
1305 	struct pci_devinfo *dinfo = device_get_ivars(child);
1306 	pcicfgregs *cfg = &dinfo->cfg;
1307 
1308 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1309 		pci_read_vpd(device_get_parent(dev), cfg);
1310 
1311 	*identptr = cfg->vpd.vpd_ident;
1312 
1313 	if (*identptr == NULL)
1314 		return (ENXIO);
1315 
1316 	return (0);
1317 }
1318 
1319 int
1320 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1321 	const char **vptr)
1322 {
1323 	struct pci_devinfo *dinfo = device_get_ivars(child);
1324 	pcicfgregs *cfg = &dinfo->cfg;
1325 	int i;
1326 
1327 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1328 		pci_read_vpd(device_get_parent(dev), cfg);
1329 
1330 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1331 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1332 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1333 			*vptr = cfg->vpd.vpd_ros[i].value;
1334 			return (0);
1335 		}
1336 
1337 	*vptr = NULL;
1338 	return (ENXIO);
1339 }
1340 
1341 struct pcicfg_vpd *
1342 pci_fetch_vpd_list(device_t dev)
1343 {
1344 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1345 	pcicfgregs *cfg = &dinfo->cfg;
1346 
1347 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1348 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1349 	return (&cfg->vpd);
1350 }
1351 
1352 /*
1353  * Find the requested HyperTransport capability and return the offset
1354  * in configuration space via the pointer provided.  The function
1355  * returns 0 on success and an error code otherwise.
1356  */
1357 int
1358 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1359 {
1360 	int ptr, error;
1361 	uint16_t val;
1362 
1363 	error = pci_find_cap(child, PCIY_HT, &ptr);
1364 	if (error)
1365 		return (error);
1366 
1367 	/*
1368 	 * Traverse the capabilities list checking each HT capability
1369 	 * to see if it matches the requested HT capability.
1370 	 */
1371 	while (ptr != 0) {
1372 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1373 		if (capability == PCIM_HTCAP_SLAVE ||
1374 		    capability == PCIM_HTCAP_HOST)
1375 			val &= 0xe000;
1376 		else
1377 			val &= PCIM_HTCMD_CAP_MASK;
1378 		if (val == capability) {
1379 			if (capreg != NULL)
1380 				*capreg = ptr;
1381 			return (0);
1382 		}
1383 
1384 		/* Skip to the next HT capability. */
1385 		while (ptr != 0) {
1386 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1387 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1388 			    PCIY_HT)
1389 				break;
1390 		}
1391 	}
1392 	return (ENOENT);
1393 }
1394 
1395 /*
1396  * Find the requested capability and return the offset in
1397  * configuration space via the pointer provided.  The function returns
1398  * 0 on success and an error code otherwise.
1399  */
1400 int
1401 pci_find_cap_method(device_t dev, device_t child, int capability,
1402     int *capreg)
1403 {
1404 	struct pci_devinfo *dinfo = device_get_ivars(child);
1405 	pcicfgregs *cfg = &dinfo->cfg;
1406 	u_int32_t status;
1407 	u_int8_t ptr;
1408 
1409 	/*
1410 	 * Check the CAP_LIST bit of the PCI status register first.
1411 	 */
1412 	status = pci_read_config(child, PCIR_STATUS, 2);
1413 	if (!(status & PCIM_STATUS_CAPPRESENT))
1414 		return (ENXIO);
1415 
1416 	/*
1417 	 * Determine the start pointer of the capabilities list.
1418 	 */
1419 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1420 	case PCIM_HDRTYPE_NORMAL:
1421 	case PCIM_HDRTYPE_BRIDGE:
1422 		ptr = PCIR_CAP_PTR;
1423 		break;
1424 	case PCIM_HDRTYPE_CARDBUS:
1425 		ptr = PCIR_CAP_PTR_2;
1426 		break;
1427 	default:
1428 		/* XXX: panic? */
1429 		return (ENXIO);		/* no extended capabilities support */
1430 	}
1431 	ptr = pci_read_config(child, ptr, 1);
1432 
1433 	/*
1434 	 * Traverse the capabilities list.
1435 	 */
1436 	while (ptr != 0) {
1437 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1438 			if (capreg != NULL)
1439 				*capreg = ptr;
1440 			return (0);
1441 		}
1442 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1443 	}
1444 
1445 	return (ENOENT);
1446 }
1447 
1448 /*
1449  * Find the requested extended capability and return the offset in
1450  * configuration space via the pointer provided.  The function returns
1451  * 0 on success and an error code otherwise.
1452  */
1453 int
1454 pci_find_extcap_method(device_t dev, device_t child, int capability,
1455     int *capreg)
1456 {
1457 	struct pci_devinfo *dinfo = device_get_ivars(child);
1458 	pcicfgregs *cfg = &dinfo->cfg;
1459 	uint32_t ecap;
1460 	uint16_t ptr;
1461 
1462 	/* Only supported for PCI-express devices. */
1463 	if (cfg->pcie.pcie_location == 0)
1464 		return (ENXIO);
1465 
1466 	ptr = PCIR_EXTCAP;
1467 	ecap = pci_read_config(child, ptr, 4);
1468 	if (ecap == 0xffffffff || ecap == 0)
1469 		return (ENOENT);
1470 	for (;;) {
1471 		if (PCI_EXTCAP_ID(ecap) == capability) {
1472 			if (capreg != NULL)
1473 				*capreg = ptr;
1474 			return (0);
1475 		}
1476 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1477 		if (ptr == 0)
1478 			break;
1479 		ecap = pci_read_config(child, ptr, 4);
1480 	}
1481 
1482 	return (ENOENT);
1483 }
1484 
1485 /*
1486  * Support for MSI-X message interrupts.
1487  */
1488 void
1489 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1490     uint64_t address, uint32_t data)
1491 {
1492 	struct pci_devinfo *dinfo = device_get_ivars(child);
1493 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1494 	uint32_t offset;
1495 
1496 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1497 	offset = msix->msix_table_offset + index * 16;
1498 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1499 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1500 	bus_write_4(msix->msix_table_res, offset + 8, data);
1501 
1502 	/* Enable MSI -> HT mapping. */
1503 	pci_ht_map_msi(child, address);
1504 }
1505 
1506 void
1507 pci_mask_msix(device_t dev, u_int index)
1508 {
1509 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1510 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1511 	uint32_t offset, val;
1512 
1513 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1514 	offset = msix->msix_table_offset + index * 16 + 12;
1515 	val = bus_read_4(msix->msix_table_res, offset);
1516 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1517 		val |= PCIM_MSIX_VCTRL_MASK;
1518 		bus_write_4(msix->msix_table_res, offset, val);
1519 	}
1520 }
1521 
1522 void
1523 pci_unmask_msix(device_t dev, u_int index)
1524 {
1525 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1526 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1527 	uint32_t offset, val;
1528 
1529 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1530 	offset = msix->msix_table_offset + index * 16 + 12;
1531 	val = bus_read_4(msix->msix_table_res, offset);
1532 	if (val & PCIM_MSIX_VCTRL_MASK) {
1533 		val &= ~PCIM_MSIX_VCTRL_MASK;
1534 		bus_write_4(msix->msix_table_res, offset, val);
1535 	}
1536 }
1537 
1538 int
1539 pci_pending_msix(device_t dev, u_int index)
1540 {
1541 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1542 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1543 	uint32_t offset, bit;
1544 
1545 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1546 	offset = msix->msix_pba_offset + (index / 32) * 4;
1547 	bit = 1 << index % 32;
1548 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1549 }
1550 
1551 /*
1552  * Restore MSI-X registers and table during resume.  If MSI-X is
1553  * enabled then walk the virtual table to restore the actual MSI-X
1554  * table.
1555  */
/*
 * Restore MSI-X registers and table during resume.  If MSI-X is
 * enabled then walk the virtual table to restore the actual MSI-X
 * table.
 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved capability control word (enable state etc.). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1583 
1584 /*
1585  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1586  * returned in *count.  After this function returns, each message will be
1587  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1588  */
/*
 * Allocate up to *count MSI-X messages from the parent bridge, set up
 * the virtual vector/table bookkeeping, enable MSI-X in the capability
 * and return the number actually allocated in *count.  Returns 0 on
 * success or an errno; on success each message is exposed as a
 * SYS_RES_IRQ resource starting at rid 1.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If table and PBA share a BAR, rle still refers to the table BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is a 1-based index into msix_vectors[]. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1723 
1724 /*
1725  * By default, pci_alloc_msix() will assign the allocated IRQ
1726  * resources consecutively to the first N messages in the MSI-X table.
1727  * However, device drivers may want to use different layouts if they
1728  * either receive fewer messages than they asked for, or they wish to
1729  * populate the MSI-X table sparsely.  This method allows the driver
1730  * to specify what layout it wants.  It must be called after a
1731  * successful pci_alloc_msix() but before any of the associated
1732  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1733  *
1734  * The 'vectors' array contains 'count' message vectors.  The array
1735  * maps directly to the MSI-X table in that index 0 in the array
1736  * specifies the vector for the first message in the MSI-X table, etc.
1737  * The vector value in each array index can either be 0 to indicate
1738  * that no vector should be assigned to a message slot, or it can be a
1739  * number from 1 to N (where N is the count returned from a
1740  * succcessful call to pci_alloc_msix()) to indicate which message
1741  * vector (IRQ) to be used for the corresponding message.
1742  *
1743  * On successful return, each message with a non-zero vector will have
1744  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1745  * 1.  Additionally, if any of the IRQs allocated via the previous
1746  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1747  * will be freed back to the system automatically.
1748  *
1749  * For example, suppose a driver has a MSI-X table with 6 messages and
1750  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1751  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1752  * C.  After the call to pci_alloc_msix(), the device will be setup to
1753  * have an MSI-X table of ABC--- (where - means no vector assigned).
1754  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1755  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1756  * be freed back to the system.  This device will also have valid
1757  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1758  *
1759  * In any case, the SYS_RES_IRQ rid X will always map to the message
1760  * at MSI-X table index X - 1 and will only be valid if a vector is
1761  * assigned to that table entry.
1762  */
1763 int
1764 pci_remap_msix_method(device_t dev, device_t child, int count,
1765     const u_int *vectors)
1766 {
1767 	struct pci_devinfo *dinfo = device_get_ivars(child);
1768 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1769 	struct resource_list_entry *rle;
1770 	int i, irq, j, *used;
1771 
1772 	/*
1773 	 * Have to have at least one message in the table but the
1774 	 * table can't be bigger than the actual MSI-X table in the
1775 	 * device.
1776 	 */
1777 	if (count == 0 || count > msix->msix_msgnum)
1778 		return (EINVAL);
1779 
1780 	/* Sanity check the vectors. */
1781 	for (i = 0; i < count; i++)
1782 		if (vectors[i] > msix->msix_alloc)
1783 			return (EINVAL);
1784 
1785 	/*
1786 	 * Make sure there aren't any holes in the vectors to be used.
1787 	 * It's a big pain to support it, and it doesn't really make
1788 	 * sense anyway.  Also, at least one vector must be used.
1789 	 */
1790 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1791 	    M_ZERO);
1792 	for (i = 0; i < count; i++)
1793 		if (vectors[i] != 0)
1794 			used[vectors[i] - 1] = 1;
1795 	for (i = 0; i < msix->msix_alloc - 1; i++)
1796 		if (used[i] == 0 && used[i + 1] == 1) {
1797 			free(used, M_DEVBUF);
1798 			return (EINVAL);
1799 		}
1800 	if (used[0] != 1) {
1801 		free(used, M_DEVBUF);
1802 		return (EINVAL);
1803 	}
1804 
1805 	/* Make sure none of the resources are allocated. */
1806 	for (i = 0; i < msix->msix_table_len; i++) {
1807 		if (msix->msix_table[i].mte_vector == 0)
1808 			continue;
1809 		if (msix->msix_table[i].mte_handlers > 0) {
1810 			free(used, M_DEVBUF);
1811 			return (EBUSY);
1812 		}
1813 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1814 		KASSERT(rle != NULL, ("missing resource"));
1815 		if (rle->res != NULL) {
1816 			free(used, M_DEVBUF);
1817 			return (EBUSY);
1818 		}
1819 	}
1820 
1821 	/* Free the existing resource list entries. */
1822 	for (i = 0; i < msix->msix_table_len; i++) {
1823 		if (msix->msix_table[i].mte_vector == 0)
1824 			continue;
1825 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1826 	}
1827 
1828 	/*
1829 	 * Build the new virtual table keeping track of which vectors are
1830 	 * used.
1831 	 */
1832 	free(msix->msix_table, M_DEVBUF);
1833 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1834 	    M_DEVBUF, M_WAITOK | M_ZERO);
1835 	for (i = 0; i < count; i++)
1836 		msix->msix_table[i].mte_vector = vectors[i];
1837 	msix->msix_table_len = count;
1838 
1839 	/* Free any unused IRQs and resize the vectors array if necessary. */
1840 	j = msix->msix_alloc - 1;
1841 	if (used[j] == 0) {
1842 		struct msix_vector *vec;
1843 
1844 		while (used[j] == 0) {
1845 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1846 			    msix->msix_vectors[j].mv_irq);
1847 			j--;
1848 		}
1849 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1850 		    M_WAITOK);
1851 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1852 		    (j + 1));
1853 		free(msix->msix_vectors, M_DEVBUF);
1854 		msix->msix_vectors = vec;
1855 		msix->msix_alloc = j + 1;
1856 	}
1857 	free(used, M_DEVBUF);
1858 
1859 	/* Map the IRQs onto the rids. */
1860 	for (i = 0; i < count; i++) {
1861 		if (vectors[i] == 0)
1862 			continue;
1863 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1864 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1865 		    irq, 1);
1866 	}
1867 
1868 	if (bootverbose) {
1869 		device_printf(child, "Remapped MSI-X IRQs as: ");
1870 		for (i = 0; i < count; i++) {
1871 			if (i != 0)
1872 				printf(", ");
1873 			if (vectors[i] == 0)
1874 				printf("---");
1875 			else
1876 				printf("%d",
1877 				    msix->msix_vectors[vectors[i]].mv_irq);
1878 		}
1879 		printf("\n");
1880 	}
1881 
1882 	return (0);
1883 }
1884 
1885 static int
1886 pci_release_msix(device_t dev, device_t child)
1887 {
1888 	struct pci_devinfo *dinfo = device_get_ivars(child);
1889 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1890 	struct resource_list_entry *rle;
1891 	int i;
1892 
1893 	/* Do we have any messages to release? */
1894 	if (msix->msix_alloc == 0)
1895 		return (ENODEV);
1896 
1897 	/* Make sure none of the resources are allocated. */
1898 	for (i = 0; i < msix->msix_table_len; i++) {
1899 		if (msix->msix_table[i].mte_vector == 0)
1900 			continue;
1901 		if (msix->msix_table[i].mte_handlers > 0)
1902 			return (EBUSY);
1903 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1904 		KASSERT(rle != NULL, ("missing resource"));
1905 		if (rle->res != NULL)
1906 			return (EBUSY);
1907 	}
1908 
1909 	/* Update control register to disable MSI-X. */
1910 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1911 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1912 	    msix->msix_ctrl, 2);
1913 
1914 	/* Free the resource list entries. */
1915 	for (i = 0; i < msix->msix_table_len; i++) {
1916 		if (msix->msix_table[i].mte_vector == 0)
1917 			continue;
1918 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1919 	}
1920 	free(msix->msix_table, M_DEVBUF);
1921 	msix->msix_table_len = 0;
1922 
1923 	/* Release the IRQs. */
1924 	for (i = 0; i < msix->msix_alloc; i++)
1925 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1926 		    msix->msix_vectors[i].mv_irq);
1927 	free(msix->msix_vectors, M_DEVBUF);
1928 	msix->msix_alloc = 0;
1929 	return (0);
1930 }
1931 
1932 /*
1933  * Return the max supported MSI-X messages this device supports.
1934  * Basically, assuming the MD code can alloc messages, this function
1935  * should return the maximum value that pci_alloc_msix() can return.
1936  * Thus, it is subject to the tunables, etc.
1937  */
1938 int
1939 pci_msix_count_method(device_t dev, device_t child)
1940 {
1941 	struct pci_devinfo *dinfo = device_get_ivars(child);
1942 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1943 
1944 	if (pci_do_msix && msix->msix_location != 0)
1945 		return (msix->msix_msgnum);
1946 	return (0);
1947 }
1948 
1949 int
1950 pci_msix_pba_bar_method(device_t dev, device_t child)
1951 {
1952 	struct pci_devinfo *dinfo = device_get_ivars(child);
1953 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1954 
1955 	if (pci_do_msix && msix->msix_location != 0)
1956 		return (msix->msix_pba_bar);
1957 	return (-1);
1958 }
1959 
1960 int
1961 pci_msix_table_bar_method(device_t dev, device_t child)
1962 {
1963 	struct pci_devinfo *dinfo = device_get_ivars(child);
1964 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1965 
1966 	if (pci_do_msix && msix->msix_location != 0)
1967 		return (msix->msix_table_bar);
1968 	return (-1);
1969 }
1970 
1971 /*
1972  * HyperTransport MSI mapping control
1973  */
1974 void
1975 pci_ht_map_msi(device_t dev, uint64_t addr)
1976 {
1977 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1978 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1979 
1980 	if (!ht->ht_msimap)
1981 		return;
1982 
1983 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1984 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1985 		/* Enable MSI -> HT mapping. */
1986 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1987 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1988 		    ht->ht_msictrl, 2);
1989 	}
1990 
1991 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1992 		/* Disable MSI -> HT mapping. */
1993 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1994 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1995 		    ht->ht_msictrl, 2);
1996 	}
1997 }
1998 
1999 int
2000 pci_get_max_read_req(device_t dev)
2001 {
2002 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2003 	int cap;
2004 	uint16_t val;
2005 
2006 	cap = dinfo->cfg.pcie.pcie_location;
2007 	if (cap == 0)
2008 		return (0);
2009 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2010 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2011 	val >>= 12;
2012 	return (1 << (val + 7));
2013 }
2014 
2015 int
2016 pci_set_max_read_req(device_t dev, int size)
2017 {
2018 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2019 	int cap;
2020 	uint16_t val;
2021 
2022 	cap = dinfo->cfg.pcie.pcie_location;
2023 	if (cap == 0)
2024 		return (0);
2025 	if (size < 128)
2026 		size = 128;
2027 	if (size > 4096)
2028 		size = 4096;
2029 	size = (1 << (fls(size) - 1));
2030 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2031 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2032 	val |= (fls(size) - 8) << 12;
2033 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2034 	return (size);
2035 }
2036 
2037 uint32_t
2038 pcie_read_config(device_t dev, int reg, int width)
2039 {
2040 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2041 	int cap;
2042 
2043 	cap = dinfo->cfg.pcie.pcie_location;
2044 	if (cap == 0) {
2045 		if (width == 2)
2046 			return (0xffff);
2047 		return (0xffffffff);
2048 	}
2049 
2050 	return (pci_read_config(dev, cap + reg, width));
2051 }
2052 
2053 void
2054 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2055 {
2056 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2057 	int cap;
2058 
2059 	cap = dinfo->cfg.pcie.pcie_location;
2060 	if (cap == 0)
2061 		return;
2062 	pci_write_config(dev, cap + reg, value, width);
2063 }
2064 
2065 /*
2066  * Adjusts a PCI-e capability register by clearing the bits in mask
2067  * and setting the bits in (value & mask).  Bits not set in mask are
2068  * not adjusted.
2069  *
2070  * Returns the old value on success or all ones on failure.
2071  */
2072 uint32_t
2073 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2074     int width)
2075 {
2076 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2077 	uint32_t old, new;
2078 	int cap;
2079 
2080 	cap = dinfo->cfg.pcie.pcie_location;
2081 	if (cap == 0) {
2082 		if (width == 2)
2083 			return (0xffff);
2084 		return (0xffffffff);
2085 	}
2086 
2087 	old = pci_read_config(dev, cap + reg, width);
2088 	new = old & ~mask;
2089 	new |= (value & mask);
2090 	pci_write_config(dev, cap + reg, new, width);
2091 	return (old);
2092 }
2093 
2094 /*
2095  * Support for MSI message signalled interrupts.
2096  */
2097 void
2098 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2099     uint16_t data)
2100 {
2101 	struct pci_devinfo *dinfo = device_get_ivars(child);
2102 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2103 
2104 	/* Write data and address values. */
2105 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2106 	    address & 0xffffffff, 4);
2107 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2108 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2109 		    address >> 32, 4);
2110 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2111 		    data, 2);
2112 	} else
2113 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2114 		    2);
2115 
2116 	/* Enable MSI in the control register. */
2117 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2118 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2119 	    msi->msi_ctrl, 2);
2120 
2121 	/* Enable MSI -> HT mapping. */
2122 	pci_ht_map_msi(child, address);
2123 }
2124 
2125 void
2126 pci_disable_msi_method(device_t dev, device_t child)
2127 {
2128 	struct pci_devinfo *dinfo = device_get_ivars(child);
2129 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2130 
2131 	/* Disable MSI -> HT mapping. */
2132 	pci_ht_map_msi(child, 0);
2133 
2134 	/* Disable MSI in the control register. */
2135 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2136 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2137 	    msi->msi_ctrl, 2);
2138 }
2139 
2140 /*
2141  * Restore MSI registers during resume.  If MSI is enabled then
2142  * restore the data and address registers in addition to the control
2143  * register.
2144  */
2145 static void
2146 pci_resume_msi(device_t dev)
2147 {
2148 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2149 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2150 	uint64_t address;
2151 	uint16_t data;
2152 
2153 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2154 		address = msi->msi_addr;
2155 		data = msi->msi_data;
2156 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2157 		    address & 0xffffffff, 4);
2158 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2159 			pci_write_config(dev, msi->msi_location +
2160 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2161 			pci_write_config(dev, msi->msi_location +
2162 			    PCIR_MSI_DATA_64BIT, data, 2);
2163 		} else
2164 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2165 			    data, 2);
2166 	}
2167 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2168 	    2);
2169 }
2170 
/*
 * Rewrite the address/data pair for the interrupt 'irq' used by 'dev'
 * (e.g. after the interrupt has been re-routed).  Asks the parent
 * bridge for an updated mapping via PCIB_MAP_MSI() and reprograms the
 * device's MSI or MSI-X registers to match.  Returns 0 on success,
 * an error from PCIB_MAP_MSI(), or ENOENT if the IRQ is not one of
 * this device's message-signalled interrupts.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/*
				 * Disable MSI while the address/data
				 * registers are rewritten, then cache
				 * the new values and re-enable.
				 */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/*
					 * Only reprogram table slots bound
					 * to this vector that have active
					 * handlers; mask each slot while
					 * its entry is rewritten.
					 */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): ENOENT is returned here even when a
		 * matching MSI-X vector was found and successfully
		 * reprogrammed above -- confirm whether callers depend
		 * on this or whether a "return (0)" after a match was
		 * intended.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
2243 
2244 /*
2245  * Returns true if the specified device is blacklisted because MSI
2246  * doesn't work.
2247  */
2248 int
2249 pci_msi_device_blacklisted(device_t dev)
2250 {
2251 
2252 	if (!pci_honor_msi_blacklist)
2253 		return (0);
2254 
2255 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2256 }
2257 
2258 /*
2259  * Determine if MSI is blacklisted globally on this system.  Currently,
2260  * we just check for blacklisted chipsets as represented by the
2261  * host-PCI bridge at device 0:0:0.  In the future, it may become
2262  * necessary to check other system attributes, such as the kenv values
2263  * that give the motherboard manufacturer and model number.
2264  */
2265 static int
2266 pci_msi_blacklisted(void)
2267 {
2268 	device_t dev;
2269 
2270 	if (!pci_honor_msi_blacklist)
2271 		return (0);
2272 
2273 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2274 	if (!(pcie_chipset || pcix_chipset)) {
2275 		if (vm_guest != VM_GUEST_NO) {
2276 			/*
2277 			 * Whitelist older chipsets in virtual
2278 			 * machines known to support MSI.
2279 			 */
2280 			dev = pci_find_bsf(0, 0, 0);
2281 			if (dev != NULL)
2282 				return (!pci_has_quirk(pci_get_devid(dev),
2283 					PCI_QUIRK_ENABLE_MSI_VM));
2284 		}
2285 		return (1);
2286 	}
2287 
2288 	dev = pci_find_bsf(0, 0, 0);
2289 	if (dev != NULL)
2290 		return (pci_msi_device_blacklisted(dev));
2291 	return (0);
2292 }
2293 
2294 /*
2295  * Returns true if the specified device is blacklisted because MSI-X
2296  * doesn't work.  Note that this assumes that if MSI doesn't work,
2297  * MSI-X doesn't either.
2298  */
2299 int
2300 pci_msix_device_blacklisted(device_t dev)
2301 {
2302 
2303 	if (!pci_honor_msi_blacklist)
2304 		return (0);
2305 
2306 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2307 		return (1);
2308 
2309 	return (pci_msi_device_blacklisted(dev));
2310 }
2311 
2312 /*
2313  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2314  * is blacklisted, assume that MSI-X is as well.  Check for additional
2315  * chipsets where MSI works but MSI-X does not.
2316  */
2317 static int
2318 pci_msix_blacklisted(void)
2319 {
2320 	device_t dev;
2321 
2322 	if (!pci_honor_msi_blacklist)
2323 		return (0);
2324 
2325 	dev = pci_find_bsf(0, 0, 0);
2326 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2327 	    PCI_QUIRK_DISABLE_MSIX))
2328 		return (1);
2329 
2330 	return (pci_msi_blacklisted());
2331 }
2332 
2333 /*
2334  * Attempt to allocate *count MSI messages.  The actual number allocated is
2335  * returned in *count.  After this function returns, each message will be
2336  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2337  */
2338 int
2339 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2340 {
2341 	struct pci_devinfo *dinfo = device_get_ivars(child);
2342 	pcicfgregs *cfg = &dinfo->cfg;
2343 	struct resource_list_entry *rle;
2344 	int actual, error, i, irqs[32];
2345 	uint16_t ctrl;
2346 
2347 	/* Don't let count == 0 get us into trouble. */
2348 	if (*count == 0)
2349 		return (EINVAL);
2350 
2351 	/* If rid 0 is allocated, then fail. */
2352 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2353 	if (rle != NULL && rle->res != NULL)
2354 		return (ENXIO);
2355 
2356 	/* Already have allocated messages? */
2357 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2358 		return (ENXIO);
2359 
2360 	/* If MSI is blacklisted for this system, fail. */
2361 	if (pci_msi_blacklisted())
2362 		return (ENXIO);
2363 
2364 	/* MSI capability present? */
2365 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2366 		return (ENODEV);
2367 
2368 	if (bootverbose)
2369 		device_printf(child,
2370 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2371 		    *count, cfg->msi.msi_msgnum);
2372 
2373 	/* Don't ask for more than the device supports. */
2374 	actual = min(*count, cfg->msi.msi_msgnum);
2375 
2376 	/* Don't ask for more than 32 messages. */
2377 	actual = min(actual, 32);
2378 
2379 	/* MSI requires power of 2 number of messages. */
2380 	if (!powerof2(actual))
2381 		return (EINVAL);
2382 
2383 	for (;;) {
2384 		/* Try to allocate N messages. */
2385 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2386 		    actual, irqs);
2387 		if (error == 0)
2388 			break;
2389 		if (actual == 1)
2390 			return (error);
2391 
2392 		/* Try N / 2. */
2393 		actual >>= 1;
2394 	}
2395 
2396 	/*
2397 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2398 	 * resources in the irqs[] array, so add new resources
2399 	 * starting at rid 1.
2400 	 */
2401 	for (i = 0; i < actual; i++)
2402 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2403 		    irqs[i], irqs[i], 1);
2404 
2405 	if (bootverbose) {
2406 		if (actual == 1)
2407 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2408 		else {
2409 			int run;
2410 
2411 			/*
2412 			 * Be fancy and try to print contiguous runs
2413 			 * of IRQ values as ranges.  'run' is true if
2414 			 * we are in a range.
2415 			 */
2416 			device_printf(child, "using IRQs %d", irqs[0]);
2417 			run = 0;
2418 			for (i = 1; i < actual; i++) {
2419 
2420 				/* Still in a run? */
2421 				if (irqs[i] == irqs[i - 1] + 1) {
2422 					run = 1;
2423 					continue;
2424 				}
2425 
2426 				/* Finish previous range. */
2427 				if (run) {
2428 					printf("-%d", irqs[i - 1]);
2429 					run = 0;
2430 				}
2431 
2432 				/* Start new range. */
2433 				printf(",%d", irqs[i]);
2434 			}
2435 
2436 			/* Unfinished range? */
2437 			if (run)
2438 				printf("-%d", irqs[actual - 1]);
2439 			printf(" for MSI\n");
2440 		}
2441 	}
2442 
2443 	/* Update control register with actual count. */
2444 	ctrl = cfg->msi.msi_ctrl;
2445 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2446 	ctrl |= (ffs(actual) - 1) << 4;
2447 	cfg->msi.msi_ctrl = ctrl;
2448 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2449 
2450 	/* Update counts of alloc'd messages. */
2451 	cfg->msi.msi_alloc = actual;
2452 	cfg->msi.msi_handlers = 0;
2453 	*count = actual;
2454 	return (0);
2455 }
2456 
/*
 * Release the MSI (or MSI-X) messages associated with this device.
 * Returns 0 on success, ENODEV when no messages are allocated, or
 * EBUSY when any message still has a handler or an allocated
 * SYS_RES_IRQ resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ numbers so they can be handed back. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2505 
2506 /*
2507  * Return the max supported MSI messages this device supports.
2508  * Basically, assuming the MD code can alloc messages, this function
2509  * should return the maximum value that pci_alloc_msi() can return.
2510  * Thus, it is subject to the tunables, etc.
2511  */
2512 int
2513 pci_msi_count_method(device_t dev, device_t child)
2514 {
2515 	struct pci_devinfo *dinfo = device_get_ivars(child);
2516 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2517 
2518 	if (pci_do_msi && msi->msi_location != 0)
2519 		return (msi->msi_msgnum);
2520 	return (0);
2521 }
2522 
/*
 * Free a pcicfgregs structure and all depending data structures:
 * the VPD strings, the per-BAR pci_map records, and finally the
 * pci_devinfo itself, which is also unlinked from the global device
 * list.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free the VPD identifier plus each read-only/writable keyword. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the recorded BAR entries (safe variant: nodes are freed). */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2556 
2557 /*
2558  * PCI power manangement
2559  */
2560 int
2561 pci_set_powerstate_method(device_t dev, device_t child, int state)
2562 {
2563 	struct pci_devinfo *dinfo = device_get_ivars(child);
2564 	pcicfgregs *cfg = &dinfo->cfg;
2565 	uint16_t status;
2566 	int oldstate, highest, delay;
2567 
2568 	if (cfg->pp.pp_cap == 0)
2569 		return (EOPNOTSUPP);
2570 
2571 	/*
2572 	 * Optimize a no state change request away.  While it would be OK to
2573 	 * write to the hardware in theory, some devices have shown odd
2574 	 * behavior when going from D3 -> D3.
2575 	 */
2576 	oldstate = pci_get_powerstate(child);
2577 	if (oldstate == state)
2578 		return (0);
2579 
2580 	/*
2581 	 * The PCI power management specification states that after a state
2582 	 * transition between PCI power states, system software must
2583 	 * guarantee a minimal delay before the function accesses the device.
2584 	 * Compute the worst case delay that we need to guarantee before we
2585 	 * access the device.  Many devices will be responsive much more
2586 	 * quickly than this delay, but there are some that don't respond
2587 	 * instantly to state changes.  Transitions to/from D3 state require
2588 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2589 	 * is done below with DELAY rather than a sleeper function because
2590 	 * this function can be called from contexts where we cannot sleep.
2591 	 */
2592 	highest = (oldstate > state) ? oldstate : state;
2593 	if (highest == PCI_POWERSTATE_D3)
2594 	    delay = 10000;
2595 	else if (highest == PCI_POWERSTATE_D2)
2596 	    delay = 200;
2597 	else
2598 	    delay = 0;
2599 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2600 	    & ~PCIM_PSTAT_DMASK;
2601 	switch (state) {
2602 	case PCI_POWERSTATE_D0:
2603 		status |= PCIM_PSTAT_D0;
2604 		break;
2605 	case PCI_POWERSTATE_D1:
2606 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2607 			return (EOPNOTSUPP);
2608 		status |= PCIM_PSTAT_D1;
2609 		break;
2610 	case PCI_POWERSTATE_D2:
2611 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2612 			return (EOPNOTSUPP);
2613 		status |= PCIM_PSTAT_D2;
2614 		break;
2615 	case PCI_POWERSTATE_D3:
2616 		status |= PCIM_PSTAT_D3;
2617 		break;
2618 	default:
2619 		return (EINVAL);
2620 	}
2621 
2622 	if (bootverbose)
2623 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2624 		    state);
2625 
2626 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2627 	if (delay)
2628 		DELAY(delay);
2629 	return (0);
2630 }
2631 
2632 int
2633 pci_get_powerstate_method(device_t dev, device_t child)
2634 {
2635 	struct pci_devinfo *dinfo = device_get_ivars(child);
2636 	pcicfgregs *cfg = &dinfo->cfg;
2637 	uint16_t status;
2638 	int result;
2639 
2640 	if (cfg->pp.pp_cap != 0) {
2641 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2642 		switch (status & PCIM_PSTAT_DMASK) {
2643 		case PCIM_PSTAT_D0:
2644 			result = PCI_POWERSTATE_D0;
2645 			break;
2646 		case PCIM_PSTAT_D1:
2647 			result = PCI_POWERSTATE_D1;
2648 			break;
2649 		case PCIM_PSTAT_D2:
2650 			result = PCI_POWERSTATE_D2;
2651 			break;
2652 		case PCIM_PSTAT_D3:
2653 			result = PCI_POWERSTATE_D3;
2654 			break;
2655 		default:
2656 			result = PCI_POWERSTATE_UNKNOWN;
2657 			break;
2658 		}
2659 	} else {
2660 		/* No support, device is always at D0 */
2661 		result = PCI_POWERSTATE_D0;
2662 	}
2663 	return (result);
2664 }
2665 
2666 /*
2667  * Some convenience functions for PCI device drivers.
2668  */
2669 
/* Set 'bit' in the child's PCI command register (read-modify-write). */
static __inline void
pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
{
	uint16_t	command;

	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
	command |= bit;
	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
}
2679 
/* Clear 'bit' in the child's PCI command register (read-modify-write). */
static __inline void
pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
{
	uint16_t	command;

	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
	command &= ~bit;
	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
}
2689 
/* Enable bus mastering (DMA) for the child device.  Always returns 0. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2696 
/* Disable bus mastering (DMA) for the child device.  Always returns 0. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2703 
2704 int
2705 pci_enable_io_method(device_t dev, device_t child, int space)
2706 {
2707 	uint16_t bit;
2708 
2709 	switch(space) {
2710 	case SYS_RES_IOPORT:
2711 		bit = PCIM_CMD_PORTEN;
2712 		break;
2713 	case SYS_RES_MEMORY:
2714 		bit = PCIM_CMD_MEMEN;
2715 		break;
2716 	default:
2717 		return (EINVAL);
2718 	}
2719 	pci_set_command_bit(dev, child, bit);
2720 	return (0);
2721 }
2722 
2723 int
2724 pci_disable_io_method(device_t dev, device_t child, int space)
2725 {
2726 	uint16_t bit;
2727 
2728 	switch(space) {
2729 	case SYS_RES_IOPORT:
2730 		bit = PCIM_CMD_PORTEN;
2731 		break;
2732 	case SYS_RES_MEMORY:
2733 		bit = PCIM_CMD_MEMEN;
2734 		break;
2735 	default:
2736 		return (EINVAL);
2737 	}
2738 	pci_clear_command_bit(dev, child, bit);
2739 	return (0);
2740 }
2741 
2742 /*
2743  * New style pci driver.  Parent device is either a pci-host-bridge or a
2744  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2745  */
2746 
/*
 * Dump the interesting config-space state of a newly found device to
 * the console: IDs, location, class, command/status, timing values,
 * interrupt routing, and any power-management, MSI, or MSI-X
 * capabilities.  Only active under bootverbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live state to report the current Dx. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2803 
/* Return non-zero if I/O port decoding is enabled in the command register. */
static int
pci_porten(device_t dev)
{
	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
}
2809 
/* Return non-zero if memory decoding is enabled in the command register. */
static int
pci_memen(device_t dev)
{
	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
}
2815 
/*
 * Read a BAR's current value and size it by writing all ones and
 * reading the result back.  On return *mapp holds the original
 * (restored) BAR contents, *testvalp the sizing value read back, and
 * *bar64 (when non-NULL) whether this is a 64-bit BAR.  Decoding is
 * disabled around the probe so the temporary BAR contents are never
 * decoded on the bus.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2884 
/*
 * Write 'base' into the BAR described by 'pm' and refresh the cached
 * pm_value from the hardware (the device may not implement all
 * address bits, so re-read what actually latched).
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects the value the hardware kept. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2905 
2906 struct pci_map *
2907 pci_find_bar(device_t dev, int reg)
2908 {
2909 	struct pci_devinfo *dinfo;
2910 	struct pci_map *pm;
2911 
2912 	dinfo = device_get_ivars(dev);
2913 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2914 		if (pm->pm_reg == reg)
2915 			return (pm);
2916 	}
2917 	return (NULL);
2918 }
2919 
2920 int
2921 pci_bar_enabled(device_t dev, struct pci_map *pm)
2922 {
2923 	struct pci_devinfo *dinfo;
2924 	uint16_t cmd;
2925 
2926 	dinfo = device_get_ivars(dev);
2927 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2928 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2929 		return (0);
2930 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2931 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2932 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2933 	else
2934 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2935 }
2936 
/*
 * Record a newly discovered BAR for this device.  'value' is the raw
 * BAR contents and 'size' the size computed by pci_mapsize()
 * (presumably log2 of the decoded range -- see the size checks in
 * pci_add_map()).  The record is inserted so the list stays sorted
 * by config register offset; duplicates trip a KASSERT.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Walk to the last entry whose successor (if any) has a larger
	 * register offset; inserting after it keeps the list sorted.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list was empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2961 
2962 static void
2963 pci_restore_bars(device_t dev)
2964 {
2965 	struct pci_devinfo *dinfo;
2966 	struct pci_map *pm;
2967 	int ln2range;
2968 
2969 	dinfo = device_get_ivars(dev);
2970 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2971 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2972 			ln2range = 32;
2973 		else
2974 			ln2range = pci_maprange(pm->pm_value);
2975 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2976 		if (ln2range == 64)
2977 			pci_write_config(dev, pm->pm_reg + 4,
2978 			    pm->pm_value >> 32, 4);
2979 	}
2980 }
2981 
2982 /*
2983  * Add a resource based on a pci map register. Return 1 if the map
2984  * register is a 32bit map register or 2 if it is a 64bit register.
2985  */
2986 static int
2987 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2988     int force, int prefetch)
2989 {
2990 	struct pci_map *pm;
2991 	pci_addr_t base, map, testval;
2992 	pci_addr_t start, end, count;
2993 	int barlen, basezero, flags, maprange, mapsize, type;
2994 	uint16_t cmd;
2995 	struct resource *res;
2996 
2997 	/*
2998 	 * The BAR may already exist if the device is a CardBus card
2999 	 * whose CIS is stored in this BAR.
3000 	 */
3001 	pm = pci_find_bar(dev, reg);
3002 	if (pm != NULL) {
3003 		maprange = pci_maprange(pm->pm_value);
3004 		barlen = maprange == 64 ? 2 : 1;
3005 		return (barlen);
3006 	}
3007 
3008 	pci_read_bar(dev, reg, &map, &testval, NULL);
3009 	if (PCI_BAR_MEM(map)) {
3010 		type = SYS_RES_MEMORY;
3011 		if (map & PCIM_BAR_MEM_PREFETCH)
3012 			prefetch = 1;
3013 	} else
3014 		type = SYS_RES_IOPORT;
3015 	mapsize = pci_mapsize(testval);
3016 	base = pci_mapbase(map);
3017 #ifdef __PCI_BAR_ZERO_VALID
3018 	basezero = 0;
3019 #else
3020 	basezero = base == 0;
3021 #endif
3022 	maprange = pci_maprange(map);
3023 	barlen = maprange == 64 ? 2 : 1;
3024 
3025 	/*
3026 	 * For I/O registers, if bottom bit is set, and the next bit up
3027 	 * isn't clear, we know we have a BAR that doesn't conform to the
3028 	 * spec, so ignore it.  Also, sanity check the size of the data
3029 	 * areas to the type of memory involved.  Memory must be at least
3030 	 * 16 bytes in size, while I/O ranges must be at least 4.
3031 	 */
3032 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3033 		return (barlen);
3034 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3035 	    (type == SYS_RES_IOPORT && mapsize < 2))
3036 		return (barlen);
3037 
3038 	/* Save a record of this BAR. */
3039 	pm = pci_add_bar(dev, reg, map, mapsize);
3040 	if (bootverbose) {
3041 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3042 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3043 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3044 			printf(", port disabled\n");
3045 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3046 			printf(", memory disabled\n");
3047 		else
3048 			printf(", enabled\n");
3049 	}
3050 
3051 	/*
3052 	 * If base is 0, then we have problems if this architecture does
3053 	 * not allow that.  It is best to ignore such entries for the
3054 	 * moment.  These will be allocated later if the driver specifically
3055 	 * requests them.  However, some removable busses look better when
3056 	 * all resources are allocated, so allow '0' to be overriden.
3057 	 *
3058 	 * Similarly treat maps whose values is the same as the test value
3059 	 * read back.  These maps have had all f's written to them by the
3060 	 * BIOS in an attempt to disable the resources.
3061 	 */
3062 	if (!force && (basezero || map == testval))
3063 		return (barlen);
3064 	if ((u_long)base != base) {
3065 		device_printf(bus,
3066 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3067 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3068 		    pci_get_function(dev), reg);
3069 		return (barlen);
3070 	}
3071 
3072 	/*
3073 	 * This code theoretically does the right thing, but has
3074 	 * undesirable side effects in some cases where peripherals
3075 	 * respond oddly to having these bits enabled.  Let the user
3076 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3077 	 * default).
3078 	 */
3079 	if (pci_enable_io_modes) {
3080 		/* Turn on resources that have been left off by a lazy BIOS */
3081 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3082 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3083 			cmd |= PCIM_CMD_PORTEN;
3084 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3085 		}
3086 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3087 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3088 			cmd |= PCIM_CMD_MEMEN;
3089 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3090 		}
3091 	} else {
3092 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3093 			return (barlen);
3094 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3095 			return (barlen);
3096 	}
3097 
3098 	count = (pci_addr_t)1 << mapsize;
3099 	flags = RF_ALIGNMENT_LOG2(mapsize);
3100 	if (prefetch)
3101 		flags |= RF_PREFETCHABLE;
3102 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3103 		start = 0;	/* Let the parent decide. */
3104 		end = ~0;
3105 	} else {
3106 		start = base;
3107 		end = base + count - 1;
3108 	}
3109 	resource_list_add(rl, type, reg, start, end, count);
3110 
3111 	/*
3112 	 * Try to allocate the resource for this BAR from our parent
3113 	 * so that this resource range is already reserved.  The
3114 	 * driver for this device will later inherit this resource in
3115 	 * pci_alloc_resource().
3116 	 */
3117 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3118 	    flags);
3119 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3120 		/*
3121 		 * If the allocation fails, try to allocate a resource for
3122 		 * this BAR using any available range.  The firmware felt
3123 		 * it was important enough to assign a resource, so don't
3124 		 * disable decoding if we can help it.
3125 		 */
3126 		resource_list_delete(rl, type, reg);
3127 		resource_list_add(rl, type, reg, 0, ~0, count);
3128 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3129 		    count, flags);
3130 	}
3131 	if (res == NULL) {
3132 		/*
3133 		 * If the allocation fails, delete the resource list entry
3134 		 * and disable decoding for this device.
3135 		 *
3136 		 * If the driver requests this resource in the future,
3137 		 * pci_reserve_map() will try to allocate a fresh
3138 		 * resource range.
3139 		 */
3140 		resource_list_delete(rl, type, reg);
3141 		pci_disable_io(dev, type);
3142 		if (bootverbose)
3143 			device_printf(bus,
3144 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3145 			    pci_get_domain(dev), pci_get_bus(dev),
3146 			    pci_get_slot(dev), pci_get_function(dev), reg);
3147 	} else {
3148 		start = rman_get_start(res);
3149 		pci_write_bar(dev, pm, start);
3150 	}
3151 	return (barlen);
3152 }
3153 
3154 /*
3155  * For ATA devices we need to decide early what addressing mode to use.
3156  * Legacy demands that the primary and secondary ATA ports sits on the
3157  * same addresses that old ISA hardware did. This dictates that we use
3158  * those addresses and ignore the BAR's if we cannot set PCI native
3159  * addressing mode.
3160  */
3161 static void
3162 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3163     uint32_t prefetchmask)
3164 {
3165 	int rid, type, progif;
3166 #if 0
3167 	/* if this device supports PCI native addressing use it */
3168 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3169 	if ((progif & 0x8a) == 0x8a) {
3170 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3171 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3172 			printf("Trying ATA native PCI addressing mode\n");
3173 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3174 		}
3175 	}
3176 #endif
3177 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3178 	type = SYS_RES_IOPORT;
3179 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3180 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3181 		    prefetchmask & (1 << 0));
3182 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3183 		    prefetchmask & (1 << 1));
3184 	} else {
3185 		rid = PCIR_BAR(0);
3186 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3187 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3188 		    0x1f7, 8, 0);
3189 		rid = PCIR_BAR(1);
3190 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3191 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3192 		    0x3f6, 1, 0);
3193 	}
3194 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3195 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3196 		    prefetchmask & (1 << 2));
3197 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3198 		    prefetchmask & (1 << 3));
3199 	} else {
3200 		rid = PCIR_BAR(2);
3201 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3202 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3203 		    0x177, 8, 0);
3204 		rid = PCIR_BAR(3);
3205 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3206 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3207 		    0x376, 1, 0);
3208 	}
3209 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3210 	    prefetchmask & (1 << 4));
3211 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3212 	    prefetchmask & (1 << 5));
3213 }
3214 
3215 static void
3216 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3217 {
3218 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3219 	pcicfgregs *cfg = &dinfo->cfg;
3220 	char tunable_name[64];
3221 	int irq;
3222 
3223 	/* Has to have an intpin to have an interrupt. */
3224 	if (cfg->intpin == 0)
3225 		return;
3226 
3227 	/* Let the user override the IRQ with a tunable. */
3228 	irq = PCI_INVALID_IRQ;
3229 	snprintf(tunable_name, sizeof(tunable_name),
3230 	    "hw.pci%d.%d.%d.INT%c.irq",
3231 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3232 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3233 		irq = PCI_INVALID_IRQ;
3234 
3235 	/*
3236 	 * If we didn't get an IRQ via the tunable, then we either use the
3237 	 * IRQ value in the intline register or we ask the bus to route an
3238 	 * interrupt for us.  If force_route is true, then we only use the
3239 	 * value in the intline register if the bus was unable to assign an
3240 	 * IRQ.
3241 	 */
3242 	if (!PCI_INTERRUPT_VALID(irq)) {
3243 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3244 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3245 		if (!PCI_INTERRUPT_VALID(irq))
3246 			irq = cfg->intline;
3247 	}
3248 
3249 	/* If after all that we don't have an IRQ, just bail. */
3250 	if (!PCI_INTERRUPT_VALID(irq))
3251 		return;
3252 
3253 	/* Update the config register if it changed. */
3254 	if (irq != cfg->intline) {
3255 		cfg->intline = irq;
3256 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3257 	}
3258 
3259 	/* Add this IRQ as rid 0 interrupt resource. */
3260 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3261 }
3262 
3263 /* Perform early OHCI takeover from SMM. */
3264 static void
3265 ohci_early_takeover(device_t self)
3266 {
3267 	struct resource *res;
3268 	uint32_t ctl;
3269 	int rid;
3270 	int i;
3271 
3272 	rid = PCIR_BAR(0);
3273 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3274 	if (res == NULL)
3275 		return;
3276 
3277 	ctl = bus_read_4(res, OHCI_CONTROL);
3278 	if (ctl & OHCI_IR) {
3279 		if (bootverbose)
3280 			printf("ohci early: "
3281 			    "SMM active, request owner change\n");
3282 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3283 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3284 			DELAY(1000);
3285 			ctl = bus_read_4(res, OHCI_CONTROL);
3286 		}
3287 		if (ctl & OHCI_IR) {
3288 			if (bootverbose)
3289 				printf("ohci early: "
3290 				    "SMM does not respond, resetting\n");
3291 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3292 		}
3293 		/* Disable interrupts */
3294 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3295 	}
3296 
3297 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3298 }
3299 
3300 /* Perform early UHCI takeover from SMM. */
3301 static void
3302 uhci_early_takeover(device_t self)
3303 {
3304 	struct resource *res;
3305 	int rid;
3306 
3307 	/*
3308 	 * Set the PIRQD enable bit and switch off all the others. We don't
3309 	 * want legacy support to interfere with us XXX Does this also mean
3310 	 * that the BIOS won't touch the keyboard anymore if it is connected
3311 	 * to the ports of the root hub?
3312 	 */
3313 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3314 
3315 	/* Disable interrupts */
3316 	rid = PCI_UHCI_BASE_REG;
3317 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3318 	if (res != NULL) {
3319 		bus_write_2(res, UHCI_INTR, 0);
3320 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3321 	}
3322 }
3323 
/*
 * Perform early EHCI takeover from SMM: walk the extended capability
 * list for a legacy-support capability, and if the BIOS owns the
 * controller, request ownership and wait for the handoff.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* EHCI extended capabilities live in PCI config space. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Non-zero BIOS semaphore means SMM owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Raise the OS semaphore to request ownership. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3379 
/*
 * Perform early XHCI takeover from SMM: walk the MMIO extended
 * capability list for the USB legacy-support capability and, if the
 * BIOS owns the controller, request ownership and wait for handoff.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	/*
	 * NOTE(review): eecp is 8 bits but XECP offsets are dword
	 * indices shifted left by 2, which can exceed 255 — possible
	 * truncation on controllers with capabilities beyond 0xff;
	 * confirm against the xHCI register layout.
	 */
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones seed keeps the first XHCI_XECP_NEXT(eec) test true. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* Non-zero BIOS semaphore means SMM owns the controller. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3441 
3442 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range decoded by a PCI-PCI or
 * CardBus bridge so child busses keep their firmware-assigned numbers.
 * On failure (or when pci_clear_buses is set) the secbus/subbus
 * registers are cleared so the range is renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge and CardBus headers carry secbus/subbus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the range from the chipset's own bus register. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Match the exact board via SMBIOS before applying. */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Invalid range, clearing requested, or reservation failed. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3547 
/*
 * Allocate (lazily reserving, if needed) the secondary bus range for a
 * bridge child.  Only rid 0 is supported.  When the range had not been
 * reserved at enumeration time, a fresh range is reserved here and the
 * bridge's secbus/subbus registers are programmed to match it.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge and CardBus headers carry secbus/subbus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without RF_ACTIVE; activation happens below. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3598 #endif
3599 
3600 static int
3601 pci_ea_bei_to_rid(device_t dev, int bei)
3602 {
3603 #ifdef PCI_IOV
3604 	struct pci_devinfo *dinfo;
3605 	int iov_pos;
3606 	struct pcicfg_iov *iov;
3607 
3608 	dinfo = device_get_ivars(dev);
3609 	iov = dinfo->cfg.iov;
3610 	if (iov != NULL)
3611 		iov_pos = iov->iov_pos;
3612 	else
3613 		iov_pos = 0;
3614 #endif
3615 
3616 	/* Check if matches BAR */
3617 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3618 	    (bei <= PCIM_EA_BEI_BAR_5))
3619 		return (PCIR_BAR(bei));
3620 
3621 	/* Check ROM */
3622 	if (bei == PCIM_EA_BEI_ROM)
3623 		return (PCIR_BIOS);
3624 
3625 #ifdef PCI_IOV
3626 	/* Check if matches VF_BAR */
3627 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3628 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3629 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3630 		    iov_pos);
3631 #endif
3632 
3633 	return (-1);
3634 }
3635 
3636 int
3637 pci_ea_is_enabled(device_t dev, int rid)
3638 {
3639 	struct pci_ea_entry *ea;
3640 	struct pci_devinfo *dinfo;
3641 
3642 	dinfo = device_get_ivars(dev);
3643 
3644 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3645 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3646 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3647 	}
3648 
3649 	return (0);
3650 }
3651 
3652 void
3653 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3654 {
3655 	struct pci_ea_entry *ea;
3656 	struct pci_devinfo *dinfo;
3657 	pci_addr_t start, end, count;
3658 	struct resource_list *rl;
3659 	int type, flags, rid;
3660 	struct resource *res;
3661 	uint32_t tmp;
3662 #ifdef PCI_IOV
3663 	struct pcicfg_iov *iov;
3664 #endif
3665 
3666 	dinfo = device_get_ivars(dev);
3667 	rl = &dinfo->resources;
3668 	flags = 0;
3669 
3670 #ifdef PCI_IOV
3671 	iov = dinfo->cfg.iov;
3672 #endif
3673 
3674 	if (dinfo->cfg.ea.ea_location == 0)
3675 		return;
3676 
3677 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3678 
3679 		/*
3680 		 * TODO: Ignore EA-BAR if is not enabled.
3681 		 *   Currently the EA implementation supports
3682 		 *   only situation, where EA structure contains
3683 		 *   predefined entries. In case they are not enabled
3684 		 *   leave them unallocated and proceed with
3685 		 *   a legacy-BAR mechanism.
3686 		 */
3687 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3688 			continue;
3689 
3690 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3691 		case PCIM_EA_P_MEM_PREFETCH:
3692 		case PCIM_EA_P_VF_MEM_PREFETCH:
3693 			flags = RF_PREFETCHABLE;
3694 			/* FALLTHROUGH */
3695 		case PCIM_EA_P_VF_MEM:
3696 		case PCIM_EA_P_MEM:
3697 			type = SYS_RES_MEMORY;
3698 			break;
3699 		case PCIM_EA_P_IO:
3700 			type = SYS_RES_IOPORT;
3701 			break;
3702 		default:
3703 			continue;
3704 		}
3705 
3706 		if (alloc_iov != 0) {
3707 #ifdef PCI_IOV
3708 			/* Allocating IOV, confirm BEI matches */
3709 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3710 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3711 				continue;
3712 #else
3713 			continue;
3714 #endif
3715 		} else {
3716 			/* Allocating BAR, confirm BEI matches */
3717 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3718 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3719 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3720 				continue;
3721 		}
3722 
3723 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3724 		if (rid < 0)
3725 			continue;
3726 
3727 		/* Skip resources already allocated by EA */
3728 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3729 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3730 			continue;
3731 
3732 		start = ea->eae_base;
3733 		count = ea->eae_max_offset + 1;
3734 #ifdef PCI_IOV
3735 		if (iov != NULL)
3736 			count = count * iov->iov_num_vfs;
3737 #endif
3738 		end = start + count - 1;
3739 		if (count == 0)
3740 			continue;
3741 
3742 		resource_list_add(rl, type, rid, start, end, count);
3743 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3744 		    flags);
3745 		if (res == NULL) {
3746 			resource_list_delete(rl, type, rid);
3747 
3748 			/*
3749 			 * Failed to allocate using EA, disable entry.
3750 			 * Another attempt to allocation will be performed
3751 			 * further, but this time using legacy BAR registers
3752 			 */
3753 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3754 			tmp &= ~PCIM_EA_ENABLE;
3755 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3756 
3757 			/*
3758 			 * Disabling entry might fail in case it is hardwired.
3759 			 * Read flags again to match current status.
3760 			 */
3761 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3762 
3763 			continue;
3764 		}
3765 
3766 		/* As per specification, fill BAR with zeros */
3767 		pci_write_config(dev, rid, 0, 4);
3768 	}
3769 }
3770 
/*
 * Populate a newly-added PCI child's resource list: EA entries first,
 * then BARs (with ATA legacy and quirk special cases), the INTx
 * interrupt, early USB takeover from SMM, and, where configured,
 * secondary bus ranges behind bridges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* 'i' advances by 1 or 2 depending on BAR width. */
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers from SMM before a driver attaches. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3863 
3864 static struct pci_devinfo *
3865 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3866     int slot, int func)
3867 {
3868 	struct pci_devinfo *dinfo;
3869 
3870 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3871 	if (dinfo != NULL)
3872 		pci_add_child(dev, dinfo);
3873 
3874 	return (dinfo);
3875 }
3876 
/*
 * Enumerate every slot/function on the given bus and attach the
 * devices found as children of 'dev'.
 */
void
pci_add_children(device_t dev, int domain, int busno)
{
/* Shorthand for a config read of the function being scanned. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* An out-of-range header type means no device in the slot. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may have up to MAXFUNCS functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f);
	}
#undef REG
}
3919 
3920 #ifdef PCI_IOV
3921 device_t
3922 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
3923     uint16_t did)
3924 {
3925 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3926 	device_t pcib;
3927 	int busno, slot, func;
3928 
3929 	pf_dinfo = device_get_ivars(pf);
3930 
3931 	pcib = device_get_parent(bus);
3932 
3933 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3934 
3935 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
3936 	    slot, func, vid, did);
3937 
3938 	vf_dinfo->cfg.flags |= PCICFG_VF;
3939 	pci_add_child(bus, vf_dinfo);
3940 
3941 	return (vf_dinfo->cfg.dev);
3942 }
3943 
/*
 * Default PCI_CREATE_IOV_CHILD bus method: delegate straight to
 * pci_add_iov_child().  Subclassed busses may override this.
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, rid, vid, did));
}
3951 #endif
3952 
3953 void
3954 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3955 {
3956 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3957 	device_set_ivars(dinfo->cfg.dev, dinfo);
3958 	resource_list_init(&dinfo->resources);
3959 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3960 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3961 	pci_print_verbose(dinfo);
3962 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3963 	pci_child_added(dinfo->cfg.dev);
3964 }
3965 
/*
 * Default PCI_CHILD_ADDED bus method: intentionally a no-op.
 * Subclassed busses override this to react to new children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3971 
3972 static int
3973 pci_probe(device_t dev)
3974 {
3975 
3976 	device_set_desc(dev, "PCI bus");
3977 
3978 	/* Allow other subclasses to override this driver. */
3979 	return (BUS_PROBE_GENERIC);
3980 }
3981 
/*
 * Attach-time setup shared by pci and its subclasses: reserve our own
 * bus number (when PCI_RES_BUS is configured) and establish the DMA
 * tag children inherit.  Returns 0 on success or ENXIO when the bus
 * number cannot be reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve this bus's own number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a boundary-restricted DMA tag, but only at the top of
	 * a PCI hierarchy (i.e. when the grandparent is not itself a
	 * pci bus); nested busses inherit the parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4028 
4029 static int
4030 pci_attach(device_t dev)
4031 {
4032 	int busno, domain, error;
4033 
4034 	error = pci_attach_common(dev);
4035 	if (error)
4036 		return (error);
4037 
4038 	/*
4039 	 * Since there can be multiple independantly numbered PCI
4040 	 * busses on systems with multiple PCI domains, we can't use
4041 	 * the unit number to decide which bus we are probing. We ask
4042 	 * the parent pcib what our domain and bus numbers are.
4043 	 */
4044 	domain = pcib_get_domain(dev);
4045 	busno = pcib_get_bus(dev);
4046 	pci_add_children(dev, domain, busno);
4047 	return (bus_generic_attach(dev));
4048 }
4049 
#ifdef PCI_RES_BUS
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int error;

	/* Detach children first; keep our bus number if that fails. */
	error = bus_generic_detach(dev);
	if (error != 0)
		return (error);

	/* Give back the bus number reserved in pci_attach_common(). */
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
#endif
4064 
4065 static void
4066 pci_set_power_child(device_t dev, device_t child, int state)
4067 {
4068 	device_t pcib;
4069 	int dstate;
4070 
4071 	/*
4072 	 * Set the device to the given state.  If the firmware suggests
4073 	 * a different power state, use it instead.  If power management
4074 	 * is not present, the firmware is responsible for managing
4075 	 * device power.  Skip children who aren't attached since they
4076 	 * are handled separately.
4077 	 */
4078 	pcib = device_get_parent(dev);
4079 	dstate = state;
4080 	if (device_is_attached(child) &&
4081 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4082 		pci_set_powerstate(child, dstate);
4083 }
4084 
4085 int
4086 pci_suspend_child(device_t dev, device_t child)
4087 {
4088 	struct pci_devinfo *dinfo;
4089 	int error;
4090 
4091 	dinfo = device_get_ivars(child);
4092 
4093 	/*
4094 	 * Save the PCI configuration space for the child and set the
4095 	 * device in the appropriate power state for this sleep state.
4096 	 */
4097 	pci_cfg_save(child, dinfo, 0);
4098 
4099 	/* Suspend devices before potentially powering them down. */
4100 	error = bus_generic_suspend_child(dev, child);
4101 
4102 	if (error)
4103 		return (error);
4104 
4105 	if (pci_do_power_suspend)
4106 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4107 
4108 	return (0);
4109 }
4110 
4111 int
4112 pci_resume_child(device_t dev, device_t child)
4113 {
4114 	struct pci_devinfo *dinfo;
4115 
4116 	if (pci_do_power_resume)
4117 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4118 
4119 	dinfo = device_get_ivars(child);
4120 	pci_cfg_restore(child, dinfo);
4121 	if (!device_is_attached(child))
4122 		pci_cfg_save(child, dinfo, 1);
4123 
4124 	bus_generic_resume_child(dev, child);
4125 
4126 	return (0);
4127 }
4128 
4129 int
4130 pci_resume(device_t dev)
4131 {
4132 	device_t child, *devlist;
4133 	int error, i, numdevs;
4134 
4135 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4136 		return (error);
4137 
4138 	/*
4139 	 * Resume critical devices first, then everything else later.
4140 	 */
4141 	for (i = 0; i < numdevs; i++) {
4142 		child = devlist[i];
4143 		switch (pci_get_class(child)) {
4144 		case PCIC_DISPLAY:
4145 		case PCIC_MEMORY:
4146 		case PCIC_BRIDGE:
4147 		case PCIC_BASEPERIPH:
4148 			BUS_RESUME_CHILD(dev, child);
4149 			break;
4150 		}
4151 	}
4152 	for (i = 0; i < numdevs; i++) {
4153 		child = devlist[i];
4154 		switch (pci_get_class(child)) {
4155 		case PCIC_DISPLAY:
4156 		case PCIC_MEMORY:
4157 		case PCIC_BRIDGE:
4158 		case PCIC_BASEPERIPH:
4159 			break;
4160 		default:
4161 			BUS_RESUME_CHILD(dev, child);
4162 		}
4163 	}
4164 	free(devlist, M_TEMP);
4165 	return (0);
4166 }
4167 
/*
 * Locate a preloaded PCI vendor database ("pci_vendor_data" module)
 * and record its address and size for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database so the parser cannot
			 * run off the end of a truncated final line.
			 * NOTE(review): this writes one byte at
			 * [pci_vendordata_size]; assumes the preload
			 * area has room past the reported size --
			 * confirm against the loader's layout.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
4187 
4188 void
4189 pci_driver_added(device_t dev, driver_t *driver)
4190 {
4191 	int numdevs;
4192 	device_t *devlist;
4193 	device_t child;
4194 	struct pci_devinfo *dinfo;
4195 	int i;
4196 
4197 	if (bootverbose)
4198 		device_printf(dev, "driver added\n");
4199 	DEVICE_IDENTIFY(driver, dev);
4200 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4201 		return;
4202 	for (i = 0; i < numdevs; i++) {
4203 		child = devlist[i];
4204 		if (device_get_state(child) != DS_NOTPRESENT)
4205 			continue;
4206 		dinfo = device_get_ivars(child);
4207 		pci_print_verbose(dinfo);
4208 		if (bootverbose)
4209 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4210 		pci_cfg_restore(child, dinfo);
4211 		if (device_probe_and_attach(child) != 0)
4212 			pci_child_detached(dev, child);
4213 	}
4214 	free(devlist, M_TEMP);
4215 }
4216 
/*
 * Bus method to hook up an interrupt handler for a child device.
 * After the generic registration succeeds, program the hardware:
 * enable INTx for a legacy interrupt (rid 0), or map and enable the
 * MSI/MSI-X message for message-signalled interrupts, tracking the
 * number of handlers per message.  On failure the registration is
 * torn down again before returning the error.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the message lazily on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4316 
4317 int
4318 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4319     void *cookie)
4320 {
4321 	struct msix_table_entry *mte;
4322 	struct resource_list_entry *rle;
4323 	struct pci_devinfo *dinfo;
4324 	int error, rid;
4325 
4326 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4327 		return (EINVAL);
4328 
4329 	/* If this isn't a direct child, just bail out */
4330 	if (device_get_parent(child) != dev)
4331 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4332 
4333 	rid = rman_get_rid(irq);
4334 	if (rid == 0) {
4335 		/* Mask INTx */
4336 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4337 	} else {
4338 		/*
4339 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4340 		 * decrement the appropriate handlers count and mask the
4341 		 * MSI-X message, or disable MSI messages if the count
4342 		 * drops to 0.
4343 		 */
4344 		dinfo = device_get_ivars(child);
4345 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4346 		if (rle->res != irq)
4347 			return (EINVAL);
4348 		if (dinfo->cfg.msi.msi_alloc > 0) {
4349 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4350 			    ("MSI-X index too high"));
4351 			if (dinfo->cfg.msi.msi_handlers == 0)
4352 				return (EINVAL);
4353 			dinfo->cfg.msi.msi_handlers--;
4354 			if (dinfo->cfg.msi.msi_handlers == 0)
4355 				pci_disable_msi(child);
4356 		} else {
4357 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4358 			    ("No MSI or MSI-X interrupts allocated"));
4359 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4360 			    ("MSI-X index too high"));
4361 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4362 			if (mte->mte_handlers == 0)
4363 				return (EINVAL);
4364 			mte->mte_handlers--;
4365 			if (mte->mte_handlers == 0)
4366 				pci_mask_msix(child, rid - 1);
4367 		}
4368 	}
4369 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4370 	if (rid > 0)
4371 		KASSERT(error == 0,
4372 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4373 	return (error);
4374 }
4375 
4376 int
4377 pci_print_child(device_t dev, device_t child)
4378 {
4379 	struct pci_devinfo *dinfo;
4380 	struct resource_list *rl;
4381 	int retval = 0;
4382 
4383 	dinfo = device_get_ivars(child);
4384 	rl = &dinfo->resources;
4385 
4386 	retval += bus_print_child_header(dev, child);
4387 
4388 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4389 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4390 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4391 	if (device_get_flags(dev))
4392 		retval += printf(" flags %#x", device_get_flags(dev));
4393 
4394 	retval += printf(" at device %d.%d", pci_get_slot(child),
4395 	    pci_get_function(child));
4396 
4397 	retval += bus_print_child_domain(dev, child);
4398 	retval += bus_print_child_footer(dev, child);
4399 
4400 	return (retval);
4401 }
4402 
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * describe devices with no driver attached.  A subclass of -1 is the
 * fallback entry for the whole class and must precede its subclass
 * entries; "report" selects whether the device is announced always
 * (1) or only when booting verbose (0).
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4497 
4498 void
4499 pci_probe_nomatch(device_t dev, device_t child)
4500 {
4501 	int i, report;
4502 	const char *cp, *scp;
4503 	char *device;
4504 
4505 	/*
4506 	 * Look for a listing for this device in a loaded device database.
4507 	 */
4508 	report = 1;
4509 	if ((device = pci_describe_device(child)) != NULL) {
4510 		device_printf(dev, "<%s>", device);
4511 		free(device, M_DEVBUF);
4512 	} else {
4513 		/*
4514 		 * Scan the class/subclass descriptions for a general
4515 		 * description.
4516 		 */
4517 		cp = "unknown";
4518 		scp = NULL;
4519 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4520 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4521 				if (pci_nomatch_tab[i].subclass == -1) {
4522 					cp = pci_nomatch_tab[i].desc;
4523 					report = pci_nomatch_tab[i].report;
4524 				} else if (pci_nomatch_tab[i].subclass ==
4525 				    pci_get_subclass(child)) {
4526 					scp = pci_nomatch_tab[i].desc;
4527 					report = pci_nomatch_tab[i].report;
4528 				}
4529 			}
4530 		}
4531 		if (report || bootverbose) {
4532 			device_printf(dev, "<%s%s%s>",
4533 			    cp ? cp : "",
4534 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4535 			    scp ? scp : "");
4536 		}
4537 	}
4538 	if (report || bootverbose) {
4539 		printf(" at device %d.%d (no driver attached)\n",
4540 		    pci_get_slot(child), pci_get_function(child));
4541 	}
4542 	pci_cfg_save(child, device_get_ivars(child), 1);
4543 }
4544 
/*
 * Clean up after a detached child: release any resources the driver
 * leaked (warning about each kind), then save config state and power
 * the device down.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	/* Save state and power the device down (setstate != 0). */
	pci_cfg_save(child, dinfo, 1);
}
4576 
4577 /*
4578  * Parse the PCI device database, if loaded, and return a pointer to a
4579  * description of the device.
4580  *
4581  * The database is flat text formatted as follows:
4582  *
4583  * Any line not in a valid format is ignored.
4584  * Lines are terminated with newline '\n' characters.
4585  *
4586  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4587  * the vendor name.
4588  *
4589  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4590  * - devices cannot be listed without a corresponding VENDOR line.
4591  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4592  * another TAB, then the device name.
4593  */
4594 
4595 /*
4596  * Assuming (ptr) points to the beginning of a line in the database,
4597  * return the vendor or device and description of the next entry.
4598  * The value of (vendor) or (device) inappropriate for the entry type
4599  * is set to -1.  Returns nonzero at the end of the database.
4600  *
 * Note that this is somewhat fragile in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when it is loaded.
4604  */
4605 static int
4606 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4607 {
4608 	char	*cp = *ptr;
4609 	int	left;
4610 
4611 	*device = -1;
4612 	*vendor = -1;
4613 	**desc = '\0';
4614 	for (;;) {
4615 		left = pci_vendordata_size - (cp - pci_vendordata);
4616 		if (left <= 0) {
4617 			*ptr = cp;
4618 			return(1);
4619 		}
4620 
4621 		/* vendor entry? */
4622 		if (*cp != '\t' &&
4623 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4624 			break;
4625 		/* device entry? */
4626 		if (*cp == '\t' &&
4627 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4628 			break;
4629 
4630 		/* skip to next line */
4631 		while (*cp != '\n' && left > 0) {
4632 			cp++;
4633 			left--;
4634 		}
4635 		if (*cp == '\n') {
4636 			cp++;
4637 			left--;
4638 		}
4639 	}
4640 	/* skip to next line */
4641 	while (*cp != '\n' && left > 0) {
4642 		cp++;
4643 		left--;
4644 	}
4645 	if (*cp == '\n' && left > 0)
4646 		cp++;
4647 	*ptr = cp;
4648 	return(0);
4649 }
4650 
/*
 * Build a malloc'd "<vendor>, <device>" description of the device
 * from the preloaded vendor database, or NULL if the database is not
 * loaded, the vendor is unknown, or allocation fails.  The caller
 * frees the result with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte buffers match the parser's sscanf field width. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/*
		 * Stop at end of database or when the next vendor
		 * section begins (vendor != -1); either way we have no
		 * device match and fall back to the raw device ID.
		 */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4703 
4704 int
4705 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4706 {
4707 	struct pci_devinfo *dinfo;
4708 	pcicfgregs *cfg;
4709 
4710 	dinfo = device_get_ivars(child);
4711 	cfg = &dinfo->cfg;
4712 
4713 	switch (which) {
4714 	case PCI_IVAR_ETHADDR:
4715 		/*
4716 		 * The generic accessor doesn't deal with failure, so
4717 		 * we set the return value, then return an error.
4718 		 */
4719 		*((uint8_t **) result) = NULL;
4720 		return (EINVAL);
4721 	case PCI_IVAR_SUBVENDOR:
4722 		*result = cfg->subvendor;
4723 		break;
4724 	case PCI_IVAR_SUBDEVICE:
4725 		*result = cfg->subdevice;
4726 		break;
4727 	case PCI_IVAR_VENDOR:
4728 		*result = cfg->vendor;
4729 		break;
4730 	case PCI_IVAR_DEVICE:
4731 		*result = cfg->device;
4732 		break;
4733 	case PCI_IVAR_DEVID:
4734 		*result = (cfg->device << 16) | cfg->vendor;
4735 		break;
4736 	case PCI_IVAR_CLASS:
4737 		*result = cfg->baseclass;
4738 		break;
4739 	case PCI_IVAR_SUBCLASS:
4740 		*result = cfg->subclass;
4741 		break;
4742 	case PCI_IVAR_PROGIF:
4743 		*result = cfg->progif;
4744 		break;
4745 	case PCI_IVAR_REVID:
4746 		*result = cfg->revid;
4747 		break;
4748 	case PCI_IVAR_INTPIN:
4749 		*result = cfg->intpin;
4750 		break;
4751 	case PCI_IVAR_IRQ:
4752 		*result = cfg->intline;
4753 		break;
4754 	case PCI_IVAR_DOMAIN:
4755 		*result = cfg->domain;
4756 		break;
4757 	case PCI_IVAR_BUS:
4758 		*result = cfg->bus;
4759 		break;
4760 	case PCI_IVAR_SLOT:
4761 		*result = cfg->slot;
4762 		break;
4763 	case PCI_IVAR_FUNCTION:
4764 		*result = cfg->func;
4765 		break;
4766 	case PCI_IVAR_CMDREG:
4767 		*result = cfg->cmdreg;
4768 		break;
4769 	case PCI_IVAR_CACHELNSZ:
4770 		*result = cfg->cachelnsz;
4771 		break;
4772 	case PCI_IVAR_MINGNT:
4773 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4774 			*result = -1;
4775 			return (EINVAL);
4776 		}
4777 		*result = cfg->mingnt;
4778 		break;
4779 	case PCI_IVAR_MAXLAT:
4780 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4781 			*result = -1;
4782 			return (EINVAL);
4783 		}
4784 		*result = cfg->maxlat;
4785 		break;
4786 	case PCI_IVAR_LATTIMER:
4787 		*result = cfg->lattimer;
4788 		break;
4789 	default:
4790 		return (ENOENT);
4791 	}
4792 	return (0);
4793 }
4794 
4795 int
4796 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4797 {
4798 	struct pci_devinfo *dinfo;
4799 
4800 	dinfo = device_get_ivars(child);
4801 
4802 	switch (which) {
4803 	case PCI_IVAR_INTPIN:
4804 		dinfo->cfg.intpin = value;
4805 		return (0);
4806 	case PCI_IVAR_ETHADDR:
4807 	case PCI_IVAR_SUBVENDOR:
4808 	case PCI_IVAR_SUBDEVICE:
4809 	case PCI_IVAR_VENDOR:
4810 	case PCI_IVAR_DEVICE:
4811 	case PCI_IVAR_DEVID:
4812 	case PCI_IVAR_CLASS:
4813 	case PCI_IVAR_SUBCLASS:
4814 	case PCI_IVAR_PROGIF:
4815 	case PCI_IVAR_REVID:
4816 	case PCI_IVAR_IRQ:
4817 	case PCI_IVAR_DOMAIN:
4818 	case PCI_IVAR_BUS:
4819 	case PCI_IVAR_SLOT:
4820 	case PCI_IVAR_FUNCTION:
4821 		return (EINVAL);	/* disallow for now */
4822 
4823 	default:
4824 		return (ENOENT);
4825 	}
4826 }
4827 
4828 #include "opt_ddb.h"
4829 #ifdef DDB
4830 #include <ddb/ddb.h>
4831 #include <sys/cons.h>
4832 
4833 /*
4834  * List resources based on pci map registers, used for within ddb
4835  */
4836 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line (driver name/unit, selector, class, IDs, header
 * type) per known PCI device.  The loop stops early if the pager is
 * quit.  Unnamed devices are numbered with a running "none" counter.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4876 #endif /* DDB */
4877 
/*
 * Lazily reserve the resource backing a BAR that was not reserved at
 * enumeration time: size the BAR (probing the hardware if it is not
 * already tracked), validate that the requested resource type matches
 * the BAR type, reserve a suitably sized and aligned range from the
 * parent, and program the BAR with the assigned address.  Returns the
 * reserved (inactive) resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4981 
/*
 * Common resource-allocation path for PCI children.  Performs
 * type-specific preparation (secondary bus allocation, legacy
 * interrupt routing, lazy BAR reservation) and then satisfies the
 * request from the child's resource list.  "num" is the number of
 * contiguous BAR-sized blocks wanted (used by SR-IOV VF allocation).
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	/*
	 * Types without a case here (and types whose case does not
	 * return) fall through to the resource_list_alloc() below.
	 */
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5057 
/*
 * BUS_ALLOC_RESOURCE() method for the PCI bus.  Requests from
 * grandchildren are passed up to our parent; requests from SR-IOV
 * virtual functions are redirected to the VF-specific allocators for
 * memory (VFs have no I/O BARs); everything else goes through the
 * common pci_alloc_multi_resource() path with num = 1.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5089 
/*
 * BUS_RELEASE_RESOURCE() method for the PCI bus.  Mirrors the dispatch
 * in pci_alloc_resource(): grandchild requests are passed up, SR-IOV VF
 * memory resources go to the VF-specific release path, bridge window
 * registers bypass the resource list, and everything else is released
 * via the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			/* EDOOFUS: a VF I/O resource should never exist. */
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5140 
/*
 * BUS_ACTIVATE_RESOURCE() method.  After generic activation succeeds,
 * enable the matching decode in the command register for I/O and memory
 * resources; device ROM BARs additionally need their own enable bit set
 * in the BAR itself.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
5168 
/*
 * BUS_DEACTIVATE_RESOURCE() method.  After generic deactivation
 * succeeds, clear the ROM enable bit for device ROM BARs by rewriting
 * the BAR with the plain base address.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
5189 
5190 void
5191 pci_child_deleted(device_t dev, device_t child)
5192 {
5193 	struct resource_list_entry *rle;
5194 	struct resource_list *rl;
5195 	struct pci_devinfo *dinfo;
5196 
5197 	dinfo = device_get_ivars(child);
5198 	rl = &dinfo->resources;
5199 
5200 	/* Turn off access to resources we're about to free */
5201 	if (bus_child_present(child) != 0) {
5202 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5203 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5204 
5205 		pci_disable_busmaster(child);
5206 	}
5207 
5208 	/* Free all allocated resources */
5209 	STAILQ_FOREACH(rle, rl, link) {
5210 		if (rle->res) {
5211 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5212 			    resource_list_busy(rl, rle->type, rle->rid)) {
5213 				pci_printf(&dinfo->cfg,
5214 				    "Resource still owned, oops. "
5215 				    "(type=%d, rid=%d, addr=%lx)\n",
5216 				    rle->type, rle->rid,
5217 				    rman_get_start(rle->res));
5218 				bus_release_resource(child, rle->type, rle->rid,
5219 				    rle->res);
5220 			}
5221 			resource_list_unreserve(rl, dev, child, rle->type,
5222 			    rle->rid);
5223 		}
5224 	}
5225 	resource_list_free(rl);
5226 
5227 	pci_freecfg(dinfo);
5228 }
5229 
/*
 * BUS_DELETE_RESOURCE() method.  Removes the resource list entry for
 * (type, rid), first unreserving any backing resource.  Refuses to
 * delete a resource that is active or still checked out by the child.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our own immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%jx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
5259 
5260 struct resource_list *
5261 pci_get_resource_list (device_t dev, device_t child)
5262 {
5263 	struct pci_devinfo *dinfo = device_get_ivars(child);
5264 
5265 	return (&dinfo->resources);
5266 }
5267 
5268 bus_dma_tag_t
5269 pci_get_dma_tag(device_t bus, device_t dev)
5270 {
5271 	struct pci_softc *sc = device_get_softc(bus);
5272 
5273 	return (sc->sc_dma_tag);
5274 }
5275 
/*
 * PCI_READ_CONFIG() method.  Normally forwards the read to the parent
 * bridge; for SR-IOV virtual functions the vendor and device ID
 * registers are synthesized from cached values since VFs do not
 * implement them in hardware.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* 32-bit read covers DID (high) and VID (low). */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5316 
5317 void
5318 pci_write_config_method(device_t dev, device_t child, int reg,
5319     uint32_t val, int width)
5320 {
5321 	struct pci_devinfo *dinfo = device_get_ivars(child);
5322 	pcicfgregs *cfg = &dinfo->cfg;
5323 
5324 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5325 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5326 }
5327 
5328 int
5329 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5330     size_t buflen)
5331 {
5332 
5333 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5334 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5335 	return (0);
5336 }
5337 
5338 int
5339 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5340     size_t buflen)
5341 {
5342 	struct pci_devinfo *dinfo;
5343 	pcicfgregs *cfg;
5344 
5345 	dinfo = device_get_ivars(child);
5346 	cfg = &dinfo->cfg;
5347 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5348 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5349 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5350 	    cfg->progif);
5351 	return (0);
5352 }
5353 
5354 int
5355 pci_assign_interrupt_method(device_t dev, device_t child)
5356 {
5357 	struct pci_devinfo *dinfo = device_get_ivars(child);
5358 	pcicfgregs *cfg = &dinfo->cfg;
5359 
5360 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5361 	    cfg->intpin));
5362 }
5363 
/*
 * dev_lookup event handler: translate a pciconf-style device selector
 * ("pciD:B:S:F" or "pciB:S:F") into a device_t via pci_find_dbsf().
 * Leaves *dev untouched unless the name parses and is in range.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler already resolved the name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift values down and assume domain 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* Function numbers above PCI_FUNCMAX are only valid for ARI (slot 0). */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5412 
/*
 * Module event handler: on load, initialize the global device queue,
 * create /dev/pci, load vendor data, and register the dev_lookup event
 * handler; on unload, tear those down again.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* Persist across load/unload so MOD_UNLOAD can find them. */
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5439 
/*
 * Restore the writable PCI Express capability control registers saved
 * by pci_cfg_save_pcie().  Registers are only written when the saved
 * capability version / port type indicates the device implements them
 * (version 1 devices implement only a subset).
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control exists for root ports and slot-bearing downstream ports. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The *_CTL2 registers only exist from capability version 2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5475 
5476 static void
5477 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5478 {
5479 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5480 	    dinfo->cfg.pcix.pcix_command,  2);
5481 }
5482 
/*
 * Restore a device's saved configuration registers (the counterpart of
 * pci_cfg_save()).  Used after power transitions and similar events
 * that can clobber config space.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Header-type specific registers live at different offsets. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5550 
/*
 * Save the writable PCI Express capability control registers so
 * pci_cfg_restore_pcie() can put them back later.  The register set
 * read mirrors the version / port-type gating used on restore.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control exists for root ports and slot-bearing downstream ports. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The *_CTL2 registers only exist from capability version 2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5588 
5589 static void
5590 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5591 {
5592 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5593 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5594 }
5595 
/*
 * Save a device's configuration registers into the cached copy and,
 * when 'setstate' is non-zero, optionally power the device down to D3
 * according to the hw.pci.do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Header-type specific registers live at different offsets. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Intentional fallthroughs: each policy level is a superset. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5698 
5699 /* Wrapper APIs suitable for device driver use. */
5700 void
5701 pci_save_state(device_t dev)
5702 {
5703 	struct pci_devinfo *dinfo;
5704 
5705 	dinfo = device_get_ivars(dev);
5706 	pci_cfg_save(dev, dinfo, 0);
5707 }
5708 
5709 void
5710 pci_restore_state(device_t dev)
5711 {
5712 	struct pci_devinfo *dinfo;
5713 
5714 	dinfo = device_get_ivars(dev);
5715 	pci_cfg_restore(dev, dinfo);
5716 }
5717 
5718 static uint16_t
5719 pci_get_rid_method(device_t dev, device_t child)
5720 {
5721 
5722 	return (PCIB_GET_RID(device_get_parent(dev), child));
5723 }
5724 
5725 /* Find the upstream port of a given PCI device in a root complex. */
5726 device_t
5727 pci_find_pcie_root_port(device_t dev)
5728 {
5729 	struct pci_devinfo *dinfo;
5730 	devclass_t pci_class;
5731 	device_t pcib, bus;
5732 
5733 	pci_class = devclass_find("pci");
5734 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5735 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5736 
5737 	/*
5738 	 * Walk the bridge hierarchy until we find a PCI-e root
5739 	 * port or a non-PCI device.
5740 	 */
5741 	for (;;) {
5742 		bus = device_get_parent(dev);
5743 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5744 		    device_get_nameunit(dev)));
5745 
5746 		pcib = device_get_parent(bus);
5747 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5748 		    device_get_nameunit(bus)));
5749 
5750 		/*
5751 		 * pcib's parent must be a PCI bus for this to be a
5752 		 * PCI-PCI bridge.
5753 		 */
5754 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5755 			return (NULL);
5756 
5757 		dinfo = device_get_ivars(pcib);
5758 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5759 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5760 			return (pcib);
5761 
5762 		dev = pcib;
5763 	}
5764 }
5765