xref: /freebsd/sys/dev/pci/pci.c (revision 18849b5da0c5eaa88500b457be05b038813b51b1)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
/*
 * True if config register 'reg' is the expansion-ROM BAR for the header
 * type recorded in 'cfg' (type 0 and type 1 headers keep it at different
 * offsets).  'reg' is parenthesized so callers may pass expressions.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
82 
/* Prototypes for the helpers private to this file. */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static int		pci_detach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static int		pci_get_id_method(device_t dev, device_t child,
			    enum pci_id_type type, uintptr_t *rid);

static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
    int b, int s, int f, uint16_t vid, uint16_t did);
/*
 * Kernel object method table for the PCI bus driver: maps the generic
 * device and bus interfaces, plus the PCI-specific interface declared
 * in pci_if.m, onto the implementations in this file (or the
 * bus_generic_* defaults where no PCI-specific handling is needed).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
205 
/* Declare the "pci" driver class and register it under pcib (PCI bridges). */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Raw vendor-description data and its size (see pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
214 
/*
 * One entry of the quirk table below: a combined vendor/device ID,
 * the quirk type, and up to two type-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;
	int	arg2;
};
227 
/* Table of known-broken devices, terminated by an all-zero entry. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
305 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions, plus its change counters. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;
/* Set when at least one PCIe / PCI-X capability is seen during scan. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
385 
386 static int
387 pci_has_quirk(uint32_t devid, int quirk)
388 {
389 	const struct pci_quirk *q;
390 
391 	for (q = &pci_quirks[0]; q->devid; q++) {
392 		if (q->devid == devid && q->type == quirk)
393 			return (1);
394 	}
395 	return (0);
396 }
397 
/*
 * Find a device_t by bus/slot/function, assuming domain 0.
 * Convenience wrapper around pci_find_dbsf().
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
406 
407 /* Find a device_t by domain/bus/slot/function */
408 
409 device_t
410 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
411 {
412 	struct pci_devinfo *dinfo;
413 
414 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
415 		if ((dinfo->cfg.domain == domain) &&
416 		    (dinfo->cfg.bus == bus) &&
417 		    (dinfo->cfg.slot == slot) &&
418 		    (dinfo->cfg.func == func)) {
419 			return (dinfo->cfg.dev);
420 		}
421 	}
422 
423 	return (NULL);
424 }
425 
426 /* Find a device_t by vendor/device ID */
427 
428 device_t
429 pci_find_device(uint16_t vendor, uint16_t device)
430 {
431 	struct pci_devinfo *dinfo;
432 
433 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
434 		if ((dinfo->cfg.vendor == vendor) &&
435 		    (dinfo->cfg.device == device)) {
436 			return (dinfo->cfg.dev);
437 		}
438 	}
439 
440 	return (NULL);
441 }
442 
443 device_t
444 pci_find_class(uint8_t class, uint8_t subclass)
445 {
446 	struct pci_devinfo *dinfo;
447 
448 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
449 		if (dinfo->cfg.baseclass == class &&
450 		    dinfo->cfg.subclass == subclass) {
451 			return (dinfo->cfg.dev);
452 		}
453 	}
454 
455 	return (NULL);
456 }
457 
458 static int
459 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
460 {
461 	va_list ap;
462 	int retval;
463 
464 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
465 	    cfg->func);
466 	va_start(ap, fmt);
467 	retval += vprintf(fmt, ap);
468 	va_end(ap);
469 	return (retval);
470 }
471 
472 /* return base address of memory or port map */
473 
474 static pci_addr_t
475 pci_mapbase(uint64_t mapreg)
476 {
477 
478 	if (PCI_BAR_MEM(mapreg))
479 		return (mapreg & PCIM_BAR_MEM_BASE);
480 	else
481 		return (mapreg & PCIM_BAR_IO_BASE);
482 }
483 
484 /* return map type of memory or port map */
485 
486 static const char *
487 pci_maptype(uint64_t mapreg)
488 {
489 
490 	if (PCI_BAR_IO(mapreg))
491 		return ("I/O Port");
492 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
493 		return ("Prefetchable Memory");
494 	return ("Memory");
495 }
496 
497 /* return log2 of map size decoded for memory or port map */
498 
499 int
500 pci_mapsize(uint64_t testval)
501 {
502 	int ln2size;
503 
504 	testval = pci_mapbase(testval);
505 	ln2size = 0;
506 	if (testval != 0) {
507 		while ((testval & 1) == 0)
508 		{
509 			ln2size++;
510 			testval >>= 1;
511 		}
512 	}
513 	return (ln2size);
514 }
515 
516 /* return base address of device ROM */
517 
/* Extract the base address from an expansion-ROM BAR value. */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
524 
/* return log2 of map size decoded for device ROM */
526 
527 static int
528 pci_romsize(uint64_t testval)
529 {
530 	int ln2size;
531 
532 	testval = pci_rombase(testval);
533 	ln2size = 0;
534 	if (testval != 0) {
535 		while ((testval & 1) == 0)
536 		{
537 			ln2size++;
538 			testval >>= 1;
539 		}
540 	}
541 	return (ln2size);
542 }
543 
544 /* return log2 of address range supported by map register */
545 
546 static int
547 pci_maprange(uint64_t mapreg)
548 {
549 	int ln2range = 0;
550 
551 	if (PCI_BAR_IO(mapreg))
552 		ln2range = 32;
553 	else
554 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
555 		case PCIM_BAR_MEM_32:
556 			ln2range = 32;
557 			break;
558 		case PCIM_BAR_MEM_1MB:
559 			ln2range = 20;
560 			break;
561 		case PCIM_BAR_MEM_64:
562 			ln2range = 64;
563 			break;
564 		}
565 	return (ln2range);
566 }
567 
568 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
569 
/*
 * Adjust values from PCI 1.0 devices to match 2.0 standards: ancient
 * PCI-PCI bridges predate header type 1, so patch the header type for
 * any type 0 device whose class says it is a PCI bridge.
 */
static void
pci_fixancient(pcicfgregs *cfg)
{
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
580 
581 /* extract header type specific config data */
582 
/*
 * Extract the header-type specific registers (subsystem IDs, latency
 * values, bridge bus numbers) into 'cfg', and record in cfg->nummaps
 * how many BARs this header type carries.  Unknown header types are
 * left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
616 
/*
 * Read the configuration header of the function at domain d, bus b,
 * slot s, function f into a new pci_devinfo.  Returns NULL when no
 * device responds (an empty slot reads the vendor ID as all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
/* NB: REG stays defined here; pci_fill_devinfo() below reuses it. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}
631 
632 struct pci_devinfo *
633 pci_alloc_devinfo_method(device_t dev)
634 {
635 
636 	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
637 	    M_WAITOK | M_ZERO));
638 }
639 
/*
 * Allocate a pci_devinfo via the bus's PCI_ALLOC_DEVINFO method, read
 * the common configuration-header registers into it, parse the
 * capability list when present, and link the entry onto the global
 * pci_devq list.  Uses the REG() macro defined above pci_read_device().
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* The multi-function bit shares the header-type register. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the parsed registers into the pci_conf exported to userland. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
704 #undef REG
705 
/*
 * Parse the Enhanced Allocation (EA) capability: walk its entries and
 * record each entry's flags, BAR Equivalent Indicator (BEI), 64-bit
 * base and max offset on the cfg->ea.ea_entries list.  A no-op if the
 * EA capability was not found (ea_location == 0).
 */
static void
pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
    cfg->ea.ea_location + (n), w)
	int num_ent;
	int ptr;
	int a, b;
	uint32_t val;
	int ent_size;
	uint32_t dw[4];
	uint64_t base, max_offset;
	struct pci_ea_entry *eae;

	if (cfg->ea.ea_location == 0)
		return;

	STAILQ_INIT(&cfg->ea.ea_entries);

	/* Determine the number of entries */
	num_ent = REG(PCIR_EA_NUM_ENT, 2);
	num_ent &= PCIM_EA_NUM_ENT_MASK;

	/* Find the first entry to care of */
	ptr = PCIR_EA_FIRST_ENT;

	/* Skip DWORD 2 for type 1 functions */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
		ptr += 4;

	for (a = 0; a < num_ent; a++) {

		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;

		/* Read a number of dwords in the entry */
		val = REG(ptr, 4);
		ptr += 4;
		ent_size = (val & PCIM_EA_ES);

		for (b = 0; b < ent_size; b++) {
			dw[b] = REG(ptr, 4);
			ptr += 4;
		}

		eae->eae_flags = val;
		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;

		/* dw[0]/dw[1] hold the low 32 bits of base and max offset. */
		base = dw[0] & PCIM_EA_FIELD_MASK;
		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
		b = 2;
		/* Optional upper 32 bits follow when the IS_64 flags are set. */
		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
			base |= (uint64_t)dw[b] << 32UL;
			b++;
		}
		if (((dw[1] & PCIM_EA_IS_64) != 0)
		    && (b < ent_size)) {
			max_offset |= (uint64_t)dw[b] << 32UL;
			b++;
		}

		eae->eae_base = base;
		eae->eae_max_offset = max_offset;

		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);

		if (bootverbose) {
			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
		}
	}
}
779 #undef REG
780 
/*
 * Walk the PCI capability list and cache the location and key fields
 * of each capability this driver cares about: power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCIe, and
 * Enhanced Allocation.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA registers encode BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* NB: REG and WREG remain defined; the VPD helpers below reuse them. */
}
945 
946 /*
947  * PCI Vital Product Data
948  */
949 
950 #define	PCI_VPD_TIMEOUT		1000000
951 
952 static int
953 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
954 {
955 	int count = PCI_VPD_TIMEOUT;
956 
957 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
958 
959 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
960 
961 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
962 		if (--count < 0)
963 			return (ENXIO);
964 		DELAY(1);	/* limit looping */
965 	}
966 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
967 
968 	return (0);
969 }
970 
#if 0
/*
 * Write a 32-bit VPD dword at offset 'reg' through the device's VPD
 * capability: store the data dword, then write the offset with the
 * write flag (bit 15) set and poll until the hardware clears it.
 * Returns 0 on success or ENXIO on timeout.
 *
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
990 
991 #undef PCI_VPD_TIMEOUT
992 
/*
 * Cursor state for reading a device's VPD byte stream 32 bits at a
 * time via pci_read_vpd_reg().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent dword read */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* offset of next dword to fetch */
	uint8_t		cksum;		/* running sum of delivered bytes */
};
1001 
1002 static int
1003 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1004 {
1005 	uint32_t reg;
1006 	uint8_t byte;
1007 
1008 	if (vrs->bytesinval == 0) {
1009 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1010 			return (ENXIO);
1011 		vrs->val = le32toh(reg);
1012 		vrs->off += 4;
1013 		byte = vrs->val & 0xff;
1014 		vrs->bytesinval = 3;
1015 	} else {
1016 		vrs->val = vrs->val >> 8;
1017 		byte = vrs->val & 0xff;
1018 		vrs->bytesinval--;
1019 	}
1020 
1021 	vrs->cksum += byte;
1022 	*data = byte;
1023 	return (0);
1024 }
1025 
/*
 * Read and parse a device's VPD (Vital Product Data) into cfg->vpd.
 *
 * VPD is a byte stream of resource descriptors: an identifier string
 * (tag 0x2), a checksummed read-only keyword section (VPD-R, tag
 * 0x10, checksum carried by the "RV" keyword), a read/write keyword
 * section (VPD-W, tag 0x11), and an end tag (0xf).  Parsing is done
 * as a byte-at-a-time state machine: state -1 means "finished",
 * state -2 means "I/O error".  On checksum failure or read error any
 * partially-parsed data is freed before returning.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD space is at most 0x7f dwords total. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit len, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only keyword array as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum: the sum of all bytes so far must be 0.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip filler bytes. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the read/write keyword array as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset so the value can be written. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the VPD as read so we don't retry on every query. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1297 
1298 int
1299 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1300 {
1301 	struct pci_devinfo *dinfo = device_get_ivars(child);
1302 	pcicfgregs *cfg = &dinfo->cfg;
1303 
1304 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1305 		pci_read_vpd(device_get_parent(dev), cfg);
1306 
1307 	*identptr = cfg->vpd.vpd_ident;
1308 
1309 	if (*identptr == NULL)
1310 		return (ENXIO);
1311 
1312 	return (0);
1313 }
1314 
1315 int
1316 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1317 	const char **vptr)
1318 {
1319 	struct pci_devinfo *dinfo = device_get_ivars(child);
1320 	pcicfgregs *cfg = &dinfo->cfg;
1321 	int i;
1322 
1323 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1324 		pci_read_vpd(device_get_parent(dev), cfg);
1325 
1326 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1327 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1328 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1329 			*vptr = cfg->vpd.vpd_ros[i].value;
1330 			return (0);
1331 		}
1332 
1333 	*vptr = NULL;
1334 	return (ENXIO);
1335 }
1336 
1337 struct pcicfg_vpd *
1338 pci_fetch_vpd_list(device_t dev)
1339 {
1340 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1341 	pcicfgregs *cfg = &dinfo->cfg;
1342 
1343 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1344 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1345 	return (&cfg->vpd);
1346 }
1347 
1348 /*
1349  * Find the requested HyperTransport capability and return the offset
1350  * in configuration space via the pointer provided.  The function
1351  * returns 0 on success and an error code otherwise.
1352  */
1353 int
1354 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1355 {
1356 	int ptr, error;
1357 	uint16_t val;
1358 
1359 	error = pci_find_cap(child, PCIY_HT, &ptr);
1360 	if (error)
1361 		return (error);
1362 
1363 	/*
1364 	 * Traverse the capabilities list checking each HT capability
1365 	 * to see if it matches the requested HT capability.
1366 	 */
1367 	while (ptr != 0) {
1368 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1369 		if (capability == PCIM_HTCAP_SLAVE ||
1370 		    capability == PCIM_HTCAP_HOST)
1371 			val &= 0xe000;
1372 		else
1373 			val &= PCIM_HTCMD_CAP_MASK;
1374 		if (val == capability) {
1375 			if (capreg != NULL)
1376 				*capreg = ptr;
1377 			return (0);
1378 		}
1379 
1380 		/* Skip to the next HT capability. */
1381 		while (ptr != 0) {
1382 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1383 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1384 			    PCIY_HT)
1385 				break;
1386 		}
1387 	}
1388 	return (ENOENT);
1389 }
1390 
1391 /*
1392  * Find the requested capability and return the offset in
1393  * configuration space via the pointer provided.  The function returns
1394  * 0 on success and an error code otherwise.
1395  */
1396 int
1397 pci_find_cap_method(device_t dev, device_t child, int capability,
1398     int *capreg)
1399 {
1400 	struct pci_devinfo *dinfo = device_get_ivars(child);
1401 	pcicfgregs *cfg = &dinfo->cfg;
1402 	u_int32_t status;
1403 	u_int8_t ptr;
1404 
1405 	/*
1406 	 * Check the CAP_LIST bit of the PCI status register first.
1407 	 */
1408 	status = pci_read_config(child, PCIR_STATUS, 2);
1409 	if (!(status & PCIM_STATUS_CAPPRESENT))
1410 		return (ENXIO);
1411 
1412 	/*
1413 	 * Determine the start pointer of the capabilities list.
1414 	 */
1415 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1416 	case PCIM_HDRTYPE_NORMAL:
1417 	case PCIM_HDRTYPE_BRIDGE:
1418 		ptr = PCIR_CAP_PTR;
1419 		break;
1420 	case PCIM_HDRTYPE_CARDBUS:
1421 		ptr = PCIR_CAP_PTR_2;
1422 		break;
1423 	default:
1424 		/* XXX: panic? */
1425 		return (ENXIO);		/* no extended capabilities support */
1426 	}
1427 	ptr = pci_read_config(child, ptr, 1);
1428 
1429 	/*
1430 	 * Traverse the capabilities list.
1431 	 */
1432 	while (ptr != 0) {
1433 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1434 			if (capreg != NULL)
1435 				*capreg = ptr;
1436 			return (0);
1437 		}
1438 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1439 	}
1440 
1441 	return (ENOENT);
1442 }
1443 
1444 /*
1445  * Find the requested extended capability and return the offset in
1446  * configuration space via the pointer provided.  The function returns
1447  * 0 on success and an error code otherwise.
1448  */
1449 int
1450 pci_find_extcap_method(device_t dev, device_t child, int capability,
1451     int *capreg)
1452 {
1453 	struct pci_devinfo *dinfo = device_get_ivars(child);
1454 	pcicfgregs *cfg = &dinfo->cfg;
1455 	uint32_t ecap;
1456 	uint16_t ptr;
1457 
1458 	/* Only supported for PCI-express devices. */
1459 	if (cfg->pcie.pcie_location == 0)
1460 		return (ENXIO);
1461 
1462 	ptr = PCIR_EXTCAP;
1463 	ecap = pci_read_config(child, ptr, 4);
1464 	if (ecap == 0xffffffff || ecap == 0)
1465 		return (ENOENT);
1466 	for (;;) {
1467 		if (PCI_EXTCAP_ID(ecap) == capability) {
1468 			if (capreg != NULL)
1469 				*capreg = ptr;
1470 			return (0);
1471 		}
1472 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1473 		if (ptr == 0)
1474 			break;
1475 		ecap = pci_read_config(child, ptr, 4);
1476 	}
1477 
1478 	return (ENOENT);
1479 }
1480 
1481 /*
1482  * Support for MSI-X message interrupts.
1483  */
1484 void
1485 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1486     uint64_t address, uint32_t data)
1487 {
1488 	struct pci_devinfo *dinfo = device_get_ivars(child);
1489 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1490 	uint32_t offset;
1491 
1492 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1493 	offset = msix->msix_table_offset + index * 16;
1494 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1495 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1496 	bus_write_4(msix->msix_table_res, offset + 8, data);
1497 
1498 	/* Enable MSI -> HT mapping. */
1499 	pci_ht_map_msi(child, address);
1500 }
1501 
1502 void
1503 pci_mask_msix(device_t dev, u_int index)
1504 {
1505 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1506 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1507 	uint32_t offset, val;
1508 
1509 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1510 	offset = msix->msix_table_offset + index * 16 + 12;
1511 	val = bus_read_4(msix->msix_table_res, offset);
1512 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1513 		val |= PCIM_MSIX_VCTRL_MASK;
1514 		bus_write_4(msix->msix_table_res, offset, val);
1515 	}
1516 }
1517 
1518 void
1519 pci_unmask_msix(device_t dev, u_int index)
1520 {
1521 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1522 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1523 	uint32_t offset, val;
1524 
1525 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1526 	offset = msix->msix_table_offset + index * 16 + 12;
1527 	val = bus_read_4(msix->msix_table_res, offset);
1528 	if (val & PCIM_MSIX_VCTRL_MASK) {
1529 		val &= ~PCIM_MSIX_VCTRL_MASK;
1530 		bus_write_4(msix->msix_table_res, offset, val);
1531 	}
1532 }
1533 
1534 int
1535 pci_pending_msix(device_t dev, u_int index)
1536 {
1537 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1538 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1539 	uint32_t offset, bit;
1540 
1541 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1542 	offset = msix->msix_pba_offset + (index / 32) * 4;
1543 	bit = 1 << index % 32;
1544 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1545 }
1546 
1547 /*
1548  * Restore MSI-X registers and table during resume.  If MSI-X is
1549  * enabled then walk the virtual table to restore the actual MSI-X
1550  * table.
1551  */
1552 static void
1553 pci_resume_msix(device_t dev)
1554 {
1555 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1556 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1557 	struct msix_table_entry *mte;
1558 	struct msix_vector *mv;
1559 	int i;
1560 
1561 	if (msix->msix_alloc > 0) {
1562 		/* First, mask all vectors. */
1563 		for (i = 0; i < msix->msix_msgnum; i++)
1564 			pci_mask_msix(dev, i);
1565 
1566 		/* Second, program any messages with at least one handler. */
1567 		for (i = 0; i < msix->msix_table_len; i++) {
1568 			mte = &msix->msix_table[i];
1569 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1570 				continue;
1571 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1572 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1573 			pci_unmask_msix(dev, i);
1574 		}
1575 	}
1576 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1577 	    msix->msix_ctrl, 2);
1578 }
1579 
1580 /*
1581  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1582  * returned in *count.  After this function returns, each message will be
1583  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1584  */
1585 int
1586 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1587 {
1588 	struct pci_devinfo *dinfo = device_get_ivars(child);
1589 	pcicfgregs *cfg = &dinfo->cfg;
1590 	struct resource_list_entry *rle;
1591 	int actual, error, i, irq, max;
1592 
1593 	/* Don't let count == 0 get us into trouble. */
1594 	if (*count == 0)
1595 		return (EINVAL);
1596 
1597 	/* If rid 0 is allocated, then fail. */
1598 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1599 	if (rle != NULL && rle->res != NULL)
1600 		return (ENXIO);
1601 
1602 	/* Already have allocated messages? */
1603 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1604 		return (ENXIO);
1605 
1606 	/* If MSI-X is blacklisted for this system, fail. */
1607 	if (pci_msix_blacklisted())
1608 		return (ENXIO);
1609 
1610 	/* MSI-X capability present? */
1611 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1612 		return (ENODEV);
1613 
1614 	/* Make sure the appropriate BARs are mapped. */
1615 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1616 	    cfg->msix.msix_table_bar);
1617 	if (rle == NULL || rle->res == NULL ||
1618 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1619 		return (ENXIO);
1620 	cfg->msix.msix_table_res = rle->res;
1621 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1622 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1623 		    cfg->msix.msix_pba_bar);
1624 		if (rle == NULL || rle->res == NULL ||
1625 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1626 			return (ENXIO);
1627 	}
1628 	cfg->msix.msix_pba_res = rle->res;
1629 
1630 	if (bootverbose)
1631 		device_printf(child,
1632 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1633 		    *count, cfg->msix.msix_msgnum);
1634 	max = min(*count, cfg->msix.msix_msgnum);
1635 	for (i = 0; i < max; i++) {
1636 		/* Allocate a message. */
1637 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1638 		if (error) {
1639 			if (i == 0)
1640 				return (error);
1641 			break;
1642 		}
1643 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1644 		    irq, 1);
1645 	}
1646 	actual = i;
1647 
1648 	if (bootverbose) {
1649 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1650 		if (actual == 1)
1651 			device_printf(child, "using IRQ %ju for MSI-X\n",
1652 			    rle->start);
1653 		else {
1654 			int run;
1655 
1656 			/*
1657 			 * Be fancy and try to print contiguous runs of
1658 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1659 			 * 'run' is true if we are in a range.
1660 			 */
1661 			device_printf(child, "using IRQs %ju", rle->start);
1662 			irq = rle->start;
1663 			run = 0;
1664 			for (i = 1; i < actual; i++) {
1665 				rle = resource_list_find(&dinfo->resources,
1666 				    SYS_RES_IRQ, i + 1);
1667 
1668 				/* Still in a run? */
1669 				if (rle->start == irq + 1) {
1670 					run = 1;
1671 					irq++;
1672 					continue;
1673 				}
1674 
1675 				/* Finish previous range. */
1676 				if (run) {
1677 					printf("-%d", irq);
1678 					run = 0;
1679 				}
1680 
1681 				/* Start new range. */
1682 				printf(",%ju", rle->start);
1683 				irq = rle->start;
1684 			}
1685 
1686 			/* Unfinished range? */
1687 			if (run)
1688 				printf("-%d", irq);
1689 			printf(" for MSI-X\n");
1690 		}
1691 	}
1692 
1693 	/* Mask all vectors. */
1694 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1695 		pci_mask_msix(child, i);
1696 
1697 	/* Allocate and initialize vector data and virtual table. */
1698 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1699 	    M_DEVBUF, M_WAITOK | M_ZERO);
1700 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1701 	    M_DEVBUF, M_WAITOK | M_ZERO);
1702 	for (i = 0; i < actual; i++) {
1703 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1704 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1705 		cfg->msix.msix_table[i].mte_vector = i + 1;
1706 	}
1707 
1708 	/* Update control register to enable MSI-X. */
1709 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1710 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1711 	    cfg->msix.msix_ctrl, 2);
1712 
1713 	/* Update counts of alloc'd messages. */
1714 	cfg->msix.msix_alloc = actual;
1715 	cfg->msix.msix_table_len = actual;
1716 	*count = actual;
1717 	return (0);
1718 }
1719 
1720 /*
1721  * By default, pci_alloc_msix() will assign the allocated IRQ
1722  * resources consecutively to the first N messages in the MSI-X table.
1723  * However, device drivers may want to use different layouts if they
1724  * either receive fewer messages than they asked for, or they wish to
1725  * populate the MSI-X table sparsely.  This method allows the driver
1726  * to specify what layout it wants.  It must be called after a
1727  * successful pci_alloc_msix() but before any of the associated
1728  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1729  *
1730  * The 'vectors' array contains 'count' message vectors.  The array
1731  * maps directly to the MSI-X table in that index 0 in the array
1732  * specifies the vector for the first message in the MSI-X table, etc.
1733  * The vector value in each array index can either be 0 to indicate
1734  * that no vector should be assigned to a message slot, or it can be a
1735  * number from 1 to N (where N is the count returned from a
1736  * succcessful call to pci_alloc_msix()) to indicate which message
1737  * vector (IRQ) to be used for the corresponding message.
1738  *
1739  * On successful return, each message with a non-zero vector will have
1740  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1741  * 1.  Additionally, if any of the IRQs allocated via the previous
1742  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1743  * will be freed back to the system automatically.
1744  *
1745  * For example, suppose a driver has a MSI-X table with 6 messages and
1746  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1747  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1748  * C.  After the call to pci_alloc_msix(), the device will be setup to
1749  * have an MSI-X table of ABC--- (where - means no vector assigned).
1750  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1751  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1752  * be freed back to the system.  This device will also have valid
1753  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1754  *
1755  * In any case, the SYS_RES_IRQ rid X will always map to the message
1756  * at MSI-X table index X - 1 and will only be valid if a vector is
1757  * assigned to that table entry.
1758  */
int
pci_remap_msix_method(device_t dev, device_t child, int count,
    const u_int *vectors)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i, irq, j, *used;

	/*
	 * Have to have at least one message in the table but the
	 * table can't be bigger than the actual MSI-X table in the
	 * device.
	 */
	if (count == 0 || count > msix->msix_msgnum)
		return (EINVAL);

	/* Sanity check the vectors. */
	for (i = 0; i < count; i++)
		if (vectors[i] > msix->msix_alloc)
			return (EINVAL);

	/*
	 * Make sure there aren't any holes in the vectors to be used.
	 * It's a big pain to support it, and it doesn't really make
	 * sense anyway.  Also, at least one vector must be used.
	 */
	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++)
		if (vectors[i] != 0)
			used[vectors[i] - 1] = 1;
	/* Reject a used vector that follows an unused one (a hole). */
	for (i = 0; i < msix->msix_alloc - 1; i++)
		if (used[i] == 0 && used[i + 1] == 1) {
			free(used, M_DEVBUF);
			return (EINVAL);
		}
	if (used[0] != 1) {
		free(used, M_DEVBUF);
		return (EINVAL);
	}

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
	}

	/* Free the existing resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}

	/*
	 * Build the new virtual table keeping track of which vectors are
	 * used.
	 */
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < count; i++)
		msix->msix_table[i].mte_vector = vectors[i];
	msix->msix_table_len = count;

	/* Free any unused IRQs and resize the vectors array if necessary. */
	j = msix->msix_alloc - 1;
	if (used[j] == 0) {
		struct msix_vector *vec;

		/*
		 * Hand trailing unused IRQs back to the parent bridge;
		 * the no-holes check above guarantees they are a suffix.
		 */
		while (used[j] == 0) {
			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
			    msix->msix_vectors[j].mv_irq);
			j--;
		}
		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
		    M_WAITOK);
		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
		    (j + 1));
		free(msix->msix_vectors, M_DEVBUF);
		msix->msix_vectors = vec;
		msix->msix_alloc = j + 1;
	}
	free(used, M_DEVBUF);

	/* Map the IRQs onto the rids. */
	for (i = 0; i < count; i++) {
		if (vectors[i] == 0)
			continue;
		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}

	if (bootverbose) {
		device_printf(child, "Remapped MSI-X IRQs as: ");
		for (i = 0; i < count; i++) {
			if (i != 0)
				printf(", ");
			if (vectors[i] == 0)
				printf("---");
			else
				printf("%d",
				    msix->msix_vectors[vectors[i] - 1].mv_irq);
		}
		printf("\n");
	}

	return (0);
}
1880 
/*
 * Release all MSI-X messages allocated for 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries
 * and hand the IRQs back to the parent bridge.  Returns ENODEV if no
 * messages are allocated, and EBUSY if any message still has a
 * handler attached or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1927 
1928 /*
1929  * Return the max supported MSI-X messages this device supports.
1930  * Basically, assuming the MD code can alloc messages, this function
1931  * should return the maximum value that pci_alloc_msix() can return.
1932  * Thus, it is subject to the tunables, etc.
1933  */
1934 int
1935 pci_msix_count_method(device_t dev, device_t child)
1936 {
1937 	struct pci_devinfo *dinfo = device_get_ivars(child);
1938 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1939 
1940 	if (pci_do_msix && msix->msix_location != 0)
1941 		return (msix->msix_msgnum);
1942 	return (0);
1943 }
1944 
1945 int
1946 pci_msix_pba_bar_method(device_t dev, device_t child)
1947 {
1948 	struct pci_devinfo *dinfo = device_get_ivars(child);
1949 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1950 
1951 	if (pci_do_msix && msix->msix_location != 0)
1952 		return (msix->msix_pba_bar);
1953 	return (-1);
1954 }
1955 
1956 int
1957 pci_msix_table_bar_method(device_t dev, device_t child)
1958 {
1959 	struct pci_devinfo *dinfo = device_get_ivars(child);
1960 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1961 
1962 	if (pci_do_msix && msix->msix_location != 0)
1963 		return (msix->msix_table_bar);
1964 	return (-1);
1965 }
1966 
1967 /*
1968  * HyperTransport MSI mapping control
1969  */
1970 void
1971 pci_ht_map_msi(device_t dev, uint64_t addr)
1972 {
1973 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1974 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1975 
1976 	if (!ht->ht_msimap)
1977 		return;
1978 
1979 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1980 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1981 		/* Enable MSI -> HT mapping. */
1982 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1983 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1984 		    ht->ht_msictrl, 2);
1985 	}
1986 
1987 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1988 		/* Disable MSI -> HT mapping. */
1989 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1990 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1991 		    ht->ht_msictrl, 2);
1992 	}
1993 }
1994 
1995 int
1996 pci_get_max_read_req(device_t dev)
1997 {
1998 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1999 	int cap;
2000 	uint16_t val;
2001 
2002 	cap = dinfo->cfg.pcie.pcie_location;
2003 	if (cap == 0)
2004 		return (0);
2005 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2006 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2007 	val >>= 12;
2008 	return (1 << (val + 7));
2009 }
2010 
2011 int
2012 pci_set_max_read_req(device_t dev, int size)
2013 {
2014 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2015 	int cap;
2016 	uint16_t val;
2017 
2018 	cap = dinfo->cfg.pcie.pcie_location;
2019 	if (cap == 0)
2020 		return (0);
2021 	if (size < 128)
2022 		size = 128;
2023 	if (size > 4096)
2024 		size = 4096;
2025 	size = (1 << (fls(size) - 1));
2026 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2027 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2028 	val |= (fls(size) - 8) << 12;
2029 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2030 	return (size);
2031 }
2032 
2033 uint32_t
2034 pcie_read_config(device_t dev, int reg, int width)
2035 {
2036 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2037 	int cap;
2038 
2039 	cap = dinfo->cfg.pcie.pcie_location;
2040 	if (cap == 0) {
2041 		if (width == 2)
2042 			return (0xffff);
2043 		return (0xffffffff);
2044 	}
2045 
2046 	return (pci_read_config(dev, cap + reg, width));
2047 }
2048 
2049 void
2050 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2051 {
2052 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2053 	int cap;
2054 
2055 	cap = dinfo->cfg.pcie.pcie_location;
2056 	if (cap == 0)
2057 		return;
2058 	pci_write_config(dev, cap + reg, value, width);
2059 }
2060 
2061 /*
2062  * Adjusts a PCI-e capability register by clearing the bits in mask
2063  * and setting the bits in (value & mask).  Bits not set in mask are
2064  * not adjusted.
2065  *
2066  * Returns the old value on success or all ones on failure.
2067  */
2068 uint32_t
2069 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2070     int width)
2071 {
2072 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2073 	uint32_t old, new;
2074 	int cap;
2075 
2076 	cap = dinfo->cfg.pcie.pcie_location;
2077 	if (cap == 0) {
2078 		if (width == 2)
2079 			return (0xffff);
2080 		return (0xffffffff);
2081 	}
2082 
2083 	old = pci_read_config(dev, cap + reg, width);
2084 	new = old & ~mask;
2085 	new |= (value & mask);
2086 	pci_write_config(dev, cap + reg, new, width);
2087 	return (old);
2088 }
2089 
2090 /*
2091  * Support for MSI message signalled interrupts.
2092  */
2093 void
2094 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2095     uint16_t data)
2096 {
2097 	struct pci_devinfo *dinfo = device_get_ivars(child);
2098 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2099 
2100 	/* Write data and address values. */
2101 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2102 	    address & 0xffffffff, 4);
2103 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2104 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2105 		    address >> 32, 4);
2106 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2107 		    data, 2);
2108 	} else
2109 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2110 		    2);
2111 
2112 	/* Enable MSI in the control register. */
2113 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2114 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2115 	    msi->msi_ctrl, 2);
2116 
2117 	/* Enable MSI -> HT mapping. */
2118 	pci_ht_map_msi(child, address);
2119 }
2120 
2121 void
2122 pci_disable_msi_method(device_t dev, device_t child)
2123 {
2124 	struct pci_devinfo *dinfo = device_get_ivars(child);
2125 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2126 
2127 	/* Disable MSI -> HT mapping. */
2128 	pci_ht_map_msi(child, 0);
2129 
2130 	/* Disable MSI in the control register. */
2131 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2132 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2133 	    msi->msi_ctrl, 2);
2134 }
2135 
2136 /*
2137  * Restore MSI registers during resume.  If MSI is enabled then
2138  * restore the data and address registers in addition to the control
2139  * register.
2140  */
2141 static void
2142 pci_resume_msi(device_t dev)
2143 {
2144 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2145 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2146 	uint64_t address;
2147 	uint16_t data;
2148 
2149 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2150 		address = msi->msi_addr;
2151 		data = msi->msi_data;
2152 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2153 		    address & 0xffffffff, 4);
2154 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2155 			pci_write_config(dev, msi->msi_location +
2156 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2157 			pci_write_config(dev, msi->msi_location +
2158 			    PCIR_MSI_DATA_64BIT, data, 2);
2159 		} else
2160 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2161 			    data, 2);
2162 	}
2163 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2164 	    2);
2165 }
2166 
/*
 * Re-program the MSI or MSI-X address/data pair for 'irq' after the
 * parent bridge has moved it (e.g. an interrupt migrated to another
 * CPU).  Returns 0 on success, ENOENT if the IRQ is not one of this
 * device's message interrupts, or an error from PCIB_MAP_MSI().
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				/* Fetch the new routing from the bridge. */
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update, and re-enable MSI. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2239 
2240 /*
2241  * Returns true if the specified device is blacklisted because MSI
2242  * doesn't work.
2243  */
2244 int
2245 pci_msi_device_blacklisted(device_t dev)
2246 {
2247 
2248 	if (!pci_honor_msi_blacklist)
2249 		return (0);
2250 
2251 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2252 }
2253 
2254 /*
2255  * Determine if MSI is blacklisted globally on this system.  Currently,
2256  * we just check for blacklisted chipsets as represented by the
2257  * host-PCI bridge at device 0:0:0.  In the future, it may become
2258  * necessary to check other system attributes, such as the kenv values
2259  * that give the motherboard manufacturer and model number.
2260  */
2261 static int
2262 pci_msi_blacklisted(void)
2263 {
2264 	device_t dev;
2265 
2266 	if (!pci_honor_msi_blacklist)
2267 		return (0);
2268 
2269 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2270 	if (!(pcie_chipset || pcix_chipset)) {
2271 		if (vm_guest != VM_GUEST_NO) {
2272 			/*
2273 			 * Whitelist older chipsets in virtual
2274 			 * machines known to support MSI.
2275 			 */
2276 			dev = pci_find_bsf(0, 0, 0);
2277 			if (dev != NULL)
2278 				return (!pci_has_quirk(pci_get_devid(dev),
2279 					PCI_QUIRK_ENABLE_MSI_VM));
2280 		}
2281 		return (1);
2282 	}
2283 
2284 	dev = pci_find_bsf(0, 0, 0);
2285 	if (dev != NULL)
2286 		return (pci_msi_device_blacklisted(dev));
2287 	return (0);
2288 }
2289 
2290 /*
2291  * Returns true if the specified device is blacklisted because MSI-X
2292  * doesn't work.  Note that this assumes that if MSI doesn't work,
2293  * MSI-X doesn't either.
2294  */
2295 int
2296 pci_msix_device_blacklisted(device_t dev)
2297 {
2298 
2299 	if (!pci_honor_msi_blacklist)
2300 		return (0);
2301 
2302 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2303 		return (1);
2304 
2305 	return (pci_msi_device_blacklisted(dev));
2306 }
2307 
2308 /*
2309  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2310  * is blacklisted, assume that MSI-X is as well.  Check for additional
2311  * chipsets where MSI works but MSI-X does not.
2312  */
2313 static int
2314 pci_msix_blacklisted(void)
2315 {
2316 	device_t dev;
2317 
2318 	if (!pci_honor_msi_blacklist)
2319 		return (0);
2320 
2321 	dev = pci_find_bsf(0, 0, 0);
2322 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2323 	    PCI_QUIRK_DISABLE_MSIX))
2324 		return (1);
2325 
2326 	return (pci_msi_blacklisted());
2327 }
2328 
2329 /*
2330  * Attempt to allocate *count MSI messages.  The actual number allocated is
2331  * returned in *count.  After this function returns, each message will be
2332  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2333  */
2334 int
2335 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2336 {
2337 	struct pci_devinfo *dinfo = device_get_ivars(child);
2338 	pcicfgregs *cfg = &dinfo->cfg;
2339 	struct resource_list_entry *rle;
2340 	int actual, error, i, irqs[32];
2341 	uint16_t ctrl;
2342 
2343 	/* Don't let count == 0 get us into trouble. */
2344 	if (*count == 0)
2345 		return (EINVAL);
2346 
2347 	/* If rid 0 is allocated, then fail. */
2348 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2349 	if (rle != NULL && rle->res != NULL)
2350 		return (ENXIO);
2351 
2352 	/* Already have allocated messages? */
2353 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2354 		return (ENXIO);
2355 
2356 	/* If MSI is blacklisted for this system, fail. */
2357 	if (pci_msi_blacklisted())
2358 		return (ENXIO);
2359 
2360 	/* MSI capability present? */
2361 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2362 		return (ENODEV);
2363 
2364 	if (bootverbose)
2365 		device_printf(child,
2366 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2367 		    *count, cfg->msi.msi_msgnum);
2368 
2369 	/* Don't ask for more than the device supports. */
2370 	actual = min(*count, cfg->msi.msi_msgnum);
2371 
2372 	/* Don't ask for more than 32 messages. */
2373 	actual = min(actual, 32);
2374 
2375 	/* MSI requires power of 2 number of messages. */
2376 	if (!powerof2(actual))
2377 		return (EINVAL);
2378 
2379 	for (;;) {
2380 		/* Try to allocate N messages. */
2381 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2382 		    actual, irqs);
2383 		if (error == 0)
2384 			break;
2385 		if (actual == 1)
2386 			return (error);
2387 
2388 		/* Try N / 2. */
2389 		actual >>= 1;
2390 	}
2391 
2392 	/*
2393 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2394 	 * resources in the irqs[] array, so add new resources
2395 	 * starting at rid 1.
2396 	 */
2397 	for (i = 0; i < actual; i++)
2398 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2399 		    irqs[i], irqs[i], 1);
2400 
2401 	if (bootverbose) {
2402 		if (actual == 1)
2403 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2404 		else {
2405 			int run;
2406 
2407 			/*
2408 			 * Be fancy and try to print contiguous runs
2409 			 * of IRQ values as ranges.  'run' is true if
2410 			 * we are in a range.
2411 			 */
2412 			device_printf(child, "using IRQs %d", irqs[0]);
2413 			run = 0;
2414 			for (i = 1; i < actual; i++) {
2415 
2416 				/* Still in a run? */
2417 				if (irqs[i] == irqs[i - 1] + 1) {
2418 					run = 1;
2419 					continue;
2420 				}
2421 
2422 				/* Finish previous range. */
2423 				if (run) {
2424 					printf("-%d", irqs[i - 1]);
2425 					run = 0;
2426 				}
2427 
2428 				/* Start new range. */
2429 				printf(",%d", irqs[i]);
2430 			}
2431 
2432 			/* Unfinished range? */
2433 			if (run)
2434 				printf("-%d", irqs[actual - 1]);
2435 			printf(" for MSI\n");
2436 		}
2437 	}
2438 
2439 	/* Update control register with actual count. */
2440 	ctrl = cfg->msi.msi_ctrl;
2441 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2442 	ctrl |= (ffs(actual) - 1) << 4;
2443 	cfg->msi.msi_ctrl = ctrl;
2444 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2445 
2446 	/* Update counts of alloc'd messages. */
2447 	cfg->msi.msi_alloc = actual;
2448 	cfg->msi.msi_handlers = 0;
2449 	*count = actual;
2450 	return (0);
2451 }
2452 
/*
 * Release the MSI messages associated with this device.  Returns 0 on
 * success, ENODEV if no MSI messages are allocated, EBUSY if any of
 * the IRQ resources are still held by a driver, or an MSI-X error if
 * MSI-X messages were allocated instead.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2501 
2502 /*
2503  * Return the max supported MSI messages this device supports.
2504  * Basically, assuming the MD code can alloc messages, this function
2505  * should return the maximum value that pci_alloc_msi() can return.
2506  * Thus, it is subject to the tunables, etc.
2507  */
2508 int
2509 pci_msi_count_method(device_t dev, device_t child)
2510 {
2511 	struct pci_devinfo *dinfo = device_get_ivars(child);
2512 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2513 
2514 	if (pci_do_msi && msi->msi_location != 0)
2515 		return (msi->msi_msgnum);
2516 	return (0);
2517 }
2518 
2519 /* free pcicfgregs structure and all depending data structures */
2520 
2521 int
2522 pci_freecfg(struct pci_devinfo *dinfo)
2523 {
2524 	struct devlist *devlist_head;
2525 	struct pci_map *pm, *next;
2526 	int i;
2527 
2528 	devlist_head = &pci_devq;
2529 
2530 	if (dinfo->cfg.vpd.vpd_reg) {
2531 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2532 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2533 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2534 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2535 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2536 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2537 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2538 	}
2539 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2540 		free(pm, M_DEVBUF);
2541 	}
2542 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2543 	free(dinfo, M_DEVBUF);
2544 
2545 	/* increment the generation count */
2546 	pci_generation++;
2547 
2548 	/* we're losing one device */
2549 	pci_numdevs--;
2550 	return (0);
2551 }
2552 
2553 /*
2554  * PCI power manangement
2555  */
2556 int
2557 pci_set_powerstate_method(device_t dev, device_t child, int state)
2558 {
2559 	struct pci_devinfo *dinfo = device_get_ivars(child);
2560 	pcicfgregs *cfg = &dinfo->cfg;
2561 	uint16_t status;
2562 	int oldstate, highest, delay;
2563 
2564 	if (cfg->pp.pp_cap == 0)
2565 		return (EOPNOTSUPP);
2566 
2567 	/*
2568 	 * Optimize a no state change request away.  While it would be OK to
2569 	 * write to the hardware in theory, some devices have shown odd
2570 	 * behavior when going from D3 -> D3.
2571 	 */
2572 	oldstate = pci_get_powerstate(child);
2573 	if (oldstate == state)
2574 		return (0);
2575 
2576 	/*
2577 	 * The PCI power management specification states that after a state
2578 	 * transition between PCI power states, system software must
2579 	 * guarantee a minimal delay before the function accesses the device.
2580 	 * Compute the worst case delay that we need to guarantee before we
2581 	 * access the device.  Many devices will be responsive much more
2582 	 * quickly than this delay, but there are some that don't respond
2583 	 * instantly to state changes.  Transitions to/from D3 state require
2584 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2585 	 * is done below with DELAY rather than a sleeper function because
2586 	 * this function can be called from contexts where we cannot sleep.
2587 	 */
2588 	highest = (oldstate > state) ? oldstate : state;
2589 	if (highest == PCI_POWERSTATE_D3)
2590 	    delay = 10000;
2591 	else if (highest == PCI_POWERSTATE_D2)
2592 	    delay = 200;
2593 	else
2594 	    delay = 0;
2595 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2596 	    & ~PCIM_PSTAT_DMASK;
2597 	switch (state) {
2598 	case PCI_POWERSTATE_D0:
2599 		status |= PCIM_PSTAT_D0;
2600 		break;
2601 	case PCI_POWERSTATE_D1:
2602 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2603 			return (EOPNOTSUPP);
2604 		status |= PCIM_PSTAT_D1;
2605 		break;
2606 	case PCI_POWERSTATE_D2:
2607 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2608 			return (EOPNOTSUPP);
2609 		status |= PCIM_PSTAT_D2;
2610 		break;
2611 	case PCI_POWERSTATE_D3:
2612 		status |= PCIM_PSTAT_D3;
2613 		break;
2614 	default:
2615 		return (EINVAL);
2616 	}
2617 
2618 	if (bootverbose)
2619 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2620 		    state);
2621 
2622 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2623 	if (delay)
2624 		DELAY(delay);
2625 	return (0);
2626 }
2627 
2628 int
2629 pci_get_powerstate_method(device_t dev, device_t child)
2630 {
2631 	struct pci_devinfo *dinfo = device_get_ivars(child);
2632 	pcicfgregs *cfg = &dinfo->cfg;
2633 	uint16_t status;
2634 	int result;
2635 
2636 	if (cfg->pp.pp_cap != 0) {
2637 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2638 		switch (status & PCIM_PSTAT_DMASK) {
2639 		case PCIM_PSTAT_D0:
2640 			result = PCI_POWERSTATE_D0;
2641 			break;
2642 		case PCIM_PSTAT_D1:
2643 			result = PCI_POWERSTATE_D1;
2644 			break;
2645 		case PCIM_PSTAT_D2:
2646 			result = PCI_POWERSTATE_D2;
2647 			break;
2648 		case PCIM_PSTAT_D3:
2649 			result = PCI_POWERSTATE_D3;
2650 			break;
2651 		default:
2652 			result = PCI_POWERSTATE_UNKNOWN;
2653 			break;
2654 		}
2655 	} else {
2656 		/* No support, device is always at D0 */
2657 		result = PCI_POWERSTATE_D0;
2658 	}
2659 	return (result);
2660 }
2661 
2662 /*
2663  * Some convenience functions for PCI device drivers.
2664  */
2665 
2666 static __inline void
2667 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2668 {
2669 	uint16_t	command;
2670 
2671 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2672 	command |= bit;
2673 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2674 }
2675 
2676 static __inline void
2677 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2678 {
2679 	uint16_t	command;
2680 
2681 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2682 	command &= ~bit;
2683 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2684 }
2685 
2686 int
2687 pci_enable_busmaster_method(device_t dev, device_t child)
2688 {
2689 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2690 	return (0);
2691 }
2692 
2693 int
2694 pci_disable_busmaster_method(device_t dev, device_t child)
2695 {
2696 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2697 	return (0);
2698 }
2699 
2700 int
2701 pci_enable_io_method(device_t dev, device_t child, int space)
2702 {
2703 	uint16_t bit;
2704 
2705 	switch(space) {
2706 	case SYS_RES_IOPORT:
2707 		bit = PCIM_CMD_PORTEN;
2708 		break;
2709 	case SYS_RES_MEMORY:
2710 		bit = PCIM_CMD_MEMEN;
2711 		break;
2712 	default:
2713 		return (EINVAL);
2714 	}
2715 	pci_set_command_bit(dev, child, bit);
2716 	return (0);
2717 }
2718 
2719 int
2720 pci_disable_io_method(device_t dev, device_t child, int space)
2721 {
2722 	uint16_t bit;
2723 
2724 	switch(space) {
2725 	case SYS_RES_IOPORT:
2726 		bit = PCIM_CMD_PORTEN;
2727 		break;
2728 	case SYS_RES_MEMORY:
2729 		bit = PCIM_CMD_MEMEN;
2730 		break;
2731 	default:
2732 		return (EINVAL);
2733 	}
2734 	pci_clear_command_bit(dev, child, bit);
2735 	return (0);
2736 }
2737 
2738 /*
2739  * New style pci driver.  Parent device is either a pci-host-bridge or a
2740  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2741  */
2742 
/*
 * Dump a device's config-header fields and capability summary to the
 * console.  Only active when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability summary. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2799 
2800 static int
2801 pci_porten(device_t dev)
2802 {
2803 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2804 }
2805 
2806 static int
2807 pci_memen(device_t dev)
2808 {
2809 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2810 }
2811 
/*
 * Read the current value of the BAR at config register 'reg' and probe
 * its size.  On return, *mapp holds the raw BAR value, *testvalp holds
 * the value read back after writing all ones (used to derive the BAR
 * size), and *bar64 (if non-NULL) is set non-zero for 64-bit BARs.
 * The original BAR contents and command register are restored before
 * returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2880 
2881 static void
2882 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2883 {
2884 	struct pci_devinfo *dinfo;
2885 	int ln2range;
2886 
2887 	/* The device ROM BAR is always a 32-bit memory BAR. */
2888 	dinfo = device_get_ivars(dev);
2889 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2890 		ln2range = 32;
2891 	else
2892 		ln2range = pci_maprange(pm->pm_value);
2893 	pci_write_config(dev, pm->pm_reg, base, 4);
2894 	if (ln2range == 64)
2895 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2896 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2897 	if (ln2range == 64)
2898 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2899 		    pm->pm_reg + 4, 4) << 32;
2900 }
2901 
2902 struct pci_map *
2903 pci_find_bar(device_t dev, int reg)
2904 {
2905 	struct pci_devinfo *dinfo;
2906 	struct pci_map *pm;
2907 
2908 	dinfo = device_get_ivars(dev);
2909 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2910 		if (pm->pm_reg == reg)
2911 			return (pm);
2912 	}
2913 	return (NULL);
2914 }
2915 
2916 int
2917 pci_bar_enabled(device_t dev, struct pci_map *pm)
2918 {
2919 	struct pci_devinfo *dinfo;
2920 	uint16_t cmd;
2921 
2922 	dinfo = device_get_ivars(dev);
2923 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2924 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2925 		return (0);
2926 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2927 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2928 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2929 	else
2930 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2931 }
2932 
/*
 * Allocate a new BAR record for config register 'reg' and insert it
 * into the device's map list, which is kept sorted by register offset.
 *
 * NOTE(review): the insertion loop assumes BARs are added in roughly
 * increasing register order (as the probe scan does); inserting a
 * register lower than the current head would land out of order —
 * verify if callers ever add BARs out of sequence.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after, keeping the list ordered. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2957 
2958 static void
2959 pci_restore_bars(device_t dev)
2960 {
2961 	struct pci_devinfo *dinfo;
2962 	struct pci_map *pm;
2963 	int ln2range;
2964 
2965 	dinfo = device_get_ivars(dev);
2966 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2967 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2968 			ln2range = 32;
2969 		else
2970 			ln2range = pci_maprange(pm->pm_value);
2971 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2972 		if (ln2range == 64)
2973 			pci_write_config(dev, pm->pm_reg + 4,
2974 			    pm->pm_value >> 32, 4);
2975 	}
2976 }
2977 
2978 /*
2979  * Add a resource based on a pci map register. Return 1 if the map
2980  * register is a 32bit map register or 2 if it is a 64bit register.
2981  */
2982 static int
2983 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2984     int force, int prefetch)
2985 {
2986 	struct pci_map *pm;
2987 	pci_addr_t base, map, testval;
2988 	pci_addr_t start, end, count;
2989 	int barlen, basezero, flags, maprange, mapsize, type;
2990 	uint16_t cmd;
2991 	struct resource *res;
2992 
2993 	/*
2994 	 * The BAR may already exist if the device is a CardBus card
2995 	 * whose CIS is stored in this BAR.
2996 	 */
2997 	pm = pci_find_bar(dev, reg);
2998 	if (pm != NULL) {
2999 		maprange = pci_maprange(pm->pm_value);
3000 		barlen = maprange == 64 ? 2 : 1;
3001 		return (barlen);
3002 	}
3003 
3004 	pci_read_bar(dev, reg, &map, &testval, NULL);
3005 	if (PCI_BAR_MEM(map)) {
3006 		type = SYS_RES_MEMORY;
3007 		if (map & PCIM_BAR_MEM_PREFETCH)
3008 			prefetch = 1;
3009 	} else
3010 		type = SYS_RES_IOPORT;
3011 	mapsize = pci_mapsize(testval);
3012 	base = pci_mapbase(map);
3013 #ifdef __PCI_BAR_ZERO_VALID
3014 	basezero = 0;
3015 #else
3016 	basezero = base == 0;
3017 #endif
3018 	maprange = pci_maprange(map);
3019 	barlen = maprange == 64 ? 2 : 1;
3020 
3021 	/*
3022 	 * For I/O registers, if bottom bit is set, and the next bit up
3023 	 * isn't clear, we know we have a BAR that doesn't conform to the
3024 	 * spec, so ignore it.  Also, sanity check the size of the data
3025 	 * areas to the type of memory involved.  Memory must be at least
3026 	 * 16 bytes in size, while I/O ranges must be at least 4.
3027 	 */
3028 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3029 		return (barlen);
3030 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3031 	    (type == SYS_RES_IOPORT && mapsize < 2))
3032 		return (barlen);
3033 
3034 	/* Save a record of this BAR. */
3035 	pm = pci_add_bar(dev, reg, map, mapsize);
3036 	if (bootverbose) {
3037 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3038 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3039 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3040 			printf(", port disabled\n");
3041 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3042 			printf(", memory disabled\n");
3043 		else
3044 			printf(", enabled\n");
3045 	}
3046 
3047 	/*
3048 	 * If base is 0, then we have problems if this architecture does
3049 	 * not allow that.  It is best to ignore such entries for the
3050 	 * moment.  These will be allocated later if the driver specifically
3051 	 * requests them.  However, some removable busses look better when
3052 	 * all resources are allocated, so allow '0' to be overriden.
3053 	 *
3054 	 * Similarly treat maps whose values is the same as the test value
3055 	 * read back.  These maps have had all f's written to them by the
3056 	 * BIOS in an attempt to disable the resources.
3057 	 */
3058 	if (!force && (basezero || map == testval))
3059 		return (barlen);
3060 	if ((u_long)base != base) {
3061 		device_printf(bus,
3062 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3063 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3064 		    pci_get_function(dev), reg);
3065 		return (barlen);
3066 	}
3067 
3068 	/*
3069 	 * This code theoretically does the right thing, but has
3070 	 * undesirable side effects in some cases where peripherals
3071 	 * respond oddly to having these bits enabled.  Let the user
3072 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3073 	 * default).
3074 	 */
3075 	if (pci_enable_io_modes) {
3076 		/* Turn on resources that have been left off by a lazy BIOS */
3077 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3078 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3079 			cmd |= PCIM_CMD_PORTEN;
3080 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3081 		}
3082 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3083 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3084 			cmd |= PCIM_CMD_MEMEN;
3085 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3086 		}
3087 	} else {
3088 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3089 			return (barlen);
3090 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3091 			return (barlen);
3092 	}
3093 
3094 	count = (pci_addr_t)1 << mapsize;
3095 	flags = RF_ALIGNMENT_LOG2(mapsize);
3096 	if (prefetch)
3097 		flags |= RF_PREFETCHABLE;
3098 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3099 		start = 0;	/* Let the parent decide. */
3100 		end = ~0;
3101 	} else {
3102 		start = base;
3103 		end = base + count - 1;
3104 	}
3105 	resource_list_add(rl, type, reg, start, end, count);
3106 
3107 	/*
3108 	 * Try to allocate the resource for this BAR from our parent
3109 	 * so that this resource range is already reserved.  The
3110 	 * driver for this device will later inherit this resource in
3111 	 * pci_alloc_resource().
3112 	 */
3113 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3114 	    flags);
3115 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3116 		/*
3117 		 * If the allocation fails, try to allocate a resource for
3118 		 * this BAR using any available range.  The firmware felt
3119 		 * it was important enough to assign a resource, so don't
3120 		 * disable decoding if we can help it.
3121 		 */
3122 		resource_list_delete(rl, type, reg);
3123 		resource_list_add(rl, type, reg, 0, ~0, count);
3124 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3125 		    count, flags);
3126 	}
3127 	if (res == NULL) {
3128 		/*
3129 		 * If the allocation fails, delete the resource list entry
3130 		 * and disable decoding for this device.
3131 		 *
3132 		 * If the driver requests this resource in the future,
3133 		 * pci_reserve_map() will try to allocate a fresh
3134 		 * resource range.
3135 		 */
3136 		resource_list_delete(rl, type, reg);
3137 		pci_disable_io(dev, type);
3138 		if (bootverbose)
3139 			device_printf(bus,
3140 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3141 			    pci_get_domain(dev), pci_get_bus(dev),
3142 			    pci_get_slot(dev), pci_get_function(dev), reg);
3143 	} else {
3144 		start = rman_get_start(res);
3145 		pci_write_bar(dev, pm, start);
3146 	}
3147 	return (barlen);
3148 }
3149 
3150 /*
3151  * For ATA devices we need to decide early what addressing mode to use.
3152  * Legacy demands that the primary and secondary ATA ports sits on the
3153  * same addresses that old ISA hardware did. This dictates that we use
3154  * those addresses and ignore the BAR's if we cannot set PCI native
3155  * addressing mode.
3156  */
3157 static void
3158 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3159     uint32_t prefetchmask)
3160 {
3161 	int rid, type, progif;
3162 #if 0
3163 	/* if this device supports PCI native addressing use it */
3164 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3165 	if ((progif & 0x8a) == 0x8a) {
3166 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3167 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3168 			printf("Trying ATA native PCI addressing mode\n");
3169 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3170 		}
3171 	}
3172 #endif
3173 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3174 	type = SYS_RES_IOPORT;
3175 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3176 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3177 		    prefetchmask & (1 << 0));
3178 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3179 		    prefetchmask & (1 << 1));
3180 	} else {
3181 		rid = PCIR_BAR(0);
3182 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3183 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3184 		    0x1f7, 8, 0);
3185 		rid = PCIR_BAR(1);
3186 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3187 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3188 		    0x3f6, 1, 0);
3189 	}
3190 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3191 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3192 		    prefetchmask & (1 << 2));
3193 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3194 		    prefetchmask & (1 << 3));
3195 	} else {
3196 		rid = PCIR_BAR(2);
3197 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3198 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3199 		    0x177, 8, 0);
3200 		rid = PCIR_BAR(3);
3201 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3202 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3203 		    0x376, 1, 0);
3204 	}
3205 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3206 	    prefetchmask & (1 << 4));
3207 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3208 	    prefetchmask & (1 << 5));
3209 }
3210 
/*
 * Determine the IRQ to use for this device's legacy INTx interrupt
 * and publish it as SYS_RES_IRQ rid 0 in the device's resource list,
 * updating the intline config register if the value changed.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/*
	 * Only accept values in the range 1..254; 255 is the
	 * traditional "no connection" intline value.
	 */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3258 
3259 /* Perform early OHCI takeover from SMM. */
3260 static void
3261 ohci_early_takeover(device_t self)
3262 {
3263 	struct resource *res;
3264 	uint32_t ctl;
3265 	int rid;
3266 	int i;
3267 
3268 	rid = PCIR_BAR(0);
3269 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3270 	if (res == NULL)
3271 		return;
3272 
3273 	ctl = bus_read_4(res, OHCI_CONTROL);
3274 	if (ctl & OHCI_IR) {
3275 		if (bootverbose)
3276 			printf("ohci early: "
3277 			    "SMM active, request owner change\n");
3278 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3279 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3280 			DELAY(1000);
3281 			ctl = bus_read_4(res, OHCI_CONTROL);
3282 		}
3283 		if (ctl & OHCI_IR) {
3284 			if (bootverbose)
3285 				printf("ohci early: "
3286 				    "SMM does not respond, resetting\n");
3287 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3288 		}
3289 		/* Disable interrupts */
3290 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3291 	}
3292 
3293 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3294 }
3295 
3296 /* Perform early UHCI takeover from SMM. */
3297 static void
3298 uhci_early_takeover(device_t self)
3299 {
3300 	struct resource *res;
3301 	int rid;
3302 
3303 	/*
3304 	 * Set the PIRQD enable bit and switch off all the others. We don't
3305 	 * want legacy support to interfere with us XXX Does this also mean
3306 	 * that the BIOS won't touch the keyboard anymore if it is connected
3307 	 * to the ports of the root hub?
3308 	 */
3309 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3310 
3311 	/* Disable interrupts */
3312 	rid = PCI_UHCI_BASE_REG;
3313 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3314 	if (res != NULL) {
3315 		bus_write_2(res, UHCI_INTR, 0);
3316 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3317 	}
3318 }
3319 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* EHCI extended capabilities live in PCI config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* BIOS semaphore clear means the BIOS doesn't own it. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Wait up to 100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3375 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	/*
	 * NOTE(review): eecp holds a byte offset derived from a
	 * dword-indexed xECP field; offsets above 255 would be
	 * truncated by uint8_t -- confirm this is safe for the
	 * controllers this targets.
	 */
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones seed so XHCI_XECP_NEXT(eec) is nonzero on first test. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		/* xHCI extended capabilities live in MMIO space. */
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		/* BIOS semaphore clear means the BIOS doesn't own it. */
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3437 
3438 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the bus number range [secbus, subbus] decoded by a PCI-PCI
 * or CardBus bridge from the parent's bus number space.  If the
 * programmed range is invalid or the reservation fails, the secbus
 * and subbus registers are cleared so the buses get renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Register offsets differ between bridge and CardBus headers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		/* Not a bridge; nothing to reserve. */
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	/* A valid range has a non-zero secbus and subbus >= secbus. */
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Invalid range, or reservation failed: force renumbering later. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3543 
/*
 * Allocate the secondary bus number range (rid 0) for a child bridge.
 * If the range was not reserved earlier, reserve it lazily from the
 * parent and program the bridge's secbus/subbus registers to match
 * the allocation.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only PCI-PCI and CardBus bridges have bus number registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Bus number ranges are only ever exposed as rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without activating; the final alloc activates. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the allocated range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3594 #endif
3595 
3596 static int
3597 pci_ea_bei_to_rid(device_t dev, int bei)
3598 {
3599 #ifdef PCI_IOV
3600 	struct pci_devinfo *dinfo;
3601 	int iov_pos;
3602 	struct pcicfg_iov *iov;
3603 
3604 	dinfo = device_get_ivars(dev);
3605 	iov = dinfo->cfg.iov;
3606 	if (iov != NULL)
3607 		iov_pos = iov->iov_pos;
3608 	else
3609 		iov_pos = 0;
3610 #endif
3611 
3612 	/* Check if matches BAR */
3613 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3614 	    (bei <= PCIM_EA_BEI_BAR_5))
3615 		return (PCIR_BAR(bei));
3616 
3617 	/* Check ROM */
3618 	if (bei == PCIM_EA_BEI_ROM)
3619 		return (PCIR_BIOS);
3620 
3621 #ifdef PCI_IOV
3622 	/* Check if matches VF_BAR */
3623 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3624 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3625 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3626 		    iov_pos);
3627 #endif
3628 
3629 	return (-1);
3630 }
3631 
3632 int
3633 pci_ea_is_enabled(device_t dev, int rid)
3634 {
3635 	struct pci_ea_entry *ea;
3636 	struct pci_devinfo *dinfo;
3637 
3638 	dinfo = device_get_ivars(dev);
3639 
3640 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3641 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3642 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3643 	}
3644 
3645 	return (0);
3646 }
3647 
3648 void
3649 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3650 {
3651 	struct pci_ea_entry *ea;
3652 	struct pci_devinfo *dinfo;
3653 	pci_addr_t start, end, count;
3654 	struct resource_list *rl;
3655 	int type, flags, rid;
3656 	struct resource *res;
3657 	uint32_t tmp;
3658 #ifdef PCI_IOV
3659 	struct pcicfg_iov *iov;
3660 #endif
3661 
3662 	dinfo = device_get_ivars(dev);
3663 	rl = &dinfo->resources;
3664 	flags = 0;
3665 
3666 #ifdef PCI_IOV
3667 	iov = dinfo->cfg.iov;
3668 #endif
3669 
3670 	if (dinfo->cfg.ea.ea_location == 0)
3671 		return;
3672 
3673 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3674 
3675 		/*
3676 		 * TODO: Ignore EA-BAR if is not enabled.
3677 		 *   Currently the EA implementation supports
3678 		 *   only situation, where EA structure contains
3679 		 *   predefined entries. In case they are not enabled
3680 		 *   leave them unallocated and proceed with
3681 		 *   a legacy-BAR mechanism.
3682 		 */
3683 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3684 			continue;
3685 
3686 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3687 		case PCIM_EA_P_MEM_PREFETCH:
3688 		case PCIM_EA_P_VF_MEM_PREFETCH:
3689 			flags = RF_PREFETCHABLE;
3690 			/* FALLTHROUGH */
3691 		case PCIM_EA_P_VF_MEM:
3692 		case PCIM_EA_P_MEM:
3693 			type = SYS_RES_MEMORY;
3694 			break;
3695 		case PCIM_EA_P_IO:
3696 			type = SYS_RES_IOPORT;
3697 			break;
3698 		default:
3699 			continue;
3700 		}
3701 
3702 		if (alloc_iov != 0) {
3703 #ifdef PCI_IOV
3704 			/* Allocating IOV, confirm BEI matches */
3705 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3706 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3707 				continue;
3708 #else
3709 			continue;
3710 #endif
3711 		} else {
3712 			/* Allocating BAR, confirm BEI matches */
3713 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3714 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3715 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3716 				continue;
3717 		}
3718 
3719 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3720 		if (rid < 0)
3721 			continue;
3722 
3723 		/* Skip resources already allocated by EA */
3724 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3725 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3726 			continue;
3727 
3728 		start = ea->eae_base;
3729 		count = ea->eae_max_offset + 1;
3730 #ifdef PCI_IOV
3731 		if (iov != NULL)
3732 			count = count * iov->iov_num_vfs;
3733 #endif
3734 		end = start + count - 1;
3735 		if (count == 0)
3736 			continue;
3737 
3738 		resource_list_add(rl, type, rid, start, end, count);
3739 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3740 		    flags);
3741 		if (res == NULL) {
3742 			resource_list_delete(rl, type, rid);
3743 
3744 			/*
3745 			 * Failed to allocate using EA, disable entry.
3746 			 * Another attempt to allocation will be performed
3747 			 * further, but this time using legacy BAR registers
3748 			 */
3749 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3750 			tmp &= ~PCIM_EA_ENABLE;
3751 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3752 
3753 			/*
3754 			 * Disabling entry might fail in case it is hardwired.
3755 			 * Read flags again to match current status.
3756 			 */
3757 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3758 
3759 			continue;
3760 		}
3761 
3762 		/* As per specification, fill BAR with zeros */
3763 		pci_write_config(dev, rid, 0, 4);
3764 	}
3765 }
3766 
/*
 * Populate dev's resource list: reserve EA-described ranges first,
 * then walk the BARs (with special handling for legacy ATA devices),
 * apply quirk-driven extra maps, route the INTx interrupt, and
 * optionally take over USB host controllers from SMM.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/*
	 * ATA devices needs special map treatment: use it when the
	 * device is in master/compatibility mode or when both channel
	 * BARs are unprogrammed.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * pci_add_map() returns the number of BAR
			 * registers consumed (2 for a 64-bit BAR).
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB host controllers away from the BIOS/SMM if asked. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3859 
3860 static struct pci_devinfo *
3861 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3862     int slot, int func)
3863 {
3864 	struct pci_devinfo *dinfo;
3865 
3866 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3867 	if (dinfo != NULL)
3868 		pci_add_child(dev, dinfo);
3869 
3870 	return (dinfo);
3871 }
3872 
/*
 * Scan every slot and function on bus (domain, busno) and add a child
 * device for each function found.
 */
void
pci_add_children(device_t dev, int domain, int busno)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		/* The REG() macro reads through f; reset it first. */
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may implement functions 1..N. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f);
	}
#undef REG
}
3915 
3916 int
3917 pci_rescan_method(device_t dev)
3918 {
3919 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3920 	device_t pcib = device_get_parent(dev);
3921 	struct pci_softc *sc;
3922 	device_t child, *devlist, *unchanged;
3923 	int devcount, error, i, j, maxslots, oldcount;
3924 	int busno, domain, s, f, pcifunchigh;
3925 	uint8_t hdrtype;
3926 
3927 	/* No need to check for ARI on a rescan. */
3928 	error = device_get_children(dev, &devlist, &devcount);
3929 	if (error)
3930 		return (error);
3931 	if (devcount != 0) {
3932 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3933 		    M_NOWAIT | M_ZERO);
3934 		if (unchanged == NULL) {
3935 			free(devlist, M_TEMP);
3936 			return (ENOMEM);
3937 		}
3938 	} else
3939 		unchanged = NULL;
3940 
3941 	sc = device_get_softc(dev);
3942 	domain = pcib_get_domain(dev);
3943 	busno = pcib_get_bus(dev);
3944 	maxslots = PCIB_MAXSLOTS(pcib);
3945 	for (s = 0; s <= maxslots; s++) {
3946 		/* If function 0 is not present, skip to the next slot. */
3947 		f = 0;
3948 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3949 			continue;
3950 		pcifunchigh = 0;
3951 		hdrtype = REG(PCIR_HDRTYPE, 1);
3952 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3953 			continue;
3954 		if (hdrtype & PCIM_MFDEV)
3955 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3956 		for (f = 0; f <= pcifunchigh; f++) {
3957 			if (REG(PCIR_VENDOR, 2) == 0xfff)
3958 				continue;
3959 
3960 			/*
3961 			 * Found a valid function.  Check if a
3962 			 * device_t for this device already exists.
3963 			 */
3964 			for (i = 0; i < devcount; i++) {
3965 				child = devlist[i];
3966 				if (child == NULL)
3967 					continue;
3968 				if (pci_get_slot(child) == s &&
3969 				    pci_get_function(child) == f) {
3970 					unchanged[i] = child;
3971 					goto next_func;
3972 				}
3973 			}
3974 
3975 			pci_identify_function(pcib, dev, domain, busno, s, f);
3976 		next_func:;
3977 		}
3978 	}
3979 
3980 	/* Remove devices that are no longer present. */
3981 	for (i = 0; i < devcount; i++) {
3982 		if (unchanged[i] != NULL)
3983 			continue;
3984 		device_delete_child(dev, devlist[i]);
3985 	}
3986 
3987 	free(devlist, M_TEMP);
3988 	oldcount = devcount;
3989 
3990 	/* Try to attach the devices just added. */
3991 	error = device_get_children(dev, &devlist, &devcount);
3992 	if (error) {
3993 		free(unchanged, M_TEMP);
3994 		return (error);
3995 	}
3996 
3997 	for (i = 0; i < devcount; i++) {
3998 		for (j = 0; j < oldcount; j++) {
3999 			if (devlist[i] == unchanged[j])
4000 				goto next_device;
4001 		}
4002 
4003 		device_probe_and_attach(devlist[i]);
4004 	next_device:;
4005 	}
4006 
4007 	free(unchanged, M_TEMP);
4008 	free(devlist, M_TEMP);
4009 	return (0);
4010 #undef REG
4011 }
4012 
4013 #ifdef PCI_IOV
4014 device_t
4015 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4016     uint16_t did)
4017 {
4018 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4019 	device_t pcib;
4020 	int busno, slot, func;
4021 
4022 	pf_dinfo = device_get_ivars(pf);
4023 
4024 	pcib = device_get_parent(bus);
4025 
4026 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4027 
4028 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4029 	    slot, func, vid, did);
4030 
4031 	vf_dinfo->cfg.flags |= PCICFG_VF;
4032 	pci_add_child(bus, vf_dinfo);
4033 
4034 	return (vf_dinfo->cfg.dev);
4035 }
4036 
/*
 * Default PCIB_CREATE_IOV_CHILD() bus method: simply delegate to
 * pci_add_iov_child() to create the device_t for a Virtual Function.
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, rid, vid, did));
}
4044 #endif
4045 
4046 void
4047 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4048 {
4049 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4050 	device_set_ivars(dinfo->cfg.dev, dinfo);
4051 	resource_list_init(&dinfo->resources);
4052 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4053 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4054 	pci_print_verbose(dinfo);
4055 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4056 	pci_child_added(dinfo->cfg.dev);
4057 }
4058 
/*
 * Default BUS_CHILD_ADDED() method for the PCI bus: intentionally a
 * no-op.  It exists so that subclassed PCI bus drivers can override
 * the hook to act on newly enumerated children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4064 
/*
 * Probe method for the generic PCI bus driver.  Always matches, but
 * at BUS_PROBE_GENERIC priority so that more specific bus drivers
 * (subclasses) can win the probe instead.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
4074 
/*
 * Attach-time setup shared by the plain PCI bus driver and its
 * subclasses: optionally reserve this bus's number from the parent
 * bridge, report the domain/bus if booting verbosely, and establish
 * the DMA tag handed to children.  Returns 0 on success or ENXIO if
 * the bus number cannot be reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number so it cannot be handed out again. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Create a boundary-restricted DMA tag only when our grandparent
	 * is not itself a PCI bus (i.e. we are not nested under another
	 * PCI bus, which would already have such a tag).
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* Otherwise (or on failure) inherit the parent's DMA tag. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4121 
4122 static int
4123 pci_attach(device_t dev)
4124 {
4125 	int busno, domain, error;
4126 
4127 	error = pci_attach_common(dev);
4128 	if (error)
4129 		return (error);
4130 
4131 	/*
4132 	 * Since there can be multiple independently numbered PCI
4133 	 * busses on systems with multiple PCI domains, we can't use
4134 	 * the unit number to decide which bus we are probing. We ask
4135 	 * the parent pcib what our domain and bus numbers are.
4136 	 */
4137 	domain = pcib_get_domain(dev);
4138 	busno = pcib_get_bus(dev);
4139 	pci_add_children(dev, domain, busno);
4140 	return (bus_generic_attach(dev));
4141 }
4142 
/*
 * Detach method: detach all child drivers first, release the bus
 * number that pci_attach_common() reserved (when bus-number resources
 * are in use), and finally delete the child device_t's.  Returns the
 * first error encountered; the bus is left partially detached in that
 * case.
 */
static int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
	struct pci_softc *sc;
#endif
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
#ifdef PCI_RES_BUS
	sc = device_get_softc(dev);
	/* rid 0 matches the rid used in pci_attach_common(). */
	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
	if (error)
		return (error);
#endif
	return (device_delete_children(dev));
}
4162 
4163 static void
4164 pci_set_power_child(device_t dev, device_t child, int state)
4165 {
4166 	device_t pcib;
4167 	int dstate;
4168 
4169 	/*
4170 	 * Set the device to the given state.  If the firmware suggests
4171 	 * a different power state, use it instead.  If power management
4172 	 * is not present, the firmware is responsible for managing
4173 	 * device power.  Skip children who aren't attached since they
4174 	 * are handled separately.
4175 	 */
4176 	pcib = device_get_parent(dev);
4177 	dstate = state;
4178 	if (device_is_attached(child) &&
4179 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4180 		pci_set_powerstate(child, dstate);
4181 }
4182 
4183 int
4184 pci_suspend_child(device_t dev, device_t child)
4185 {
4186 	struct pci_devinfo *dinfo;
4187 	int error;
4188 
4189 	dinfo = device_get_ivars(child);
4190 
4191 	/*
4192 	 * Save the PCI configuration space for the child and set the
4193 	 * device in the appropriate power state for this sleep state.
4194 	 */
4195 	pci_cfg_save(child, dinfo, 0);
4196 
4197 	/* Suspend devices before potentially powering them down. */
4198 	error = bus_generic_suspend_child(dev, child);
4199 
4200 	if (error)
4201 		return (error);
4202 
4203 	if (pci_do_power_suspend)
4204 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4205 
4206 	return (0);
4207 }
4208 
4209 int
4210 pci_resume_child(device_t dev, device_t child)
4211 {
4212 	struct pci_devinfo *dinfo;
4213 
4214 	if (pci_do_power_resume)
4215 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4216 
4217 	dinfo = device_get_ivars(child);
4218 	pci_cfg_restore(child, dinfo);
4219 	if (!device_is_attached(child))
4220 		pci_cfg_save(child, dinfo, 1);
4221 
4222 	bus_generic_resume_child(dev, child);
4223 
4224 	return (0);
4225 }
4226 
4227 int
4228 pci_resume(device_t dev)
4229 {
4230 	device_t child, *devlist;
4231 	int error, i, numdevs;
4232 
4233 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4234 		return (error);
4235 
4236 	/*
4237 	 * Resume critical devices first, then everything else later.
4238 	 */
4239 	for (i = 0; i < numdevs; i++) {
4240 		child = devlist[i];
4241 		switch (pci_get_class(child)) {
4242 		case PCIC_DISPLAY:
4243 		case PCIC_MEMORY:
4244 		case PCIC_BRIDGE:
4245 		case PCIC_BASEPERIPH:
4246 			BUS_RESUME_CHILD(dev, child);
4247 			break;
4248 		}
4249 	}
4250 	for (i = 0; i < numdevs; i++) {
4251 		child = devlist[i];
4252 		switch (pci_get_class(child)) {
4253 		case PCIC_DISPLAY:
4254 		case PCIC_MEMORY:
4255 		case PCIC_BRIDGE:
4256 		case PCIC_BASEPERIPH:
4257 			break;
4258 		default:
4259 			BUS_RESUME_CHILD(dev, child);
4260 		}
4261 	}
4262 	free(devlist, M_TEMP);
4263 	return (0);
4264 }
4265 
4266 static void
4267 pci_load_vendor_data(void)
4268 {
4269 	caddr_t data;
4270 	void *ptr;
4271 	size_t sz;
4272 
4273 	data = preload_search_by_type("pci_vendor_data");
4274 	if (data != NULL) {
4275 		ptr = preload_fetch_addr(data);
4276 		sz = preload_fetch_size(data);
4277 		if (ptr != NULL && sz != 0) {
4278 			pci_vendordata = ptr;
4279 			pci_vendordata_size = sz;
4280 			/* terminate the database */
4281 			pci_vendordata[pci_vendordata_size] = '\n';
4282 		}
4283 	}
4284 }
4285 
4286 void
4287 pci_driver_added(device_t dev, driver_t *driver)
4288 {
4289 	int numdevs;
4290 	device_t *devlist;
4291 	device_t child;
4292 	struct pci_devinfo *dinfo;
4293 	int i;
4294 
4295 	if (bootverbose)
4296 		device_printf(dev, "driver added\n");
4297 	DEVICE_IDENTIFY(driver, dev);
4298 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4299 		return;
4300 	for (i = 0; i < numdevs; i++) {
4301 		child = devlist[i];
4302 		if (device_get_state(child) != DS_NOTPRESENT)
4303 			continue;
4304 		dinfo = device_get_ivars(child);
4305 		pci_print_verbose(dinfo);
4306 		if (bootverbose)
4307 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4308 		pci_cfg_restore(child, dinfo);
4309 		if (device_probe_and_attach(child) != 0)
4310 			pci_child_detached(dev, child);
4311 	}
4312 	free(devlist, M_TEMP);
4313 }
4314 
/*
 * BUS_SETUP_INTR() method: install an interrupt handler for a child
 * and program the underlying interrupt source.  rid 0 is the legacy
 * INTx interrupt; any other rid is an MSI or MSI-X message that must
 * first be mapped through the parent bridge to obtain its address/data
 * register values.  Returns 0 on success or an errno value, in which
 * case the generic handler installed first is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* First handler: enable MSI in config space. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rids are 1-based; the table is 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the MSI-X vector lazily, as for MSI above. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler: program and unmask the entry. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4414 
/*
 * BUS_TEARDOWN_INTR() method: remove an interrupt handler installed
 * by pci_setup_intr() and undo the interrupt-source programming.  For
 * rid 0 INTx is masked; for MSI/MSI-X the per-message handler count
 * is decremented and the message disabled (MSI) or masked (MSI-X)
 * when it reaches zero.  Returns 0 on success or an errno value.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/* The resource must be the one recorded for this rid. */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rids are 1-based; the table is 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
4473 
4474 int
4475 pci_print_child(device_t dev, device_t child)
4476 {
4477 	struct pci_devinfo *dinfo;
4478 	struct resource_list *rl;
4479 	int retval = 0;
4480 
4481 	dinfo = device_get_ivars(child);
4482 	rl = &dinfo->resources;
4483 
4484 	retval += bus_print_child_header(dev, child);
4485 
4486 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4487 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4488 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4489 	if (device_get_flags(dev))
4490 		retval += printf(" flags %#x", device_get_flags(dev));
4491 
4492 	retval += printf(" at device %d.%d", pci_get_slot(child),
4493 	    pci_get_function(child));
4494 
4495 	retval += bus_print_child_domain(dev, child);
4496 	retval += bus_print_child_footer(dev, child);
4497 
4498 	return (retval);
4499 }
4500 
/*
 * Table of generic class/subclass descriptions used by
 * pci_probe_nomatch() when no driver claims a device and the vendor
 * database has no entry for it.  A subclass of -1 is the fallback
 * description for the whole class; "report" selects whether the
 * description is printed always (1) or only under bootverbose (0).
 * The table is terminated by a NULL desc entry.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4595 
4596 void
4597 pci_probe_nomatch(device_t dev, device_t child)
4598 {
4599 	int i, report;
4600 	const char *cp, *scp;
4601 	char *device;
4602 
4603 	/*
4604 	 * Look for a listing for this device in a loaded device database.
4605 	 */
4606 	report = 1;
4607 	if ((device = pci_describe_device(child)) != NULL) {
4608 		device_printf(dev, "<%s>", device);
4609 		free(device, M_DEVBUF);
4610 	} else {
4611 		/*
4612 		 * Scan the class/subclass descriptions for a general
4613 		 * description.
4614 		 */
4615 		cp = "unknown";
4616 		scp = NULL;
4617 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4618 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4619 				if (pci_nomatch_tab[i].subclass == -1) {
4620 					cp = pci_nomatch_tab[i].desc;
4621 					report = pci_nomatch_tab[i].report;
4622 				} else if (pci_nomatch_tab[i].subclass ==
4623 				    pci_get_subclass(child)) {
4624 					scp = pci_nomatch_tab[i].desc;
4625 					report = pci_nomatch_tab[i].report;
4626 				}
4627 			}
4628 		}
4629 		if (report || bootverbose) {
4630 			device_printf(dev, "<%s%s%s>",
4631 			    cp ? cp : "",
4632 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4633 			    scp ? scp : "");
4634 		}
4635 	}
4636 	if (report || bootverbose) {
4637 		printf(" at device %d.%d (no driver attached)\n",
4638 		    pci_get_slot(child), pci_get_function(child));
4639 	}
4640 	pci_cfg_save(child, device_get_ivars(child), 1);
4641 }
4642 
/*
 * BUS_CHILD_DETACHED() method: reclaim anything the detaching driver
 * failed to release, complaining about each class of leak, then save
 * the child's config space for a possible later re-attach.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4674 
4675 /*
4676  * Parse the PCI device database, if loaded, and return a pointer to a
4677  * description of the device.
4678  *
4679  * The database is flat text formatted as follows:
4680  *
4681  * Any line not in a valid format is ignored.
4682  * Lines are terminated with newline '\n' characters.
4683  *
4684  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4685  * the vendor name.
4686  *
4687  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4688  * - devices cannot be listed without a corresponding VENDOR line.
4689  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4690  * another TAB, then the device name.
4691  */
4692 
4693 /*
4694  * Assuming (ptr) points to the beginning of a line in the database,
4695  * return the vendor or device and description of the next entry.
4696  * The value of (vendor) or (device) inappropriate for the entry type
4697  * is set to -1.  Returns nonzero at the end of the database.
4698  *
4699  * Note that this is slightly unrobust in the face of corrupt data;
4700  * we attempt to safeguard against this by spamming the end of the
4701  * database with a newline when we initialise.
4702  */
4703 static int
4704 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4705 {
4706 	char	*cp = *ptr;
4707 	int	left;
4708 
4709 	*device = -1;
4710 	*vendor = -1;
4711 	**desc = '\0';
4712 	for (;;) {
4713 		left = pci_vendordata_size - (cp - pci_vendordata);
4714 		if (left <= 0) {
4715 			*ptr = cp;
4716 			return(1);
4717 		}
4718 
4719 		/* vendor entry? */
4720 		if (*cp != '\t' &&
4721 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4722 			break;
4723 		/* device entry? */
4724 		if (*cp == '\t' &&
4725 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4726 			break;
4727 
4728 		/* skip to next line */
4729 		while (*cp != '\n' && left > 0) {
4730 			cp++;
4731 			left--;
4732 		}
4733 		if (*cp == '\n') {
4734 			cp++;
4735 			left--;
4736 		}
4737 	}
4738 	/* skip to next line */
4739 	while (*cp != '\n' && left > 0) {
4740 		cp++;
4741 		left--;
4742 	}
4743 	if (*cp == '\n' && left > 0)
4744 		cp++;
4745 	*ptr = cp;
4746 	return(0);
4747 }
4748 
4749 static char *
4750 pci_describe_device(device_t dev)
4751 {
4752 	int	vendor, device;
4753 	char	*desc, *vp, *dp, *line;
4754 
4755 	desc = vp = dp = NULL;
4756 
4757 	/*
4758 	 * If we have no vendor data, we can't do anything.
4759 	 */
4760 	if (pci_vendordata == NULL)
4761 		goto out;
4762 
4763 	/*
4764 	 * Scan the vendor data looking for this device
4765 	 */
4766 	line = pci_vendordata;
4767 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4768 		goto out;
4769 	for (;;) {
4770 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4771 			goto out;
4772 		if (vendor == pci_get_vendor(dev))
4773 			break;
4774 	}
4775 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4776 		goto out;
4777 	for (;;) {
4778 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4779 			*dp = 0;
4780 			break;
4781 		}
4782 		if (vendor != -1) {
4783 			*dp = 0;
4784 			break;
4785 		}
4786 		if (device == pci_get_device(dev))
4787 			break;
4788 	}
4789 	if (dp[0] == '\0')
4790 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4791 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4792 	    NULL)
4793 		sprintf(desc, "%s, %s", vp, dp);
4794 out:
4795 	if (vp != NULL)
4796 		free(vp, M_DEVBUF);
4797 	if (dp != NULL)
4798 		free(dp, M_DEVBUF);
4799 	return(desc);
4800 }
4801 
4802 int
4803 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4804 {
4805 	struct pci_devinfo *dinfo;
4806 	pcicfgregs *cfg;
4807 
4808 	dinfo = device_get_ivars(child);
4809 	cfg = &dinfo->cfg;
4810 
4811 	switch (which) {
4812 	case PCI_IVAR_ETHADDR:
4813 		/*
4814 		 * The generic accessor doesn't deal with failure, so
4815 		 * we set the return value, then return an error.
4816 		 */
4817 		*((uint8_t **) result) = NULL;
4818 		return (EINVAL);
4819 	case PCI_IVAR_SUBVENDOR:
4820 		*result = cfg->subvendor;
4821 		break;
4822 	case PCI_IVAR_SUBDEVICE:
4823 		*result = cfg->subdevice;
4824 		break;
4825 	case PCI_IVAR_VENDOR:
4826 		*result = cfg->vendor;
4827 		break;
4828 	case PCI_IVAR_DEVICE:
4829 		*result = cfg->device;
4830 		break;
4831 	case PCI_IVAR_DEVID:
4832 		*result = (cfg->device << 16) | cfg->vendor;
4833 		break;
4834 	case PCI_IVAR_CLASS:
4835 		*result = cfg->baseclass;
4836 		break;
4837 	case PCI_IVAR_SUBCLASS:
4838 		*result = cfg->subclass;
4839 		break;
4840 	case PCI_IVAR_PROGIF:
4841 		*result = cfg->progif;
4842 		break;
4843 	case PCI_IVAR_REVID:
4844 		*result = cfg->revid;
4845 		break;
4846 	case PCI_IVAR_INTPIN:
4847 		*result = cfg->intpin;
4848 		break;
4849 	case PCI_IVAR_IRQ:
4850 		*result = cfg->intline;
4851 		break;
4852 	case PCI_IVAR_DOMAIN:
4853 		*result = cfg->domain;
4854 		break;
4855 	case PCI_IVAR_BUS:
4856 		*result = cfg->bus;
4857 		break;
4858 	case PCI_IVAR_SLOT:
4859 		*result = cfg->slot;
4860 		break;
4861 	case PCI_IVAR_FUNCTION:
4862 		*result = cfg->func;
4863 		break;
4864 	case PCI_IVAR_CMDREG:
4865 		*result = cfg->cmdreg;
4866 		break;
4867 	case PCI_IVAR_CACHELNSZ:
4868 		*result = cfg->cachelnsz;
4869 		break;
4870 	case PCI_IVAR_MINGNT:
4871 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4872 			*result = -1;
4873 			return (EINVAL);
4874 		}
4875 		*result = cfg->mingnt;
4876 		break;
4877 	case PCI_IVAR_MAXLAT:
4878 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4879 			*result = -1;
4880 			return (EINVAL);
4881 		}
4882 		*result = cfg->maxlat;
4883 		break;
4884 	case PCI_IVAR_LATTIMER:
4885 		*result = cfg->lattimer;
4886 		break;
4887 	default:
4888 		return (ENOENT);
4889 	}
4890 	return (0);
4891 }
4892 
4893 int
4894 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4895 {
4896 	struct pci_devinfo *dinfo;
4897 
4898 	dinfo = device_get_ivars(child);
4899 
4900 	switch (which) {
4901 	case PCI_IVAR_INTPIN:
4902 		dinfo->cfg.intpin = value;
4903 		return (0);
4904 	case PCI_IVAR_ETHADDR:
4905 	case PCI_IVAR_SUBVENDOR:
4906 	case PCI_IVAR_SUBDEVICE:
4907 	case PCI_IVAR_VENDOR:
4908 	case PCI_IVAR_DEVICE:
4909 	case PCI_IVAR_DEVID:
4910 	case PCI_IVAR_CLASS:
4911 	case PCI_IVAR_SUBCLASS:
4912 	case PCI_IVAR_PROGIF:
4913 	case PCI_IVAR_REVID:
4914 	case PCI_IVAR_IRQ:
4915 	case PCI_IVAR_DOMAIN:
4916 	case PCI_IVAR_BUS:
4917 	case PCI_IVAR_SLOT:
4918 	case PCI_IVAR_FUNCTION:
4919 		return (EINVAL);	/* disallow for now */
4920 
4921 	default:
4922 		return (ENOENT);
4923 	}
4924 }
4925 
4926 #include "opt_ddb.h"
4927 #ifdef DDB
4928 #include <ddb/ddb.h>
4929 #include <sys/cons.h>
4930 
4931 /*
4932  * List resources based on pci map registers, used for within ddb
4933  */
4934 
/*
 * "show pciregs" DDB command: walk the global pci_devq list and print
 * one pciconf(8)-style line (selector, class, card/chip IDs, revision
 * and header type) per enumerated PCI function.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices with no device_t name print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4974 #endif /* DDB */
4975 
/*
 * Lazily reserve the resource backing a BAR (or device ROM BAR) the
 * first time a child driver asks for it.  If the BAR is not already
 * known, probe its size, then add a resource list entry sized to what
 * the hardware actually decodes (overriding the caller's count),
 * reserve that range from the parent bus, and program the BAR with the
 * assigned base address.  Returns the reserved (inactive) resource, or
 * NULL if the rid is EA-managed, the BAR is unimplemented, the type
 * disagrees with the BAR, or the reservation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Refuse requests whose type does not match what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Reservation failed: drop the entry we just added. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the base address we just reserved. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
5079 
/*
 * Allocate a resource for a child, handling the lazy reservation of
 * BAR-backed ranges.  'num' requests that many contiguous BAR-sized
 * copies (used by SR-IOV; normal callers pass 1).  Secondary-bus
 * numbers, legacy interrupts, and bridge window registers get their
 * own handling before the request is satisfied from the child's
 * resource list.  Returns the allocated resource or NULL.
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (and possibly activate) the reserved resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5155 
/*
 * BUS_ALLOC_RESOURCE method for the PCI bus.  Requests on behalf of
 * grandchildren are passed straight up the tree.  SR-IOV virtual
 * functions have no real BARs: I/O port requests are refused and
 * memory requests are carved out of the PF's VF memory window.
 * Everything else goes through pci_alloc_multi_resource() with a
 * single copy requested.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5187 
/*
 * BUS_RELEASE_RESOURCE method for the PCI bus.  Mirrors the special
 * cases in pci_alloc_resource(): grandchildren's requests are passed
 * up the tree, SR-IOV VF memory goes back to the PF's VF window, and
 * bridge window registers bypass the resource list.  Everything else
 * is released through the child's resource list.  Returns 0 or an
 * errno value.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5238 
/*
 * BUS_ACTIVATE_RESOURCE method.  After generic activation succeeds,
 * turn on decoding for direct children: a device ROM BAR additionally
 * needs its explicit enable bit set, and I/O or memory decoding is
 * switched on in the command register via PCI_ENABLE_IO().  Returns 0
 * or an errno value.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
5266 
/*
 * BUS_DEACTIVATE_RESOURCE method.  After generic deactivation, rewrite
 * a device ROM BAR without PCIM_BIOS_ENABLE so the ROM stops decoding.
 * Regular I/O and memory decoding is left enabled.  Returns 0 on
 * success or the error from the generic deactivation.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
5287 
5288 void
5289 pci_child_deleted(device_t dev, device_t child)
5290 {
5291 	struct resource_list_entry *rle;
5292 	struct resource_list *rl;
5293 	struct pci_devinfo *dinfo;
5294 
5295 	dinfo = device_get_ivars(child);
5296 	rl = &dinfo->resources;
5297 
5298 	/* Turn off access to resources we're about to free */
5299 	if (bus_child_present(child) != 0) {
5300 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5301 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5302 
5303 		pci_disable_busmaster(child);
5304 	}
5305 
5306 	/* Free all allocated resources */
5307 	STAILQ_FOREACH(rle, rl, link) {
5308 		if (rle->res) {
5309 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5310 			    resource_list_busy(rl, rle->type, rle->rid)) {
5311 				pci_printf(&dinfo->cfg,
5312 				    "Resource still owned, oops. "
5313 				    "(type=%d, rid=%d, addr=%lx)\n",
5314 				    rle->type, rle->rid,
5315 				    rman_get_start(rle->res));
5316 				bus_release_resource(child, rle->type, rle->rid,
5317 				    rle->res);
5318 			}
5319 			resource_list_unreserve(rl, dev, child, rle->type,
5320 			    rle->rid);
5321 		}
5322 	}
5323 	resource_list_free(rl);
5324 
5325 	pci_freecfg(dinfo);
5326 }
5327 
/*
 * BUS_DELETE_RESOURCE method: remove the resource-list entry for
 * (type, rid) on a direct child.  If we hold a reservation for the
 * entry it is unreserved first; if the child still has the resource
 * active or allocated, the deletion is refused with a console warning.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children are managed by this bus. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%jx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
5357 
5358 struct resource_list *
5359 pci_get_resource_list (device_t dev, device_t child)
5360 {
5361 	struct pci_devinfo *dinfo = device_get_ivars(child);
5362 
5363 	return (&dinfo->resources);
5364 }
5365 
5366 bus_dma_tag_t
5367 pci_get_dma_tag(device_t bus, device_t dev)
5368 {
5369 	struct pci_softc *sc = device_get_softc(bus);
5370 
5371 	return (sc->sc_dma_tag);
5372 }
5373 
/*
 * PCI_READ_CONFIG method: read 'width' bytes (1, 2, or 4) from the
 * child's configuration space at 'reg'.  SR-IOV virtual functions do
 * not implement the vendor and device ID registers in hardware, so
 * reads of those are emulated from the cached config data; all other
 * accesses go to the parent bridge.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* 4-byte read at 0 covers both IDs. */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5414 
5415 void
5416 pci_write_config_method(device_t dev, device_t child, int reg,
5417     uint32_t val, int width)
5418 {
5419 	struct pci_devinfo *dinfo = device_get_ivars(child);
5420 	pcicfgregs *cfg = &dinfo->cfg;
5421 
5422 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5423 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5424 }
5425 
5426 int
5427 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5428     size_t buflen)
5429 {
5430 
5431 	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5432 	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5433 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5434 	return (0);
5435 }
5436 
5437 int
5438 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5439     size_t buflen)
5440 {
5441 	struct pci_devinfo *dinfo;
5442 	pcicfgregs *cfg;
5443 
5444 	dinfo = device_get_ivars(child);
5445 	cfg = &dinfo->cfg;
5446 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5447 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5448 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5449 	    cfg->progif);
5450 	return (0);
5451 }
5452 
5453 int
5454 pci_assign_interrupt_method(device_t dev, device_t child)
5455 {
5456 	struct pci_devinfo *dinfo = device_get_ivars(child);
5457 	pcicfgregs *cfg = &dinfo->cfg;
5458 
5459 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5460 	    cfg->intpin));
5461 }
5462 
/*
 * dev_lookup event handler: translate a pciconf(8)-style selector
 * string ("pciD:B:S:F" or "pciB:S:F") into a device_t.  Leaves *dev
 * untouched if it is already resolved or if 'name' does not parse as
 * a valid selector.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler already resolved this name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift fields and default domain to 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* ARI-sized function numbers are only valid on slot 0. */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5511 
/*
 * Module event handler for the PCI bus driver.  On MOD_LOAD it
 * initializes the global device queue, creates the /dev/pci control
 * device, loads the vendor data table, and registers pci_lookup() as
 * a dev_lookup event handler.  On MOD_UNLOAD it deregisters the hook
 * and destroys the control device.  Always returns 0.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* State shared between the load and unload events. */
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5538 
/*
 * Rewrite the saved PCI Express capability control registers to the
 * device.  Which registers exist depends on the capability version and
 * the port type: version 1 capabilities only implement the registers
 * relevant to the device's role, while version 2+ implement the full
 * set (reads/writes to inapplicable ones are harmless no-ops per spec,
 * hence the unconditional writes for version > 1).  Counterpart of
 * pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot registers exist only on ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register set only exists in version 2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5574 
5575 static void
5576 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5577 {
5578 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5579 	    dinfo->cfg.pcix.pcix_command,  2);
5580 }
5581 
/*
 * Restore a device's configuration registers from the copy cached in
 * 'dinfo' (typically after a power-state transition or resume).  The
 * device is first forced to D0, then the common header registers, the
 * header-type-specific registers, the BARs, and the PCIe/PCI-X/MSI/
 * MSI-X/SR-IOV capability state are written back in that order.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Registers beyond the common header depend on the header type. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_restore(dev, dinfo);
#endif
}
5654 
/*
 * Snapshot the PCI Express capability control registers into the
 * cached config data.  Mirrors pci_cfg_restore_pcie(): which registers
 * are read depends on the capability version and port type, with
 * version 2+ devices implementing the full register set.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot registers exist only on ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register set only exists in version 2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5692 
5693 static void
5694 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5695 {
5696 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5697 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5698 }
5699 
/*
 * Snapshot a device's writable configuration registers into the cached
 * copy in 'dinfo' so pci_cfg_restore() can later put them back.  If
 * 'setstate' is non-zero, additionally power the device down to D3
 * subject to the hw.pci.do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Registers beyond the common header depend on the header type. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_save(dev, dinfo);
#endif

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* How aggressively to power down is a tunable policy. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5807 
5808 /* Wrapper APIs suitable for device driver use. */
5809 void
5810 pci_save_state(device_t dev)
5811 {
5812 	struct pci_devinfo *dinfo;
5813 
5814 	dinfo = device_get_ivars(dev);
5815 	pci_cfg_save(dev, dinfo, 0);
5816 }
5817 
5818 void
5819 pci_restore_state(device_t dev)
5820 {
5821 	struct pci_devinfo *dinfo;
5822 
5823 	dinfo = device_get_ivars(dev);
5824 	pci_cfg_restore(dev, dinfo);
5825 }
5826 
5827 static int
5828 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5829     uintptr_t *id)
5830 {
5831 
5832 	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5833 }
5834 
/*
 * Find the upstream port of a given PCI device in a root complex.
 * Walks up the bridge hierarchy from 'dev' (which must itself sit on a
 * PCI bus) and returns the first bridge whose PCIe capability marks it
 * as a root port, or NULL if a non-PCI parent is reached first.
 */
device_t
pci_find_pcie_root_port(device_t dev)
{
	struct pci_devinfo *dinfo;
	devclass_t pci_class;
	device_t pcib, bus;

	pci_class = devclass_find("pci");
	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));

	/*
	 * Walk the bridge hierarchy until we find a PCI-e root
	 * port or a non-PCI device.
	 */
	for (;;) {
		bus = device_get_parent(dev);
		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
		    device_get_nameunit(dev)));

		pcib = device_get_parent(bus);
		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
		    device_get_nameunit(bus)));

		/*
		 * pcib's parent must be a PCI bus for this to be a
		 * PCI-PCI bridge.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			return (NULL);

		dinfo = device_get_ivars(pcib);
		if (dinfo->cfg.pcie.pcie_location != 0 &&
		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
			return (pcib);

		/* Keep climbing: the bridge becomes the next 'dev'. */
		dev = pcib;
	}
}
5875