xref: /freebsd/sys/dev/pci/pci.c (revision cca48a59de682fe40c6ac3b2bb4356d0e42f21dd)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
79 #define	PCIR_IS_BIOS(cfg, reg)						\
80 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
81 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
82 
83 static int		pci_has_quirk(uint32_t devid, int quirk);
84 static pci_addr_t	pci_mapbase(uint64_t mapreg);
85 static const char	*pci_maptype(uint64_t mapreg);
86 static int		pci_maprange(uint64_t mapreg);
87 static pci_addr_t	pci_rombase(uint64_t mapreg);
88 static int		pci_romsize(uint64_t testval);
89 static void		pci_fixancient(pcicfgregs *cfg);
90 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91 
92 static int		pci_porten(device_t dev);
93 static int		pci_memen(device_t dev);
94 static void		pci_assign_interrupt(device_t bus, device_t dev,
95 			    int force_route);
96 static int		pci_add_map(device_t bus, device_t dev, int reg,
97 			    struct resource_list *rl, int force, int prefetch);
98 static int		pci_probe(device_t dev);
99 static int		pci_attach(device_t dev);
100 static int		pci_detach(device_t dev);
101 static void		pci_load_vendor_data(void);
102 static int		pci_describe_parse_line(char **ptr, int *vendor,
103 			    int *device, char **desc);
104 static char		*pci_describe_device(device_t dev);
105 static int		pci_modevent(module_t mod, int what, void *arg);
106 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
107 			    pcicfgregs *cfg);
108 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
109 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t *data);
111 #if 0
112 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
113 			    int reg, uint32_t data);
114 #endif
115 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
116 static void		pci_mask_msix(device_t dev, u_int index);
117 static void		pci_unmask_msix(device_t dev, u_int index);
118 static int		pci_msi_blacklisted(void);
119 static int		pci_msix_blacklisted(void);
120 static void		pci_resume_msi(device_t dev);
121 static void		pci_resume_msix(device_t dev);
122 static int		pci_remap_intr_method(device_t bus, device_t dev,
123 			    u_int irq);
124 
125 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
126 
127 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
128     int b, int s, int f, uint16_t vid, uint16_t did);
129 
/*
 * Method table for the PCI bus driver: device lifecycle methods,
 * the bus interface exported to child devices, and the PCI-specific
 * kobj interface (config space, power, MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
204 
205 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
206 
207 static devclass_t pci_devclass;
208 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
209 MODULE_VERSION(pci, 1);
210 
211 static char	*pci_vendordata;
212 static size_t	pci_vendordata_size;
213 
/*
 * An entry in the table of known device quirks, matched by full
 * vendor/device ID.  The meaning of arg1/arg2 depends on the quirk
 * type (e.g. arg1 is the register offset for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* quirk-type-specific argument */
	int	arg2;	/* quirk-type-specific argument */
};
226 
227 static const struct pci_quirk pci_quirks[] = {
228 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
229 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
230 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
231 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
232 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
233 
234 	/*
235 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
236 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
237 	 */
238 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
239 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240 
241 	/*
242 	 * MSI doesn't work on earlier Intel chipsets including
243 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
244 	 */
245 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
248 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
249 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
250 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
251 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
252 
253 	/*
254 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
255 	 * bridge.
256 	 */
257 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
258 
259 	/*
260 	 * MSI-X allocation doesn't work properly for devices passed through
261 	 * by VMware up to at least ESXi 5.1.
262 	 */
263 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
264 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
265 
266 	/*
267 	 * Some virtualization environments emulate an older chipset
268 	 * but support MSI just fine.  QEMU uses the Intel 82440.
269 	 */
270 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
271 
272 	/*
273 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
274 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
275 	 * It prevents us from attaching hpet(4) when the bit is unset.
276 	 * Note this quirk only affects SB600 revision A13 and earlier.
277 	 * For SB600 A21 and later, firmware must set the bit to hide it.
278 	 * For SB700 and later, it is unused and hardcoded to zero.
279 	 */
280 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
281 
282 	/*
283 	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
284 	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
285 	 * command register is set.
286 	 */
287 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
288 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
289 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
290 
291 	/*
292 	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
293 	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
294 	 */
295 	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
296 	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
297 	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
298 	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
299 	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
300 	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
301 
302 	{ 0 }
303 };
304 
305 /* map register information */
306 #define	PCI_MAPMEM	0x01	/* memory map */
307 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
308 #define	PCI_MAPPORT	0x04	/* port map */
309 
310 struct devlist pci_devq;
311 uint32_t pci_generation;
312 uint32_t pci_numdevs = 0;
313 static int pcie_chipset, pcix_chipset;
314 
315 /* sysctl vars */
316 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
317 
318 static int pci_enable_io_modes = 1;
319 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
320     &pci_enable_io_modes, 1,
321     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
322 enable these bits correctly.  We'd like to do this all the time, but there\n\
323 are some peripherals that this causes problems with.");
324 
325 static int pci_do_realloc_bars = 0;
326 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
327     &pci_do_realloc_bars, 0,
328     "Attempt to allocate a new range for any BARs whose original "
329     "firmware-assigned ranges fail to allocate during the initial device scan.");
330 
331 static int pci_do_power_nodriver = 0;
332 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
333     &pci_do_power_nodriver, 0,
334   "Place a function into D3 state when no driver attaches to it.  0 means\n\
335 disable.  1 means conservatively place devices into D3 state.  2 means\n\
336 agressively place devices into D3 state.  3 means put absolutely everything\n\
337 in D3 state.");
338 
339 int pci_do_power_resume = 1;
340 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
341     &pci_do_power_resume, 1,
342   "Transition from D3 -> D0 on resume.");
343 
344 int pci_do_power_suspend = 1;
345 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
346     &pci_do_power_suspend, 1,
347   "Transition from D0 -> D3 on suspend.");
348 
349 static int pci_do_msi = 1;
350 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
351     "Enable support for MSI interrupts");
352 
353 static int pci_do_msix = 1;
354 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
355     "Enable support for MSI-X interrupts");
356 
357 static int pci_honor_msi_blacklist = 1;
358 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
359     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
360 
361 #if defined(__i386__) || defined(__amd64__)
362 static int pci_usb_takeover = 1;
363 #else
364 static int pci_usb_takeover = 0;
365 #endif
366 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
367     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
368 Disable this if you depend on BIOS emulation of USB devices, that is\n\
369 you use USB devices (like keyboard or mouse) but do not load USB drivers");
370 
371 static int pci_clear_bars;
372 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
373     "Ignore firmware-assigned resources for BARs.");
374 
375 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
376 static int pci_clear_buses;
377 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
378     "Ignore firmware-assigned bus numbers.");
379 #endif
380 
381 static int pci_enable_ari = 1;
382 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
383     0, "Enable support for PCIe Alternative RID Interpretation");
384 
385 static int
386 pci_has_quirk(uint32_t devid, int quirk)
387 {
388 	const struct pci_quirk *q;
389 
390 	for (q = &pci_quirks[0]; q->devid; q++) {
391 		if (q->devid == devid && q->type == quirk)
392 			return (1);
393 	}
394 	return (0);
395 }
396 
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience
 * wrapper around pci_find_dbsf(); returns NULL when no such device
 * has been enumerated.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
405 
406 /* Find a device_t by domain/bus/slot/function */
407 
408 device_t
409 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
410 {
411 	struct pci_devinfo *dinfo;
412 
413 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
414 		if ((dinfo->cfg.domain == domain) &&
415 		    (dinfo->cfg.bus == bus) &&
416 		    (dinfo->cfg.slot == slot) &&
417 		    (dinfo->cfg.func == func)) {
418 			return (dinfo->cfg.dev);
419 		}
420 	}
421 
422 	return (NULL);
423 }
424 
425 /* Find a device_t by vendor/device ID */
426 
427 device_t
428 pci_find_device(uint16_t vendor, uint16_t device)
429 {
430 	struct pci_devinfo *dinfo;
431 
432 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
433 		if ((dinfo->cfg.vendor == vendor) &&
434 		    (dinfo->cfg.device == device)) {
435 			return (dinfo->cfg.dev);
436 		}
437 	}
438 
439 	return (NULL);
440 }
441 
442 device_t
443 pci_find_class(uint8_t class, uint8_t subclass)
444 {
445 	struct pci_devinfo *dinfo;
446 
447 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
448 		if (dinfo->cfg.baseclass == class &&
449 		    dinfo->cfg.subclass == subclass) {
450 			return (dinfo->cfg.dev);
451 		}
452 	}
453 
454 	return (NULL);
455 }
456 
457 static int
458 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
459 {
460 	va_list ap;
461 	int retval;
462 
463 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
464 	    cfg->func);
465 	va_start(ap, fmt);
466 	retval += vprintf(fmt, ap);
467 	va_end(ap);
468 	return (retval);
469 }
470 
471 /* return base address of memory or port map */
472 
473 static pci_addr_t
474 pci_mapbase(uint64_t mapreg)
475 {
476 
477 	if (PCI_BAR_MEM(mapreg))
478 		return (mapreg & PCIM_BAR_MEM_BASE);
479 	else
480 		return (mapreg & PCIM_BAR_IO_BASE);
481 }
482 
483 /* return map type of memory or port map */
484 
485 static const char *
486 pci_maptype(uint64_t mapreg)
487 {
488 
489 	if (PCI_BAR_IO(mapreg))
490 		return ("I/O Port");
491 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
492 		return ("Prefetchable Memory");
493 	return ("Memory");
494 }
495 
496 /* return log2 of map size decoded for memory or port map */
497 
498 int
499 pci_mapsize(uint64_t testval)
500 {
501 	int ln2size;
502 
503 	testval = pci_mapbase(testval);
504 	ln2size = 0;
505 	if (testval != 0) {
506 		while ((testval & 1) == 0)
507 		{
508 			ln2size++;
509 			testval >>= 1;
510 		}
511 	}
512 	return (ln2size);
513 }
514 
/* return base address of device ROM (flag bits masked off) */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
523 
524 /* return log2 of map size decided for device ROM */
525 
526 static int
527 pci_romsize(uint64_t testval)
528 {
529 	int ln2size;
530 
531 	testval = pci_rombase(testval);
532 	ln2size = 0;
533 	if (testval != 0) {
534 		while ((testval & 1) == 0)
535 		{
536 			ln2size++;
537 			testval >>= 1;
538 		}
539 	}
540 	return (ln2size);
541 }
542 
543 /* return log2 of address range supported by map register */
544 
545 static int
546 pci_maprange(uint64_t mapreg)
547 {
548 	int ln2range = 0;
549 
550 	if (PCI_BAR_IO(mapreg))
551 		ln2range = 32;
552 	else
553 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
554 		case PCIM_BAR_MEM_32:
555 			ln2range = 32;
556 			break;
557 		case PCIM_BAR_MEM_1MB:
558 			ln2range = 20;
559 			break;
560 		case PCIM_BAR_MEM_64:
561 			ln2range = 64;
562 			break;
563 		}
564 	return (ln2range);
565 }
566 
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type 0 (normal) headers need fixing up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
579 
/*
 * extract header type specific config data
 *
 * Fills in the subvendor/subdevice, latency and bridge bus-number
 * fields of *cfg from the header-type-dependent portion of config
 * space, and records how many BARs (nummaps) this header type has.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: plain function. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge (also carries subvendor info). */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
615 
/*
 * read configuration header into pcicfgregs structure
 *
 * Probes the vendor ID at d:b:s:f; a value of 0xffff means no device
 * responds there and NULL is returned.  Otherwise the full header is
 * read via pci_fill_devinfo().  Note: the REG() macro defined here
 * intentionally stays defined for the following functions.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}
630 
/*
 * Default PCI_ALLOC_DEVINFO() implementation: allocate a zeroed
 * pci_devinfo.  M_WAITOK means this cannot fail (it may sleep).
 */
struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
	    M_WAITOK | M_ZERO));
}
638 
/*
 * Allocate a pci_devinfo for the device at d:b:s:f, populate its
 * pcicfgregs from config space (including capabilities, when the
 * status register advertises them), link it onto the global device
 * list, and mirror the identification fields into the pci_conf
 * structure used by the /dev/pci ioctl interface.
 * Uses the REG() macro defined in pci_read_device() above.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Split the multi-function bit out of the raw header type. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the selector and IDs into the pciio pci_conf view. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG
704 
705 static void
706 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
707 {
708 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
709     cfg->ea.ea_location + (n), w)
710 	int num_ent;
711 	int ptr;
712 	int a, b;
713 	uint32_t val;
714 	int ent_size;
715 	uint32_t dw[4];
716 	uint64_t base, max_offset;
717 	struct pci_ea_entry *eae;
718 
719 	if (cfg->ea.ea_location == 0)
720 		return;
721 
722 	STAILQ_INIT(&cfg->ea.ea_entries);
723 
724 	/* Determine the number of entries */
725 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
726 	num_ent &= PCIM_EA_NUM_ENT_MASK;
727 
728 	/* Find the first entry to care of */
729 	ptr = PCIR_EA_FIRST_ENT;
730 
731 	/* Skip DWORD 2 for type 1 functions */
732 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
733 		ptr += 4;
734 
735 	for (a = 0; a < num_ent; a++) {
736 
737 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
738 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
739 
740 		/* Read a number of dwords in the entry */
741 		val = REG(ptr, 4);
742 		ptr += 4;
743 		ent_size = (val & PCIM_EA_ES);
744 
745 		for (b = 0; b < ent_size; b++) {
746 			dw[b] = REG(ptr, 4);
747 			ptr += 4;
748 		}
749 
750 		eae->eae_flags = val;
751 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
752 
753 		base = dw[0] & PCIM_EA_FIELD_MASK;
754 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
755 		b = 2;
756 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
757 			base |= (uint64_t)dw[b] << 32UL;
758 			b++;
759 		}
760 		if (((dw[1] & PCIM_EA_IS_64) != 0)
761 		    && (b < ent_size)) {
762 			max_offset |= (uint64_t)dw[b] << 32UL;
763 			b++;
764 		}
765 
766 		eae->eae_base = base;
767 		eae->eae_max_offset = max_offset;
768 
769 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
770 
771 		if (bootverbose) {
772 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
773 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
774 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
775 		}
776 	}
777 }
778 #undef REG
779 
/*
 * Walk the standard (non-extended) PCI capability list of *cfg and
 * record the location and key registers of each capability we care
 * about: power management, HyperTransport, MSI, MSI-X, VPD,
 * subvendor, PCI-X, PCI Express and Enhanced Allocation.  Also sets
 * the file-global pcix_chipset/pcie_chipset hints.
 * The REG()/WREG() macros defined here intentionally remain defined
 * for the VPD access functions that follow.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
944 
945 /*
946  * PCI Vital Product Data
947  */
948 
949 #define	PCI_VPD_TIMEOUT		1000000
950 
951 static int
952 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
953 {
954 	int count = PCI_VPD_TIMEOUT;
955 
956 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
957 
958 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
959 
960 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
961 		if (--count < 0)
962 			return (ENXIO);
963 		DELAY(1);	/* limit looping */
964 	}
965 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
966 
967 	return (0);
968 }
969 
#if 0
/*
 * Write one 4-byte-aligned dword of VPD data.  Currently unused;
 * kept disabled as a reference implementation of the write cycle.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Setting bit 15 in the address register initiates a write cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* The device clears bit 15 when the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
989 
990 #undef PCI_VPD_TIMEOUT
991 
/*
 * State for the incremental VPD reader.  Data is fetched from the
 * device one 32-bit word at a time and handed out byte by byte by
 * vpd_nextbyte().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent word read from device */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* offset of next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
1000 
1001 static int
1002 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1003 {
1004 	uint32_t reg;
1005 	uint8_t byte;
1006 
1007 	if (vrs->bytesinval == 0) {
1008 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1009 			return (ENXIO);
1010 		vrs->val = le32toh(reg);
1011 		vrs->off += 4;
1012 		byte = vrs->val & 0xff;
1013 		vrs->bytesinval = 3;
1014 	} else {
1015 		vrs->val = vrs->val >> 8;
1016 		byte = vrs->val & 0xff;
1017 		vrs->bytesinval--;
1018 	}
1019 
1020 	vrs->cksum += byte;
1021 	*data = byte;
1022 	return (0);
1023 }
1024 
/*
 * Parse the device's Vital Product Data into cfg->vpd.  This is a
 * byte-driven state machine: state 0 parses resource item headers,
 * state 1 the identifier string, states 2/3 the read-only (VPD-R)
 * keyword list, and states 5/6 the read/write (VPD-W) keyword list.
 * state -1 indicates normal termination; state -2 an I/O error, in
 * which case everything parsed so far is discarded.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1: undetermined, 0: bad, 1: good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is at most 0x7f words; sanity check. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword (2 bytes) + length (1) */
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" holds the checksum byte; once it has been
			 * accumulated the running sum must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: trim the array to size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip 'remain' bytes of an ignored resource. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword (2 bytes) + length (1) */
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: trim the array to size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the cache valid even on failure so we don't retry forever. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1296 
1297 int
1298 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1299 {
1300 	struct pci_devinfo *dinfo = device_get_ivars(child);
1301 	pcicfgregs *cfg = &dinfo->cfg;
1302 
1303 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1304 		pci_read_vpd(device_get_parent(dev), cfg);
1305 
1306 	*identptr = cfg->vpd.vpd_ident;
1307 
1308 	if (*identptr == NULL)
1309 		return (ENXIO);
1310 
1311 	return (0);
1312 }
1313 
1314 int
1315 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1316 	const char **vptr)
1317 {
1318 	struct pci_devinfo *dinfo = device_get_ivars(child);
1319 	pcicfgregs *cfg = &dinfo->cfg;
1320 	int i;
1321 
1322 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1323 		pci_read_vpd(device_get_parent(dev), cfg);
1324 
1325 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1326 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1327 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1328 			*vptr = cfg->vpd.vpd_ros[i].value;
1329 			return (0);
1330 		}
1331 
1332 	*vptr = NULL;
1333 	return (ENXIO);
1334 }
1335 
1336 struct pcicfg_vpd *
1337 pci_fetch_vpd_list(device_t dev)
1338 {
1339 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1340 	pcicfgregs *cfg = &dinfo->cfg;
1341 
1342 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1343 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1344 	return (&cfg->vpd);
1345 }
1346 
1347 /*
1348  * Find the requested HyperTransport capability and return the offset
1349  * in configuration space via the pointer provided.  The function
1350  * returns 0 on success and an error code otherwise.
1351  */
1352 int
1353 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1354 {
1355 	int ptr, error;
1356 	uint16_t val;
1357 
1358 	error = pci_find_cap(child, PCIY_HT, &ptr);
1359 	if (error)
1360 		return (error);
1361 
1362 	/*
1363 	 * Traverse the capabilities list checking each HT capability
1364 	 * to see if it matches the requested HT capability.
1365 	 */
1366 	while (ptr != 0) {
1367 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1368 		if (capability == PCIM_HTCAP_SLAVE ||
1369 		    capability == PCIM_HTCAP_HOST)
1370 			val &= 0xe000;
1371 		else
1372 			val &= PCIM_HTCMD_CAP_MASK;
1373 		if (val == capability) {
1374 			if (capreg != NULL)
1375 				*capreg = ptr;
1376 			return (0);
1377 		}
1378 
1379 		/* Skip to the next HT capability. */
1380 		while (ptr != 0) {
1381 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1382 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1383 			    PCIY_HT)
1384 				break;
1385 		}
1386 	}
1387 	return (ENOENT);
1388 }
1389 
1390 /*
1391  * Find the requested capability and return the offset in
1392  * configuration space via the pointer provided.  The function returns
1393  * 0 on success and an error code otherwise.
1394  */
1395 int
1396 pci_find_cap_method(device_t dev, device_t child, int capability,
1397     int *capreg)
1398 {
1399 	struct pci_devinfo *dinfo = device_get_ivars(child);
1400 	pcicfgregs *cfg = &dinfo->cfg;
1401 	u_int32_t status;
1402 	u_int8_t ptr;
1403 
1404 	/*
1405 	 * Check the CAP_LIST bit of the PCI status register first.
1406 	 */
1407 	status = pci_read_config(child, PCIR_STATUS, 2);
1408 	if (!(status & PCIM_STATUS_CAPPRESENT))
1409 		return (ENXIO);
1410 
1411 	/*
1412 	 * Determine the start pointer of the capabilities list.
1413 	 */
1414 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1415 	case PCIM_HDRTYPE_NORMAL:
1416 	case PCIM_HDRTYPE_BRIDGE:
1417 		ptr = PCIR_CAP_PTR;
1418 		break;
1419 	case PCIM_HDRTYPE_CARDBUS:
1420 		ptr = PCIR_CAP_PTR_2;
1421 		break;
1422 	default:
1423 		/* XXX: panic? */
1424 		return (ENXIO);		/* no extended capabilities support */
1425 	}
1426 	ptr = pci_read_config(child, ptr, 1);
1427 
1428 	/*
1429 	 * Traverse the capabilities list.
1430 	 */
1431 	while (ptr != 0) {
1432 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1433 			if (capreg != NULL)
1434 				*capreg = ptr;
1435 			return (0);
1436 		}
1437 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1438 	}
1439 
1440 	return (ENOENT);
1441 }
1442 
1443 /*
1444  * Find the requested extended capability and return the offset in
1445  * configuration space via the pointer provided.  The function returns
1446  * 0 on success and an error code otherwise.
1447  */
1448 int
1449 pci_find_extcap_method(device_t dev, device_t child, int capability,
1450     int *capreg)
1451 {
1452 	struct pci_devinfo *dinfo = device_get_ivars(child);
1453 	pcicfgregs *cfg = &dinfo->cfg;
1454 	uint32_t ecap;
1455 	uint16_t ptr;
1456 
1457 	/* Only supported for PCI-express devices. */
1458 	if (cfg->pcie.pcie_location == 0)
1459 		return (ENXIO);
1460 
1461 	ptr = PCIR_EXTCAP;
1462 	ecap = pci_read_config(child, ptr, 4);
1463 	if (ecap == 0xffffffff || ecap == 0)
1464 		return (ENOENT);
1465 	for (;;) {
1466 		if (PCI_EXTCAP_ID(ecap) == capability) {
1467 			if (capreg != NULL)
1468 				*capreg = ptr;
1469 			return (0);
1470 		}
1471 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1472 		if (ptr == 0)
1473 			break;
1474 		ecap = pci_read_config(child, ptr, 4);
1475 	}
1476 
1477 	return (ENOENT);
1478 }
1479 
1480 /*
1481  * Support for MSI-X message interrupts.
1482  */
1483 void
1484 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1485     uint64_t address, uint32_t data)
1486 {
1487 	struct pci_devinfo *dinfo = device_get_ivars(child);
1488 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1489 	uint32_t offset;
1490 
1491 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1492 	offset = msix->msix_table_offset + index * 16;
1493 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1494 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1495 	bus_write_4(msix->msix_table_res, offset + 8, data);
1496 
1497 	/* Enable MSI -> HT mapping. */
1498 	pci_ht_map_msi(child, address);
1499 }
1500 
1501 void
1502 pci_mask_msix(device_t dev, u_int index)
1503 {
1504 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1505 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1506 	uint32_t offset, val;
1507 
1508 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1509 	offset = msix->msix_table_offset + index * 16 + 12;
1510 	val = bus_read_4(msix->msix_table_res, offset);
1511 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1512 		val |= PCIM_MSIX_VCTRL_MASK;
1513 		bus_write_4(msix->msix_table_res, offset, val);
1514 	}
1515 }
1516 
1517 void
1518 pci_unmask_msix(device_t dev, u_int index)
1519 {
1520 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1521 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1522 	uint32_t offset, val;
1523 
1524 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1525 	offset = msix->msix_table_offset + index * 16 + 12;
1526 	val = bus_read_4(msix->msix_table_res, offset);
1527 	if (val & PCIM_MSIX_VCTRL_MASK) {
1528 		val &= ~PCIM_MSIX_VCTRL_MASK;
1529 		bus_write_4(msix->msix_table_res, offset, val);
1530 	}
1531 }
1532 
1533 int
1534 pci_pending_msix(device_t dev, u_int index)
1535 {
1536 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1537 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1538 	uint32_t offset, bit;
1539 
1540 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1541 	offset = msix->msix_pba_offset + (index / 32) * 4;
1542 	bit = 1 << index % 32;
1543 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1544 }
1545 
1546 /*
1547  * Restore MSI-X registers and table during resume.  If MSI-X is
1548  * enabled then walk the virtual table to restore the actual MSI-X
1549  * table.
1550  */
1551 static void
1552 pci_resume_msix(device_t dev)
1553 {
1554 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1555 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1556 	struct msix_table_entry *mte;
1557 	struct msix_vector *mv;
1558 	int i;
1559 
1560 	if (msix->msix_alloc > 0) {
1561 		/* First, mask all vectors. */
1562 		for (i = 0; i < msix->msix_msgnum; i++)
1563 			pci_mask_msix(dev, i);
1564 
1565 		/* Second, program any messages with at least one handler. */
1566 		for (i = 0; i < msix->msix_table_len; i++) {
1567 			mte = &msix->msix_table[i];
1568 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1569 				continue;
1570 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1571 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1572 			pci_unmask_msix(dev, i);
1573 		}
1574 	}
1575 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1576 	    msix->msix_ctrl, 2);
1577 }
1578 
1579 /*
1580  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1581  * returned in *count.  After this function returns, each message will be
1582  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1583  */
1584 int
1585 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1586 {
1587 	struct pci_devinfo *dinfo = device_get_ivars(child);
1588 	pcicfgregs *cfg = &dinfo->cfg;
1589 	struct resource_list_entry *rle;
1590 	int actual, error, i, irq, max;
1591 
1592 	/* Don't let count == 0 get us into trouble. */
1593 	if (*count == 0)
1594 		return (EINVAL);
1595 
1596 	/* If rid 0 is allocated, then fail. */
1597 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1598 	if (rle != NULL && rle->res != NULL)
1599 		return (ENXIO);
1600 
1601 	/* Already have allocated messages? */
1602 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1603 		return (ENXIO);
1604 
1605 	/* If MSI-X is blacklisted for this system, fail. */
1606 	if (pci_msix_blacklisted())
1607 		return (ENXIO);
1608 
1609 	/* MSI-X capability present? */
1610 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1611 		return (ENODEV);
1612 
1613 	/* Make sure the appropriate BARs are mapped. */
1614 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1615 	    cfg->msix.msix_table_bar);
1616 	if (rle == NULL || rle->res == NULL ||
1617 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1618 		return (ENXIO);
1619 	cfg->msix.msix_table_res = rle->res;
1620 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1621 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1622 		    cfg->msix.msix_pba_bar);
1623 		if (rle == NULL || rle->res == NULL ||
1624 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1625 			return (ENXIO);
1626 	}
1627 	cfg->msix.msix_pba_res = rle->res;
1628 
1629 	if (bootverbose)
1630 		device_printf(child,
1631 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1632 		    *count, cfg->msix.msix_msgnum);
1633 	max = min(*count, cfg->msix.msix_msgnum);
1634 	for (i = 0; i < max; i++) {
1635 		/* Allocate a message. */
1636 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1637 		if (error) {
1638 			if (i == 0)
1639 				return (error);
1640 			break;
1641 		}
1642 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1643 		    irq, 1);
1644 	}
1645 	actual = i;
1646 
1647 	if (bootverbose) {
1648 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1649 		if (actual == 1)
1650 			device_printf(child, "using IRQ %ju for MSI-X\n",
1651 			    rle->start);
1652 		else {
1653 			int run;
1654 
1655 			/*
1656 			 * Be fancy and try to print contiguous runs of
1657 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1658 			 * 'run' is true if we are in a range.
1659 			 */
1660 			device_printf(child, "using IRQs %ju", rle->start);
1661 			irq = rle->start;
1662 			run = 0;
1663 			for (i = 1; i < actual; i++) {
1664 				rle = resource_list_find(&dinfo->resources,
1665 				    SYS_RES_IRQ, i + 1);
1666 
1667 				/* Still in a run? */
1668 				if (rle->start == irq + 1) {
1669 					run = 1;
1670 					irq++;
1671 					continue;
1672 				}
1673 
1674 				/* Finish previous range. */
1675 				if (run) {
1676 					printf("-%d", irq);
1677 					run = 0;
1678 				}
1679 
1680 				/* Start new range. */
1681 				printf(",%ju", rle->start);
1682 				irq = rle->start;
1683 			}
1684 
1685 			/* Unfinished range? */
1686 			if (run)
1687 				printf("-%d", irq);
1688 			printf(" for MSI-X\n");
1689 		}
1690 	}
1691 
1692 	/* Mask all vectors. */
1693 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1694 		pci_mask_msix(child, i);
1695 
1696 	/* Allocate and initialize vector data and virtual table. */
1697 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1698 	    M_DEVBUF, M_WAITOK | M_ZERO);
1699 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1700 	    M_DEVBUF, M_WAITOK | M_ZERO);
1701 	for (i = 0; i < actual; i++) {
1702 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1703 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1704 		cfg->msix.msix_table[i].mte_vector = i + 1;
1705 	}
1706 
1707 	/* Update control register to enable MSI-X. */
1708 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1709 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1710 	    cfg->msix.msix_ctrl, 2);
1711 
1712 	/* Update counts of alloc'd messages. */
1713 	cfg->msix.msix_alloc = actual;
1714 	cfg->msix.msix_table_len = actual;
1715 	*count = actual;
1716 	return (0);
1717 }
1718 
1719 /*
1720  * By default, pci_alloc_msix() will assign the allocated IRQ
1721  * resources consecutively to the first N messages in the MSI-X table.
1722  * However, device drivers may want to use different layouts if they
1723  * either receive fewer messages than they asked for, or they wish to
1724  * populate the MSI-X table sparsely.  This method allows the driver
1725  * to specify what layout it wants.  It must be called after a
1726  * successful pci_alloc_msix() but before any of the associated
1727  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1728  *
1729  * The 'vectors' array contains 'count' message vectors.  The array
1730  * maps directly to the MSI-X table in that index 0 in the array
1731  * specifies the vector for the first message in the MSI-X table, etc.
1732  * The vector value in each array index can either be 0 to indicate
1733  * that no vector should be assigned to a message slot, or it can be a
1734  * number from 1 to N (where N is the count returned from a
1735  * succcessful call to pci_alloc_msix()) to indicate which message
1736  * vector (IRQ) to be used for the corresponding message.
1737  *
1738  * On successful return, each message with a non-zero vector will have
1739  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1740  * 1.  Additionally, if any of the IRQs allocated via the previous
1741  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1742  * will be freed back to the system automatically.
1743  *
1744  * For example, suppose a driver has a MSI-X table with 6 messages and
1745  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1746  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1747  * C.  After the call to pci_alloc_msix(), the device will be setup to
1748  * have an MSI-X table of ABC--- (where - means no vector assigned).
1749  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1750  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1751  * be freed back to the system.  This device will also have valid
1752  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1753  *
1754  * In any case, the SYS_RES_IRQ rid X will always map to the message
1755  * at MSI-X table index X - 1 and will only be valid if a vector is
1756  * assigned to that table entry.
1757  */
1758 int
1759 pci_remap_msix_method(device_t dev, device_t child, int count,
1760     const u_int *vectors)
1761 {
1762 	struct pci_devinfo *dinfo = device_get_ivars(child);
1763 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1764 	struct resource_list_entry *rle;
1765 	int i, irq, j, *used;
1766 
1767 	/*
1768 	 * Have to have at least one message in the table but the
1769 	 * table can't be bigger than the actual MSI-X table in the
1770 	 * device.
1771 	 */
1772 	if (count == 0 || count > msix->msix_msgnum)
1773 		return (EINVAL);
1774 
1775 	/* Sanity check the vectors. */
1776 	for (i = 0; i < count; i++)
1777 		if (vectors[i] > msix->msix_alloc)
1778 			return (EINVAL);
1779 
1780 	/*
1781 	 * Make sure there aren't any holes in the vectors to be used.
1782 	 * It's a big pain to support it, and it doesn't really make
1783 	 * sense anyway.  Also, at least one vector must be used.
1784 	 */
1785 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1786 	    M_ZERO);
1787 	for (i = 0; i < count; i++)
1788 		if (vectors[i] != 0)
1789 			used[vectors[i] - 1] = 1;
1790 	for (i = 0; i < msix->msix_alloc - 1; i++)
1791 		if (used[i] == 0 && used[i + 1] == 1) {
1792 			free(used, M_DEVBUF);
1793 			return (EINVAL);
1794 		}
1795 	if (used[0] != 1) {
1796 		free(used, M_DEVBUF);
1797 		return (EINVAL);
1798 	}
1799 
1800 	/* Make sure none of the resources are allocated. */
1801 	for (i = 0; i < msix->msix_table_len; i++) {
1802 		if (msix->msix_table[i].mte_vector == 0)
1803 			continue;
1804 		if (msix->msix_table[i].mte_handlers > 0) {
1805 			free(used, M_DEVBUF);
1806 			return (EBUSY);
1807 		}
1808 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1809 		KASSERT(rle != NULL, ("missing resource"));
1810 		if (rle->res != NULL) {
1811 			free(used, M_DEVBUF);
1812 			return (EBUSY);
1813 		}
1814 	}
1815 
1816 	/* Free the existing resource list entries. */
1817 	for (i = 0; i < msix->msix_table_len; i++) {
1818 		if (msix->msix_table[i].mte_vector == 0)
1819 			continue;
1820 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1821 	}
1822 
1823 	/*
1824 	 * Build the new virtual table keeping track of which vectors are
1825 	 * used.
1826 	 */
1827 	free(msix->msix_table, M_DEVBUF);
1828 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1829 	    M_DEVBUF, M_WAITOK | M_ZERO);
1830 	for (i = 0; i < count; i++)
1831 		msix->msix_table[i].mte_vector = vectors[i];
1832 	msix->msix_table_len = count;
1833 
1834 	/* Free any unused IRQs and resize the vectors array if necessary. */
1835 	j = msix->msix_alloc - 1;
1836 	if (used[j] == 0) {
1837 		struct msix_vector *vec;
1838 
1839 		while (used[j] == 0) {
1840 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1841 			    msix->msix_vectors[j].mv_irq);
1842 			j--;
1843 		}
1844 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1845 		    M_WAITOK);
1846 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1847 		    (j + 1));
1848 		free(msix->msix_vectors, M_DEVBUF);
1849 		msix->msix_vectors = vec;
1850 		msix->msix_alloc = j + 1;
1851 	}
1852 	free(used, M_DEVBUF);
1853 
1854 	/* Map the IRQs onto the rids. */
1855 	for (i = 0; i < count; i++) {
1856 		if (vectors[i] == 0)
1857 			continue;
1858 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1859 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1860 		    irq, 1);
1861 	}
1862 
1863 	if (bootverbose) {
1864 		device_printf(child, "Remapped MSI-X IRQs as: ");
1865 		for (i = 0; i < count; i++) {
1866 			if (i != 0)
1867 				printf(", ");
1868 			if (vectors[i] == 0)
1869 				printf("---");
1870 			else
1871 				printf("%d",
1872 				    msix->msix_vectors[vectors[i]].mv_irq);
1873 		}
1874 		printf("\n");
1875 	}
1876 
1877 	return (0);
1878 }
1879 
/*
 * Release all MSI-X messages allocated to 'child', disabling MSI-X in
 * the control register.  Fails with EBUSY if any message still has an
 * interrupt handler installed or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1926 
1927 /*
1928  * Return the max supported MSI-X messages this device supports.
1929  * Basically, assuming the MD code can alloc messages, this function
1930  * should return the maximum value that pci_alloc_msix() can return.
1931  * Thus, it is subject to the tunables, etc.
1932  */
1933 int
1934 pci_msix_count_method(device_t dev, device_t child)
1935 {
1936 	struct pci_devinfo *dinfo = device_get_ivars(child);
1937 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1938 
1939 	if (pci_do_msix && msix->msix_location != 0)
1940 		return (msix->msix_msgnum);
1941 	return (0);
1942 }
1943 
1944 int
1945 pci_msix_pba_bar_method(device_t dev, device_t child)
1946 {
1947 	struct pci_devinfo *dinfo = device_get_ivars(child);
1948 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1949 
1950 	if (pci_do_msix && msix->msix_location != 0)
1951 		return (msix->msix_pba_bar);
1952 	return (-1);
1953 }
1954 
1955 int
1956 pci_msix_table_bar_method(device_t dev, device_t child)
1957 {
1958 	struct pci_devinfo *dinfo = device_get_ivars(child);
1959 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1960 
1961 	if (pci_do_msix && msix->msix_location != 0)
1962 		return (msix->msix_table_bar);
1963 	return (-1);
1964 }
1965 
/*
 * HyperTransport MSI mapping control.
 *
 * Enable or disable the MSI -> HyperTransport interrupt mapping window
 * on a device carrying an HT MSI mapping capability.  A non-zero 'addr'
 * enables the mapping (only when the capability's configured window
 * matches addr's upper bits); addr == 0 disables it.  No-op on devices
 * without the capability.
 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do without an HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Only enable when currently disabled and the MSI address lies
	 * in the same 1MB-aligned region as the capability's window.
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1993 
1994 int
1995 pci_get_max_read_req(device_t dev)
1996 {
1997 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1998 	int cap;
1999 	uint16_t val;
2000 
2001 	cap = dinfo->cfg.pcie.pcie_location;
2002 	if (cap == 0)
2003 		return (0);
2004 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2005 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2006 	val >>= 12;
2007 	return (1 << (val + 7));
2008 }
2009 
/*
 * Set the PCI-e Max Read Request Size.  'size' (bytes) is clamped to
 * [128, 4096] and rounded down to a power of 2 before being written to
 * the device control register.  Returns the size actually programmed,
 * or 0 if the device has no PCI-e capability.
 */
int
pci_set_max_read_req(device_t dev, int size)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	int cap;
	uint16_t val;

	cap = dinfo->cfg.pcie.pcie_location;
	if (cap == 0)
		return (0);
	/* Clamp to the range the register can express. */
	if (size < 128)
		size = 128;
	if (size > 4096)
		size = 4096;
	/* Round down to a power of 2. */
	size = (1 << (fls(size) - 1));
	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
	/* Bits 14:12 encode the size as 128 << n, so n = log2(size) - 7. */
	val |= (fls(size) - 8) << 12;
	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
	return (size);
}
2031 
2032 uint32_t
2033 pcie_read_config(device_t dev, int reg, int width)
2034 {
2035 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2036 	int cap;
2037 
2038 	cap = dinfo->cfg.pcie.pcie_location;
2039 	if (cap == 0) {
2040 		if (width == 2)
2041 			return (0xffff);
2042 		return (0xffffffff);
2043 	}
2044 
2045 	return (pci_read_config(dev, cap + reg, width));
2046 }
2047 
2048 void
2049 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2050 {
2051 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2052 	int cap;
2053 
2054 	cap = dinfo->cfg.pcie.pcie_location;
2055 	if (cap == 0)
2056 		return;
2057 	pci_write_config(dev, cap + reg, value, width);
2058 }
2059 
2060 /*
2061  * Adjusts a PCI-e capability register by clearing the bits in mask
2062  * and setting the bits in (value & mask).  Bits not set in mask are
2063  * not adjusted.
2064  *
2065  * Returns the old value on success or all ones on failure.
2066  */
2067 uint32_t
2068 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2069     int width)
2070 {
2071 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2072 	uint32_t old, new;
2073 	int cap;
2074 
2075 	cap = dinfo->cfg.pcie.pcie_location;
2076 	if (cap == 0) {
2077 		if (width == 2)
2078 			return (0xffff);
2079 		return (0xffffffff);
2080 	}
2081 
2082 	old = pci_read_config(dev, cap + reg, width);
2083 	new = old & ~mask;
2084 	new |= (value & mask);
2085 	pci_write_config(dev, cap + reg, new, width);
2086 	return (old);
2087 }
2088 
/*
 * Support for MSI message signalled interrupts.
 */

/*
 * "enable msi" bus method: program the MSI capability's address and
 * data registers with the given values and set the MSI enable bit in
 * the control register.  Also enables any HyperTransport MSI mapping
 * for the device.
 */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/* The data register's offset depends on 64-bit address support. */
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
2119 
/*
 * "disable msi" bus method: clear the MSI enable bit in the control
 * register and tear down any HyperTransport MSI mapping first.
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
2134 
/*
 * Restore MSI registers during resume.  If MSI is enabled then
 * restore the data and address registers in addition to the control
 * register.
 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data before re-enabling. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
2165 
2166 static int
2167 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2168 {
2169 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2170 	pcicfgregs *cfg = &dinfo->cfg;
2171 	struct resource_list_entry *rle;
2172 	struct msix_table_entry *mte;
2173 	struct msix_vector *mv;
2174 	uint64_t addr;
2175 	uint32_t data;
2176 	int error, i, j;
2177 
2178 	/*
2179 	 * Handle MSI first.  We try to find this IRQ among our list
2180 	 * of MSI IRQs.  If we find it, we request updated address and
2181 	 * data registers and apply the results.
2182 	 */
2183 	if (cfg->msi.msi_alloc > 0) {
2184 
2185 		/* If we don't have any active handlers, nothing to do. */
2186 		if (cfg->msi.msi_handlers == 0)
2187 			return (0);
2188 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2189 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2190 			    i + 1);
2191 			if (rle->start == irq) {
2192 				error = PCIB_MAP_MSI(device_get_parent(bus),
2193 				    dev, irq, &addr, &data);
2194 				if (error)
2195 					return (error);
2196 				pci_disable_msi(dev);
2197 				dinfo->cfg.msi.msi_addr = addr;
2198 				dinfo->cfg.msi.msi_data = data;
2199 				pci_enable_msi(dev, addr, data);
2200 				return (0);
2201 			}
2202 		}
2203 		return (ENOENT);
2204 	}
2205 
2206 	/*
2207 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2208 	 * we request the updated mapping info.  If that works, we go
2209 	 * through all the slots that use this IRQ and update them.
2210 	 */
2211 	if (cfg->msix.msix_alloc > 0) {
2212 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2213 			mv = &cfg->msix.msix_vectors[i];
2214 			if (mv->mv_irq == irq) {
2215 				error = PCIB_MAP_MSI(device_get_parent(bus),
2216 				    dev, irq, &addr, &data);
2217 				if (error)
2218 					return (error);
2219 				mv->mv_address = addr;
2220 				mv->mv_data = data;
2221 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2222 					mte = &cfg->msix.msix_table[j];
2223 					if (mte->mte_vector != i + 1)
2224 						continue;
2225 					if (mte->mte_handlers == 0)
2226 						continue;
2227 					pci_mask_msix(dev, j);
2228 					pci_enable_msix(dev, j, addr, data);
2229 					pci_unmask_msix(dev, j);
2230 				}
2231 			}
2232 		}
2233 		return (ENOENT);
2234 	}
2235 
2236 	return (ENOENT);
2237 }
2238 
2239 /*
2240  * Returns true if the specified device is blacklisted because MSI
2241  * doesn't work.
2242  */
2243 int
2244 pci_msi_device_blacklisted(device_t dev)
2245 {
2246 
2247 	if (!pci_honor_msi_blacklist)
2248 		return (0);
2249 
2250 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2251 }
2252 
2253 /*
2254  * Determine if MSI is blacklisted globally on this system.  Currently,
2255  * we just check for blacklisted chipsets as represented by the
2256  * host-PCI bridge at device 0:0:0.  In the future, it may become
2257  * necessary to check other system attributes, such as the kenv values
2258  * that give the motherboard manufacturer and model number.
2259  */
2260 static int
2261 pci_msi_blacklisted(void)
2262 {
2263 	device_t dev;
2264 
2265 	if (!pci_honor_msi_blacklist)
2266 		return (0);
2267 
2268 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2269 	if (!(pcie_chipset || pcix_chipset)) {
2270 		if (vm_guest != VM_GUEST_NO) {
2271 			/*
2272 			 * Whitelist older chipsets in virtual
2273 			 * machines known to support MSI.
2274 			 */
2275 			dev = pci_find_bsf(0, 0, 0);
2276 			if (dev != NULL)
2277 				return (!pci_has_quirk(pci_get_devid(dev),
2278 					PCI_QUIRK_ENABLE_MSI_VM));
2279 		}
2280 		return (1);
2281 	}
2282 
2283 	dev = pci_find_bsf(0, 0, 0);
2284 	if (dev != NULL)
2285 		return (pci_msi_device_blacklisted(dev));
2286 	return (0);
2287 }
2288 
2289 /*
2290  * Returns true if the specified device is blacklisted because MSI-X
2291  * doesn't work.  Note that this assumes that if MSI doesn't work,
2292  * MSI-X doesn't either.
2293  */
2294 int
2295 pci_msix_device_blacklisted(device_t dev)
2296 {
2297 
2298 	if (!pci_honor_msi_blacklist)
2299 		return (0);
2300 
2301 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2302 		return (1);
2303 
2304 	return (pci_msi_device_blacklisted(dev));
2305 }
2306 
2307 /*
2308  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2309  * is blacklisted, assume that MSI-X is as well.  Check for additional
2310  * chipsets where MSI works but MSI-X does not.
2311  */
2312 static int
2313 pci_msix_blacklisted(void)
2314 {
2315 	device_t dev;
2316 
2317 	if (!pci_honor_msi_blacklist)
2318 		return (0);
2319 
2320 	dev = pci_find_bsf(0, 0, 0);
2321 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2322 	    PCI_QUIRK_DISABLE_MSIX))
2323 		return (1);
2324 
2325 	return (pci_msi_blacklisted());
2326 }
2327 
/*
 * Attempt to allocate *count MSI messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
 *
 * Returns 0 on success; EINVAL for a zero or non-power-of-2 request,
 * ENXIO if messages are already allocated or MSI is blacklisted, ENODEV
 * if the device has no usable MSI capability, or the error from the
 * parent bridge's allocator.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request on failure; the count stays a power of 2. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME field (bits 6:4) holds log2 of the enabled message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2451 
/*
 * Release the MSI messages associated with this device.  Tries MSI-X
 * first; returns EBUSY if any message still has an active handler or
 * an allocated resource, ENODEV if nothing is allocated, 0 on success.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Record the IRQ numbers so we can hand them back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2500 
2501 /*
2502  * Return the max supported MSI messages this device supports.
2503  * Basically, assuming the MD code can alloc messages, this function
2504  * should return the maximum value that pci_alloc_msi() can return.
2505  * Thus, it is subject to the tunables, etc.
2506  */
2507 int
2508 pci_msi_count_method(device_t dev, device_t child)
2509 {
2510 	struct pci_devinfo *dinfo = device_get_ivars(child);
2511 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2512 
2513 	if (pci_do_msi && msi->msi_location != 0)
2514 		return (msi->msi_msgnum);
2515 	return (0);
2516 }
2517 
/* free pcicfgregs structure and all depending data structures */

/*
 * Release all memory hung off a device's config snapshot (VPD data and
 * the recorded BAR list), unlink the devinfo from the global device
 * queue, and free it.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free any VPD strings captured at probe time. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the recorded BAR entries. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2551 
/*
 * PCI power management
 */

/*
 * Set the child's PCI power state (D0-D3) through its power management
 * capability.  Returns EOPNOTSUPP if the device lacks the capability
 * or does not support the requested state, EINVAL for an unknown
 * state, and 0 on success (including the no-op same-state case).
 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the non-state bits of the status register. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; check the capability. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is optional; check the capability. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2626 
2627 int
2628 pci_get_powerstate_method(device_t dev, device_t child)
2629 {
2630 	struct pci_devinfo *dinfo = device_get_ivars(child);
2631 	pcicfgregs *cfg = &dinfo->cfg;
2632 	uint16_t status;
2633 	int result;
2634 
2635 	if (cfg->pp.pp_cap != 0) {
2636 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2637 		switch (status & PCIM_PSTAT_DMASK) {
2638 		case PCIM_PSTAT_D0:
2639 			result = PCI_POWERSTATE_D0;
2640 			break;
2641 		case PCIM_PSTAT_D1:
2642 			result = PCI_POWERSTATE_D1;
2643 			break;
2644 		case PCIM_PSTAT_D2:
2645 			result = PCI_POWERSTATE_D2;
2646 			break;
2647 		case PCIM_PSTAT_D3:
2648 			result = PCI_POWERSTATE_D3;
2649 			break;
2650 		default:
2651 			result = PCI_POWERSTATE_UNKNOWN;
2652 			break;
2653 		}
2654 	} else {
2655 		/* No support, device is always at D0 */
2656 		result = PCI_POWERSTATE_D0;
2657 	}
2658 	return (result);
2659 }
2660 
2661 /*
2662  * Some convenience functions for PCI device drivers.
2663  */
2664 
2665 static __inline void
2666 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2667 {
2668 	uint16_t	command;
2669 
2670 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2671 	command |= bit;
2672 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2673 }
2674 
2675 static __inline void
2676 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2677 {
2678 	uint16_t	command;
2679 
2680 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2681 	command &= ~bit;
2682 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2683 }
2684 
2685 int
2686 pci_enable_busmaster_method(device_t dev, device_t child)
2687 {
2688 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2689 	return (0);
2690 }
2691 
2692 int
2693 pci_disable_busmaster_method(device_t dev, device_t child)
2694 {
2695 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2696 	return (0);
2697 }
2698 
2699 int
2700 pci_enable_io_method(device_t dev, device_t child, int space)
2701 {
2702 	uint16_t bit;
2703 
2704 	switch(space) {
2705 	case SYS_RES_IOPORT:
2706 		bit = PCIM_CMD_PORTEN;
2707 		break;
2708 	case SYS_RES_MEMORY:
2709 		bit = PCIM_CMD_MEMEN;
2710 		break;
2711 	default:
2712 		return (EINVAL);
2713 	}
2714 	pci_set_command_bit(dev, child, bit);
2715 	return (0);
2716 }
2717 
2718 int
2719 pci_disable_io_method(device_t dev, device_t child, int space)
2720 {
2721 	uint16_t bit;
2722 
2723 	switch(space) {
2724 	case SYS_RES_IOPORT:
2725 		bit = PCIM_CMD_PORTEN;
2726 		break;
2727 	case SYS_RES_MEMORY:
2728 		bit = PCIM_CMD_MEMEN;
2729 		break;
2730 	default:
2731 		return (EINVAL);
2732 	}
2733 	pci_clear_command_bit(dev, child, bit);
2734 	return (0);
2735 }
2736 
2737 /*
2738  * New style pci driver.  Parent device is either a pci-host-bridge or a
2739  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2740  */
2741 
/*
 * Dump a decoded summary of a device's config header (IDs, location,
 * class, timing, power, MSI/MSI-X capabilities) to the console.  Only
 * prints when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state for the report. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2798 
2799 static int
2800 pci_porten(device_t dev)
2801 {
2802 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2803 }
2804 
2805 static int
2806 pci_memen(device_t dev)
2807 {
2808 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2809 }
2810 
/*
 * Probe a BAR: return its current value in *mapp and the all-ones
 * sizing value in *testvalp.  If 'bar64' is non-NULL it is set to 1
 * when the BAR is a 64-bit memory BAR.  For regular BARs, decoding is
 * disabled around the sizing writes and the original BAR contents are
 * restored before returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the enable bit (bit 0) clear while sizing. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2879 
/*
 * Program the BAR described by 'pm' with 'base' (both halves for a
 * 64-bit BAR) and refresh pm->pm_value by reading the registers back,
 * recording the value the hardware actually latched.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the device accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2900 
2901 struct pci_map *
2902 pci_find_bar(device_t dev, int reg)
2903 {
2904 	struct pci_devinfo *dinfo;
2905 	struct pci_map *pm;
2906 
2907 	dinfo = device_get_ivars(dev);
2908 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2909 		if (pm->pm_reg == reg)
2910 			return (pm);
2911 	}
2912 	return (NULL);
2913 }
2914 
2915 int
2916 pci_bar_enabled(device_t dev, struct pci_map *pm)
2917 {
2918 	struct pci_devinfo *dinfo;
2919 	uint16_t cmd;
2920 
2921 	dinfo = device_get_ivars(dev);
2922 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2923 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2924 		return (0);
2925 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2926 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2927 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2928 	else
2929 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2930 }
2931 
/*
 * Allocate and record a new BAR entry for config register 'reg' with
 * the given raw value and log2 size, inserting it into the device's
 * map list in ascending register order (entries are normally added in
 * increasing order during probing).  Returns the new entry.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the ordering is preserved. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2956 
2957 static void
2958 pci_restore_bars(device_t dev)
2959 {
2960 	struct pci_devinfo *dinfo;
2961 	struct pci_map *pm;
2962 	int ln2range;
2963 
2964 	dinfo = device_get_ivars(dev);
2965 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2966 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2967 			ln2range = 32;
2968 		else
2969 			ln2range = pci_maprange(pm->pm_value);
2970 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2971 		if (ln2range == 64)
2972 			pci_write_config(dev, pm->pm_reg + 4,
2973 			    pm->pm_value >> 32, 4);
2974 	}
2975 }
2976 
2977 /*
2978  * Add a resource based on a pci map register. Return 1 if the map
2979  * register is a 32bit map register or 2 if it is a 64bit register.
2980  */
/*
 * Probe a single BAR register, record it, add it to the device's
 * resource list, and try to reserve its address range from the parent.
 * 'force' requests a resource-list entry even for BARs the firmware
 * left unassigned; 'prefetch' marks the resource prefetchable.  The
 * return value (1 or 2) is the number of 32-bit registers the BAR
 * occupies, so the caller can advance past 64-bit maps.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Sizing probe: 'map' is the live value, 'testval' the all-ones readback. */
	pci_read_bar(dev, reg, &map, &testval, NULL);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* A base that doesn't survive a round-trip through u_long can't be used. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	/* BAR sizes are powers of two; alignment equals the size. */
	count = (pci_addr_t)1 << mapsize;
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with whatever range was actually granted. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
3148 
3149 /*
3150  * For ATA devices we need to decide early what addressing mode to use.
3151  * Legacy demands that the primary and secondary ATA ports sits on the
3152  * same addresses that old ISA hardware did. This dictates that we use
3153  * those addresses and ignore the BAR's if we cannot set PCI native
3154  * addressing mode.
3155  */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	/*
	 * The prog-if byte tells us, per channel, whether the controller
	 * is in native PCI mode (use the BARs) or legacy compatibility
	 * mode (use the fixed ISA-era port addresses).
	 */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: BARs 0 (cmd) and 1 (ctl). */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Legacy primary channel: 0x1f0-0x1f7 command, 0x3f6 control. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: BARs 2 (cmd) and 3 (ctl). */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Legacy secondary channel: 0x170-0x177 command, 0x376 control. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BARs 4 (bus-master DMA) and 5 are always handled normally. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
3209 
/*
 * Determine the IRQ for a device's INTx pin and record it as the rid 0
 * SYS_RES_IRQ entry.  Precedence: a user tunable, then (depending on
 * force_route) either bus interrupt routing or the intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only IRQs in (0, 255) from the tunable are accepted. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3257 
3258 /* Perform early OHCI takeover from SMM. */
3259 static void
3260 ohci_early_takeover(device_t self)
3261 {
3262 	struct resource *res;
3263 	uint32_t ctl;
3264 	int rid;
3265 	int i;
3266 
3267 	rid = PCIR_BAR(0);
3268 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3269 	if (res == NULL)
3270 		return;
3271 
3272 	ctl = bus_read_4(res, OHCI_CONTROL);
3273 	if (ctl & OHCI_IR) {
3274 		if (bootverbose)
3275 			printf("ohci early: "
3276 			    "SMM active, request owner change\n");
3277 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3278 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3279 			DELAY(1000);
3280 			ctl = bus_read_4(res, OHCI_CONTROL);
3281 		}
3282 		if (ctl & OHCI_IR) {
3283 			if (bootverbose)
3284 				printf("ohci early: "
3285 				    "SMM does not respond, resetting\n");
3286 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3287 		}
3288 		/* Disable interrupts */
3289 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3290 	}
3291 
3292 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3293 }
3294 
3295 /* Perform early UHCI takeover from SMM. */
3296 static void
3297 uhci_early_takeover(device_t self)
3298 {
3299 	struct resource *res;
3300 	int rid;
3301 
3302 	/*
3303 	 * Set the PIRQD enable bit and switch off all the others. We don't
3304 	 * want legacy support to interfere with us XXX Does this also mean
3305 	 * that the BIOS won't touch the keyboard anymore if it is connected
3306 	 * to the ports of the root hub?
3307 	 */
3308 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3309 
3310 	/* Disable interrupts */
3311 	rid = PCI_UHCI_BASE_REG;
3312 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3313 	if (res != NULL) {
3314 		bus_write_2(res, UHCI_INTR, 0);
3315 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3316 	}
3317 }
3318 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the extended-capability list in config space, looking for
	 * the legacy-support capability; eecp == 0 terminates the list.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS-ownership semaphore and wait up to 100 ms. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3374 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones so the first iteration's XHCI_XECP_NEXT(eec) is nonzero. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/* Offsets in the XECP list are in units of 32-bit dwords, hence << 2. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3436 
3437 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a bridge or
 * CardBus device.  If the range is invalid or cannot be reserved from
 * the parent, the secbus/subbus registers are cleared so the range can
 * be renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* The secbus/subbus register offsets differ by header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Only apply on the specific Compal planar (maker + product). */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Reached on invalid range, pci_clear_buses, or reservation failure. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3542 
/*
 * Allocate the secondary bus number range (rid 0 PCI_RES_BUS) for a
 * bridge child.  If no range was reserved earlier, lazily reserve one
 * now and program the bridge's secbus/subbus registers to match.
 * Returns NULL for non-bridge headers or a nonzero rid.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Only rid 0 is supported for bus-number ranges. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve inactive; activation happens in the final alloc. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the newly granted range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3593 #endif
3594 
/*
 * Translate an Enhanced Allocation BAR Equivalent Indicator (BEI) into
 * the config-space register offset used as the resource rid.  Returns
 * -1 when the BEI does not map to a BAR, the ROM, or (with PCI_IOV) a
 * VF BAR.
 */
static int
pci_ea_bei_to_rid(device_t dev, int bei)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
	int iov_pos;
	struct pcicfg_iov *iov;

	/* VF BAR offsets are relative to the SR-IOV capability position. */
	dinfo = device_get_ivars(dev);
	iov = dinfo->cfg.iov;
	if (iov != NULL)
		iov_pos = iov->iov_pos;
	else
		iov_pos = 0;
#endif

	/* Check if matches BAR */
	if ((bei >= PCIM_EA_BEI_BAR_0) &&
	    (bei <= PCIM_EA_BEI_BAR_5))
		return (PCIR_BAR(bei));

	/* Check ROM */
	if (bei == PCIM_EA_BEI_ROM)
		return (PCIR_BIOS);

#ifdef PCI_IOV
	/* Check if matches VF_BAR */
	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
	    (bei <= PCIM_EA_BEI_VF_BAR_5))
		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
		    iov_pos);
#endif

	return (-1);
}
3630 
3631 int
3632 pci_ea_is_enabled(device_t dev, int rid)
3633 {
3634 	struct pci_ea_entry *ea;
3635 	struct pci_devinfo *dinfo;
3636 
3637 	dinfo = device_get_ivars(dev);
3638 
3639 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3640 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3641 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3642 	}
3643 
3644 	return (0);
3645 }
3646 
/*
 * Walk the device's Enhanced Allocation entries and reserve resources
 * for them.  With alloc_iov nonzero only VF BARs are considered (and
 * sized for all VFs); otherwise only regular BARs and the ROM are.
 * Entries that cannot be reserved are disabled in config space so the
 * legacy BAR path can handle them later.
 */
void
pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
{
	struct pci_ea_entry *ea;
	struct pci_devinfo *dinfo;
	pci_addr_t start, end, count;
	struct resource_list *rl;
	int type, flags, rid;
	struct resource *res;
	uint32_t tmp;
#ifdef PCI_IOV
	struct pcicfg_iov *iov;
#endif

	dinfo = device_get_ivars(dev);
	rl = &dinfo->resources;
	flags = 0;

#ifdef PCI_IOV
	iov = dinfo->cfg.iov;
#endif

	/* No EA capability means nothing to do. */
	if (dinfo->cfg.ea.ea_location == 0)
		return;

	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {

		/*
		 * TODO: Ignore EA-BAR if is not enabled.
		 *   Currently the EA implementation supports
		 *   only situation, where EA structure contains
		 *   predefined entries. In case they are not enabled
		 *   leave them unallocated and proceed with
		 *   a legacy-BAR mechanism.
		 */
		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
			continue;

		/* Map the entry's properties field to a resource type. */
		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
		case PCIM_EA_P_MEM_PREFETCH:
		case PCIM_EA_P_VF_MEM_PREFETCH:
			flags = RF_PREFETCHABLE;
			/* FALLTHROUGH */
		case PCIM_EA_P_VF_MEM:
		case PCIM_EA_P_MEM:
			type = SYS_RES_MEMORY;
			break;
		case PCIM_EA_P_IO:
			type = SYS_RES_IOPORT;
			break;
		default:
			continue;
		}

		if (alloc_iov != 0) {
#ifdef PCI_IOV
			/* Allocating IOV, confirm BEI matches */
			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
				continue;
#else
			continue;
#endif
		} else {
			/* Allocating BAR, confirm BEI matches */
			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
			    (ea->eae_bei != PCIM_EA_BEI_ROM))
				continue;
		}

		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
		if (rid < 0)
			continue;

		/* Skip resources already allocated by EA */
		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
			continue;

		start = ea->eae_base;
		count = ea->eae_max_offset + 1;
#ifdef PCI_IOV
		/* VF BARs must cover the window for every virtual function. */
		if (iov != NULL)
			count = count * iov->iov_num_vfs;
#endif
		end = start + count - 1;
		if (count == 0)
			continue;

		resource_list_add(rl, type, rid, start, end, count);
		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
		    flags);
		if (res == NULL) {
			resource_list_delete(rl, type, rid);

			/*
			 * Failed to allocate using EA, disable entry.
			 * Another attempt to allocation will be performed
			 * further, but this time using legacy BAR registers
			 */
			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
			tmp &= ~PCIM_EA_ENABLE;
			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);

			/*
			 * Disabling entry might fail in case it is hardwired.
			 * Read flags again to match current status.
			 */
			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);

			continue;
		}

		/* As per specification, fill BAR with zeros */
		pci_write_config(dev, rid, 0, 4);
	}
}
3765 
/*
 * Populate a new child device's resource list: EA entries first, then
 * BARs (with special handling for legacy ATA), quirked maps, the INTx
 * interrupt, USB SMM takeover, and bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* i advances by 1 or 2 depending on 32/64-bit BAR width. */
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3858 
3859 static struct pci_devinfo *
3860 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3861     int slot, int func)
3862 {
3863 	struct pci_devinfo *dinfo;
3864 
3865 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3866 	if (dinfo != NULL)
3867 		pci_add_child(dev, dinfo);
3868 
3869 	return (dinfo);
3870 }
3871 
/*
 * Enumerate every slot/function on the given bus and add a child
 * device for each function found.  Slot 0 function 0 is probed first
 * so ARI can be enabled before the rest of the bus is scanned.
 */
void
pci_add_children(device_t dev, int domain, int busno)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* REG() reads config space of slot s, function f (here 0). */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function bit set: scan beyond function 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f);
	}
#undef REG
}
3914 
3915 int
3916 pci_rescan_method(device_t dev)
3917 {
3918 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3919 	device_t pcib = device_get_parent(dev);
3920 	struct pci_softc *sc;
3921 	device_t child, *devlist, *unchanged;
3922 	int devcount, error, i, j, maxslots, oldcount;
3923 	int busno, domain, s, f, pcifunchigh;
3924 	uint8_t hdrtype;
3925 
3926 	/* No need to check for ARI on a rescan. */
3927 	error = device_get_children(dev, &devlist, &devcount);
3928 	if (error)
3929 		return (error);
3930 	if (devcount != 0) {
3931 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3932 		    M_NOWAIT | M_ZERO);
3933 		if (unchanged == NULL) {
3934 			free(devlist, M_TEMP);
3935 			return (ENOMEM);
3936 		}
3937 	} else
3938 		unchanged = NULL;
3939 
3940 	sc = device_get_softc(dev);
3941 	domain = pcib_get_domain(dev);
3942 	busno = pcib_get_bus(dev);
3943 	maxslots = PCIB_MAXSLOTS(pcib);
3944 	for (s = 0; s <= maxslots; s++) {
3945 		/* If function 0 is not present, skip to the next slot. */
3946 		f = 0;
3947 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3948 			continue;
3949 		pcifunchigh = 0;
3950 		hdrtype = REG(PCIR_HDRTYPE, 1);
3951 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3952 			continue;
3953 		if (hdrtype & PCIM_MFDEV)
3954 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3955 		for (f = 0; f <= pcifunchigh; f++) {
3956 			if (REG(PCIR_VENDOR, 2) == 0xfff)
3957 				continue;
3958 
3959 			/*
3960 			 * Found a valid function.  Check if a
3961 			 * device_t for this device already exists.
3962 			 */
3963 			for (i = 0; i < devcount; i++) {
3964 				child = devlist[i];
3965 				if (child == NULL)
3966 					continue;
3967 				if (pci_get_slot(child) == s &&
3968 				    pci_get_function(child) == f) {
3969 					unchanged[i] = child;
3970 					goto next_func;
3971 				}
3972 			}
3973 
3974 			pci_identify_function(pcib, dev, domain, busno, s, f);
3975 		next_func:;
3976 		}
3977 	}
3978 
3979 	/* Remove devices that are no longer present. */
3980 	for (i = 0; i < devcount; i++) {
3981 		if (unchanged[i] != NULL)
3982 			continue;
3983 		device_delete_child(dev, devlist[i]);
3984 	}
3985 
3986 	free(devlist, M_TEMP);
3987 	oldcount = devcount;
3988 
3989 	/* Try to attach the devices just added. */
3990 	error = device_get_children(dev, &devlist, &devcount);
3991 	if (error) {
3992 		free(unchanged, M_TEMP);
3993 		return (error);
3994 	}
3995 
3996 	for (i = 0; i < devcount; i++) {
3997 		for (j = 0; j < oldcount; j++) {
3998 			if (devlist[i] == unchanged[j])
3999 				goto next_device;
4000 		}
4001 
4002 		device_probe_and_attach(devlist[i]);
4003 	next_device:;
4004 	}
4005 
4006 	free(unchanged, M_TEMP);
4007 	free(devlist, M_TEMP);
4008 	return (0);
4009 #undef REG
4010 }
4011 
4012 #ifdef PCI_IOV
4013 device_t
4014 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4015     uint16_t did)
4016 {
4017 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4018 	device_t pcib;
4019 	int busno, slot, func;
4020 
4021 	pf_dinfo = device_get_ivars(pf);
4022 
4023 	pcib = device_get_parent(bus);
4024 
4025 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4026 
4027 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4028 	    slot, func, vid, did);
4029 
4030 	vf_dinfo->cfg.flags |= PCICFG_VF;
4031 	pci_add_child(bus, vf_dinfo);
4032 
4033 	return (vf_dinfo->cfg.dev);
4034 }
4035 
/*
 * Default "create IOV child" bus method: delegate directly to
 * pci_add_iov_child() with no extra bookkeeping.
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, rid, vid, did));
}
4043 #endif
4044 
4045 void
4046 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4047 {
4048 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4049 	device_set_ivars(dinfo->cfg.dev, dinfo);
4050 	resource_list_init(&dinfo->resources);
4051 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4052 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4053 	pci_print_verbose(dinfo);
4054 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4055 	pci_child_added(dinfo->cfg.dev);
4056 }
4057 
/*
 * Default "child added" bus method: intentionally a no-op.  Subclassed
 * PCI buses may override this to react to new children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4063 
/*
 * Generic PCI bus probe: matches any pcib-provided bus but at a low
 * priority so more specific bus drivers can take over.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
4073 
/*
 * Attach work shared by the generic PCI bus driver and its subclasses:
 * reserve our bus number from the parent bridge (when bus number
 * resources are supported) and set up the DMA tag the children will
 * inherit.  Returns 0 on success or ENXIO if the bus number cannot be
 * reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Hold our own bus number so nothing else can claim it. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a boundary-restricted DMA tag only at the top-level PCI
	 * bus (i.e. when our grandparent is not itself a PCI bus); nested
	 * buses inherit the parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4120 
/*
 * Attach method for the generic PCI bus: do the common setup, enumerate
 * the functions on this bus and attach drivers to them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno);
	return (bus_generic_attach(dev));
}
4141 
/*
 * Detach method: detach the children, return the bus number reserved in
 * pci_attach_common() (when bus number resources are in use) and delete
 * the child device_t's.
 */
static int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
	struct pci_softc *sc;
#endif
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
#ifdef PCI_RES_BUS
	/* Give our bus number back to the parent bridge. */
	sc = device_get_softc(dev);
	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
	if (error)
		return (error);
#endif
	return (device_delete_children(dev));
}
4161 
4162 static void
4163 pci_set_power_child(device_t dev, device_t child, int state)
4164 {
4165 	device_t pcib;
4166 	int dstate;
4167 
4168 	/*
4169 	 * Set the device to the given state.  If the firmware suggests
4170 	 * a different power state, use it instead.  If power management
4171 	 * is not present, the firmware is responsible for managing
4172 	 * device power.  Skip children who aren't attached since they
4173 	 * are handled separately.
4174 	 */
4175 	pcib = device_get_parent(dev);
4176 	dstate = state;
4177 	if (device_is_attached(child) &&
4178 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4179 		pci_set_powerstate(child, dstate);
4180 }
4181 
4182 int
4183 pci_suspend_child(device_t dev, device_t child)
4184 {
4185 	struct pci_devinfo *dinfo;
4186 	int error;
4187 
4188 	dinfo = device_get_ivars(child);
4189 
4190 	/*
4191 	 * Save the PCI configuration space for the child and set the
4192 	 * device in the appropriate power state for this sleep state.
4193 	 */
4194 	pci_cfg_save(child, dinfo, 0);
4195 
4196 	/* Suspend devices before potentially powering them down. */
4197 	error = bus_generic_suspend_child(dev, child);
4198 
4199 	if (error)
4200 		return (error);
4201 
4202 	if (pci_do_power_suspend)
4203 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4204 
4205 	return (0);
4206 }
4207 
4208 int
4209 pci_resume_child(device_t dev, device_t child)
4210 {
4211 	struct pci_devinfo *dinfo;
4212 
4213 	if (pci_do_power_resume)
4214 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4215 
4216 	dinfo = device_get_ivars(child);
4217 	pci_cfg_restore(child, dinfo);
4218 	if (!device_is_attached(child))
4219 		pci_cfg_save(child, dinfo, 1);
4220 
4221 	bus_generic_resume_child(dev, child);
4222 
4223 	return (0);
4224 }
4225 
4226 int
4227 pci_resume(device_t dev)
4228 {
4229 	device_t child, *devlist;
4230 	int error, i, numdevs;
4231 
4232 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4233 		return (error);
4234 
4235 	/*
4236 	 * Resume critical devices first, then everything else later.
4237 	 */
4238 	for (i = 0; i < numdevs; i++) {
4239 		child = devlist[i];
4240 		switch (pci_get_class(child)) {
4241 		case PCIC_DISPLAY:
4242 		case PCIC_MEMORY:
4243 		case PCIC_BRIDGE:
4244 		case PCIC_BASEPERIPH:
4245 			BUS_RESUME_CHILD(dev, child);
4246 			break;
4247 		}
4248 	}
4249 	for (i = 0; i < numdevs; i++) {
4250 		child = devlist[i];
4251 		switch (pci_get_class(child)) {
4252 		case PCIC_DISPLAY:
4253 		case PCIC_MEMORY:
4254 		case PCIC_BRIDGE:
4255 		case PCIC_BASEPERIPH:
4256 			break;
4257 		default:
4258 			BUS_RESUME_CHILD(dev, child);
4259 		}
4260 	}
4261 	free(devlist, M_TEMP);
4262 	return (0);
4263 }
4264 
/*
 * Locate the preloaded "pci_vendor_data" module (the PCI vendor/device
 * description database) and publish its address and size via the
 * pci_vendordata globals for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this writes one byte past the
			 * fetched size; presumably the preload image
			 * leaves room for it — confirm against the
			 * loader's padding guarantees.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
4284 
/*
 * Bus method invoked when a new driver is loaded: re-run identify and
 * retry probe/attach on every child that does not yet have a driver,
 * restoring config state first in case the device was powered down.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have (or had) a driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		/* On failed attach, re-save state as for a detach. */
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
4313 
/*
 * Bus method: hook up an interrupt handler for a child's IRQ resource.
 * rid 0 is the legacy INTx interrupt; any other rid is an MSI or MSI-X
 * vector, which is lazily mapped through the parent bridge and enabled
 * when its first handler is installed.  Per-vector handler counts are
 * kept in the child's devinfo so teardown can disable vectors again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4413 
/*
 * Bus method: remove an interrupt handler installed by pci_setup_intr().
 * For INTx (rid 0) the interrupt is simply masked; for MSI/MSI-X the
 * per-vector handler count is decremented and the vector is disabled or
 * masked when the count reaches zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/*
			 * NOTE(review): the assertion text says "MSI-X"
			 * but this is the MSI path.
			 */
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
4472 
4473 int
4474 pci_print_child(device_t dev, device_t child)
4475 {
4476 	struct pci_devinfo *dinfo;
4477 	struct resource_list *rl;
4478 	int retval = 0;
4479 
4480 	dinfo = device_get_ivars(child);
4481 	rl = &dinfo->resources;
4482 
4483 	retval += bus_print_child_header(dev, child);
4484 
4485 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4486 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4487 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4488 	if (device_get_flags(dev))
4489 		retval += printf(" flags %#x", device_get_flags(dev));
4490 
4491 	retval += printf(" at device %d.%d", pci_get_slot(child),
4492 	    pci_get_function(child));
4493 
4494 	retval += bus_print_child_domain(dev, child);
4495 	retval += bus_print_child_footer(dev, child);
4496 
4497 	return (retval);
4498 }
4499 
/*
 * Human-readable names for PCI class/subclass codes, used by
 * pci_probe_nomatch() when no driver attaches and no vendor-database
 * entry exists.  A subclass of -1 is the fallback entry for the class.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4594 
/*
 * Bus method invoked when no driver claims a child: print a description
 * of the unclaimed device (from the vendor database if loaded, else
 * from pci_nomatch_tab) and power the device down.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	/* Save state and power the unclaimed device down (setstate=1). */
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4641 
/*
 * Bus method invoked after a child's driver detaches: release any
 * resources the driver leaked (warning about each kind) and save the
 * device's config state, powering it down.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	/* Save state and power the now-driverless device down. */
	pci_cfg_save(child, dinfo, 1);
}
4673 
4674 /*
4675  * Parse the PCI device database, if loaded, and return a pointer to a
4676  * description of the device.
4677  *
4678  * The database is flat text formatted as follows:
4679  *
4680  * Any line not in a valid format is ignored.
4681  * Lines are terminated with newline '\n' characters.
4682  *
4683  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4684  * the vendor name.
4685  *
4686  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4687  * - devices cannot be listed without a corresponding VENDOR line.
4688  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4689  * another TAB, then the device name.
4690  */
4691 
4692 /*
4693  * Assuming (ptr) points to the beginning of a line in the database,
4694  * return the vendor or device and description of the next entry.
4695  * The value of (vendor) or (device) inappropriate for the entry type
4696  * is set to -1.  Returns nonzero at the end of the database.
4697  *
4698  * Note that this is slightly unrobust in the face of corrupt data;
4699  * we attempt to safeguard against this by spamming the end of the
4700  * database with a newline when we initialise.
4701  */
4702 static int
4703 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4704 {
4705 	char	*cp = *ptr;
4706 	int	left;
4707 
4708 	*device = -1;
4709 	*vendor = -1;
4710 	**desc = '\0';
4711 	for (;;) {
4712 		left = pci_vendordata_size - (cp - pci_vendordata);
4713 		if (left <= 0) {
4714 			*ptr = cp;
4715 			return(1);
4716 		}
4717 
4718 		/* vendor entry? */
4719 		if (*cp != '\t' &&
4720 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4721 			break;
4722 		/* device entry? */
4723 		if (*cp == '\t' &&
4724 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4725 			break;
4726 
4727 		/* skip to next line */
4728 		while (*cp != '\n' && left > 0) {
4729 			cp++;
4730 			left--;
4731 		}
4732 		if (*cp == '\n') {
4733 			cp++;
4734 			left--;
4735 		}
4736 	}
4737 	/* skip to next line */
4738 	while (*cp != '\n' && left > 0) {
4739 		cp++;
4740 		left--;
4741 	}
4742 	if (*cp == '\n' && left > 0)
4743 		cp++;
4744 	*ptr = cp;
4745 	return(0);
4746 }
4747 
/*
 * Look up a device in the preloaded vendor database and return a
 * malloc'd "vendor, device" description string, or NULL if no database
 * is loaded or allocation fails.  The caller frees the result with
 * free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database or next vendor: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the numeric device ID when no name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4800 
/*
 * Bus method: read an instance variable of a PCI child.  Most values
 * come straight from the cached config-space registers in the child's
 * devinfo.  Returns 0, EINVAL for values not applicable to this device,
 * or ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	/* MIN_GNT and MAX_LAT registers only exist in type-0 headers. */
	case PCI_IVAR_MINGNT:
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4891 
/*
 * Bus method: write an instance variable of a PCI child.  Only the
 * interrupt pin may currently be changed; the identity and location
 * ivars are read-only and return EINVAL.  Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4924 
4925 #include "opt_ddb.h"
4926 #ifdef DDB
4927 #include <ddb/ddb.h>
4928 #include <sys/cons.h>
4929 
4930 /*
4931  * List resources based on pci map registers, used for within ddb
4932  */
4933 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print a
 * pciconf(8)-style summary line for every known PCI device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/* Driverless devices print as "noneN" with a running count. */
		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4973 #endif /* DDB */
4974 
/*
 * Lazily size and reserve the BAR backing a child's resource request.
 * Called for a BAR that was not reserved at enumeration time: probe the
 * BAR's size, validate the requested resource type against what the BAR
 * decodes, reserve a suitably sized and aligned range and program the
 * BAR with the chosen address.  Returns the reserved (inactive)
 * resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type disagrees with what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
5078 
/*
 * Common allocation path for PCI children.  Performs lazy interrupt
 * routing for legacy INTx and lazy BAR reservation before passing the
 * request to the resource-list code.  'num' is the number of contiguous
 * BAR-sized windows to reserve (used by SR-IOV for VF BARs; normal
 * allocations pass 1).
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		/* Secondary bus number ranges are handled separately. */
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* The resource list entry now exists; do the real allocation. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5154 
/*
 * BUS_ALLOC_RESOURCE method for the PCI bus.  Requests from grandchild
 * devices are passed straight up the tree; SR-IOV VF memory BARs are
 * redirected to the VF-specific allocator; everything else goes through
 * the common lazy-allocation path with num = 1.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5186 
/*
 * BUS_RELEASE_RESOURCE method for the PCI bus.  Mirrors
 * pci_alloc_resource(): grandchild requests are passed up the tree, VF
 * memory resources go through the VF-specific release path, bridge
 * window resources bypass the resource list, and everything else is
 * released via the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5237 
/*
 * BUS_ACTIVATE_RESOURCE method: activate the resource via the generic
 * bus code, then enable decoding for it in the child's config space.
 * Device ROM BARs additionally need their private enable bit set.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Set PCIM_CMD_PORTEN/MEMEN as appropriate. */
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
5265 
/*
 * BUS_DEACTIVATE_RESOURCE method: deactivate via the generic bus code
 * and, for device ROM BARs, rewrite the BAR without PCIM_BIOS_ENABLE to
 * turn its decoding back off.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
5286 
5287 void
5288 pci_child_deleted(device_t dev, device_t child)
5289 {
5290 	struct resource_list_entry *rle;
5291 	struct resource_list *rl;
5292 	struct pci_devinfo *dinfo;
5293 
5294 	dinfo = device_get_ivars(child);
5295 	rl = &dinfo->resources;
5296 
5297 	/* Turn off access to resources we're about to free */
5298 	if (bus_child_present(child) != 0) {
5299 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5300 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5301 
5302 		pci_disable_busmaster(child);
5303 	}
5304 
5305 	/* Free all allocated resources */
5306 	STAILQ_FOREACH(rle, rl, link) {
5307 		if (rle->res) {
5308 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5309 			    resource_list_busy(rl, rle->type, rle->rid)) {
5310 				pci_printf(&dinfo->cfg,
5311 				    "Resource still owned, oops. "
5312 				    "(type=%d, rid=%d, addr=%lx)\n",
5313 				    rle->type, rle->rid,
5314 				    rman_get_start(rle->res));
5315 				bus_release_resource(child, rle->type, rle->rid,
5316 				    rle->res);
5317 			}
5318 			resource_list_unreserve(rl, dev, child, rle->type,
5319 			    rle->rid);
5320 		}
5321 	}
5322 	resource_list_free(rl);
5323 
5324 	pci_freecfg(dinfo);
5325 }
5326 
/*
 * BUS_DELETE_RESOURCE method: remove a resource list entry for a direct
 * child, first unreserving its backing resource.  Refuses (with a
 * diagnostic) if the resource is still active or allocated to the
 * child.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children have entries in our resource lists. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%jx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
5356 
5357 struct resource_list *
5358 pci_get_resource_list (device_t dev, device_t child)
5359 {
5360 	struct pci_devinfo *dinfo = device_get_ivars(child);
5361 
5362 	return (&dinfo->resources);
5363 }
5364 
5365 bus_dma_tag_t
5366 pci_get_dma_tag(device_t bus, device_t dev)
5367 {
5368 	struct pci_softc *sc = device_get_softc(bus);
5369 
5370 	return (sc->sc_dma_tag);
5371 }
5372 
/*
 * PCI_READ_CONFIG method: read 'width' bytes from a child's config
 * space via the parent bridge.  For SR-IOV VFs the vendor and device ID
 * registers are synthesized from cached values, since the hardware does
 * not implement them.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5413 
5414 void
5415 pci_write_config_method(device_t dev, device_t child, int reg,
5416     uint32_t val, int width)
5417 {
5418 	struct pci_devinfo *dinfo = device_get_ivars(child);
5419 	pcicfgregs *cfg = &dinfo->cfg;
5420 
5421 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5422 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5423 }
5424 
5425 int
5426 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5427     size_t buflen)
5428 {
5429 
5430 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5431 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5432 	return (0);
5433 }
5434 
5435 int
5436 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5437     size_t buflen)
5438 {
5439 	struct pci_devinfo *dinfo;
5440 	pcicfgregs *cfg;
5441 
5442 	dinfo = device_get_ivars(child);
5443 	cfg = &dinfo->cfg;
5444 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5445 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5446 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5447 	    cfg->progif);
5448 	return (0);
5449 }
5450 
5451 int
5452 pci_assign_interrupt_method(device_t dev, device_t child)
5453 {
5454 	struct pci_devinfo *dinfo = device_get_ivars(child);
5455 	pcicfgregs *cfg = &dinfo->cfg;
5456 
5457 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5458 	    cfg->intpin));
5459 }
5460 
/*
 * dev_lookup eventhandler hook: translate a pciconf-style device name
 * into a device_t.  Leaves *dev untouched if it is already resolved or
 * if 'name' does not parse as a PCI selector.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift them down and zero the domain. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/*
	 * Range-check each component; slot 0 may use the extended ARI
	 * function range.
	 */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5509 
/*
 * Module event handler.  On load: initialize the global device queue,
 * create /dev/pci, load the vendor data file, and register the
 * dev_lookup hook used to resolve "pciD:B:S:F" names.  On unload: tear
 * all of that down again.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5536 
/*
 * Restore the saved PCI Express capability control registers (the
 * counterpart of pci_cfg_save_pcie()).  Registers are only written when
 * the saved capability version and port type indicate they exist; the
 * conditions here must mirror those in pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The *_CTL2 registers only exist in version 2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5572 
5573 static void
5574 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5575 {
5576 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5577 	    dinfo->cfg.pcix.pcix_command,  2);
5578 }
5579 
/*
 * Restore a device's configuration space from the copy cached in
 * 'dinfo' (the counterpart of pci_cfg_save()).  Used after a power
 * transition, suspend/resume, or function reset.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Header-type specific registers live at different offsets. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5647 
/*
 * Save the PCI Express capability control registers so they can be
 * replayed by pci_cfg_restore_pcie().  Registers are only read when the
 * capability version and port type indicate they exist; the conditions
 * here must mirror those in pci_cfg_restore_pcie().
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The *_CTL2 registers only exist in version 2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5685 
5686 static void
5687 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5688 {
5689 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5690 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5691 }
5692 
/*
 * Snapshot a device's configuration space into 'dinfo' so that
 * pci_cfg_restore() can replay it later.  If 'setstate' is non-zero,
 * also power the device down afterwards according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Header-type specific registers live at different offsets. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5795 
5796 /* Wrapper APIs suitable for device driver use. */
5797 void
5798 pci_save_state(device_t dev)
5799 {
5800 	struct pci_devinfo *dinfo;
5801 
5802 	dinfo = device_get_ivars(dev);
5803 	pci_cfg_save(dev, dinfo, 0);
5804 }
5805 
5806 void
5807 pci_restore_state(device_t dev)
5808 {
5809 	struct pci_devinfo *dinfo;
5810 
5811 	dinfo = device_get_ivars(dev);
5812 	pci_cfg_restore(dev, dinfo);
5813 }
5814 
5815 static uint16_t
5816 pci_get_rid_method(device_t dev, device_t child)
5817 {
5818 
5819 	return (PCIB_GET_RID(device_get_parent(dev), child));
5820 }
5821 
/*
 * Find the upstream port of a given PCI device in a root complex.
 * Returns the PCI-e root port bridge device, or NULL if the walk leaves
 * the PCI hierarchy before finding one.  'dev' must itself be a PCI
 * device (asserted below).
 */
device_t
pci_find_pcie_root_port(device_t dev)
{
	struct pci_devinfo *dinfo;
	devclass_t pci_class;
	device_t pcib, bus;

	pci_class = devclass_find("pci");
	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));

	/*
	 * Walk the bridge hierarchy until we find a PCI-e root
	 * port or a non-PCI device.
	 */
	for (;;) {
		/* Each iteration steps up: device -> bus -> bridge. */
		bus = device_get_parent(dev);
		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
		    device_get_nameunit(dev)));

		pcib = device_get_parent(bus);
		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
		    device_get_nameunit(bus)));

		/*
		 * pcib's parent must be a PCI bus for this to be a
		 * PCI-PCI bridge.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			return (NULL);

		dinfo = device_get_ivars(pcib);
		if (dinfo->cfg.pcie.pcie_location != 0 &&
		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
			return (pcib);

		/* Keep climbing: continue the search from the bridge. */
		dev = pcib;
	}
}
5862