xref: /freebsd/sys/dev/pci/pci.c (revision 94086cea279d930eb2fbe7d680585abde7e9c095)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
79 #define	PCIR_IS_BIOS(cfg, reg)						\
80 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
81 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
82 
83 static int		pci_has_quirk(uint32_t devid, int quirk);
84 static pci_addr_t	pci_mapbase(uint64_t mapreg);
85 static const char	*pci_maptype(uint64_t mapreg);
86 static int		pci_maprange(uint64_t mapreg);
87 static pci_addr_t	pci_rombase(uint64_t mapreg);
88 static int		pci_romsize(uint64_t testval);
89 static void		pci_fixancient(pcicfgregs *cfg);
90 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91 
92 static int		pci_porten(device_t dev);
93 static int		pci_memen(device_t dev);
94 static void		pci_assign_interrupt(device_t bus, device_t dev,
95 			    int force_route);
96 static int		pci_add_map(device_t bus, device_t dev, int reg,
97 			    struct resource_list *rl, int force, int prefetch);
98 static int		pci_probe(device_t dev);
99 static int		pci_attach(device_t dev);
100 #ifdef PCI_RES_BUS
101 static int		pci_detach(device_t dev);
102 #endif
103 static void		pci_load_vendor_data(void);
104 static int		pci_describe_parse_line(char **ptr, int *vendor,
105 			    int *device, char **desc);
106 static char		*pci_describe_device(device_t dev);
107 static int		pci_modevent(module_t mod, int what, void *arg);
108 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
109 			    pcicfgregs *cfg);
110 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
111 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
112 			    int reg, uint32_t *data);
113 #if 0
114 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
115 			    int reg, uint32_t data);
116 #endif
117 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
118 static void		pci_mask_msix(device_t dev, u_int index);
119 static void		pci_unmask_msix(device_t dev, u_int index);
120 static int		pci_msi_blacklisted(void);
121 static int		pci_msix_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
127 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128 
129 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
130     int f, uint16_t vid, uint16_t did, size_t size);
131 
/*
 * kobj method table for the PCI bus driver.  Entries are grouped by the
 * interface they implement: the newbus device and bus interfaces, and
 * the PCI-specific interface declared in pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
208 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* Register the pci driver so it attaches below pcib (PCI bridge) devices. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Raw vendor/device description text and its size; see pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
217 
/*
 * One device-specific workaround, keyed by the combined 32-bit
 * device/vendor ID as read from config space.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* PCI_QUIRK_* constant below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* quirk-type-specific argument (e.g. register offset) */
	int	arg2;
};
230 
/*
 * Table of known quirky devices, scanned linearly by pci_has_quirk().
 * Terminated by an all-zero entry.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
308 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every enumerated PCI function. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set in pci_read_cap() once any PCIe / PCI-X capability is seen. */
static int pcie_chipset, pcix_chipset;
318 
319 /* sysctl vars */
320 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
321 
322 static int pci_enable_io_modes = 1;
323 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
324     &pci_enable_io_modes, 1,
325     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
326 enable these bits correctly.  We'd like to do this all the time, but there\n\
327 are some peripherals that this causes problems with.");
328 
329 static int pci_do_realloc_bars = 0;
330 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
331     &pci_do_realloc_bars, 0,
332     "Attempt to allocate a new range for any BARs whose original "
333     "firmware-assigned ranges fail to allocate during the initial device scan.");
334 
335 static int pci_do_power_nodriver = 0;
336 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
337     &pci_do_power_nodriver, 0,
338   "Place a function into D3 state when no driver attaches to it.  0 means\n\
339 disable.  1 means conservatively place devices into D3 state.  2 means\n\
340 agressively place devices into D3 state.  3 means put absolutely everything\n\
341 in D3 state.");
342 
343 int pci_do_power_resume = 1;
344 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
345     &pci_do_power_resume, 1,
346   "Transition from D3 -> D0 on resume.");
347 
348 int pci_do_power_suspend = 1;
349 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
350     &pci_do_power_suspend, 1,
351   "Transition from D0 -> D3 on suspend.");
352 
353 static int pci_do_msi = 1;
354 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
355     "Enable support for MSI interrupts");
356 
357 static int pci_do_msix = 1;
358 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
359     "Enable support for MSI-X interrupts");
360 
361 static int pci_honor_msi_blacklist = 1;
362 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
363     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
364 
365 #if defined(__i386__) || defined(__amd64__)
366 static int pci_usb_takeover = 1;
367 #else
368 static int pci_usb_takeover = 0;
369 #endif
370 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
371     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
372 Disable this if you depend on BIOS emulation of USB devices, that is\n\
373 you use USB devices (like keyboard or mouse) but do not load USB drivers");
374 
375 static int pci_clear_bars;
376 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
377     "Ignore firmware-assigned resources for BARs.");
378 
379 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
380 static int pci_clear_buses;
381 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
382     "Ignore firmware-assigned bus numbers.");
383 #endif
384 
385 static int pci_enable_ari = 1;
386 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
387     0, "Enable support for PCIe Alternative RID Interpretation");
388 
389 static int
390 pci_has_quirk(uint32_t devid, int quirk)
391 {
392 	const struct pci_quirk *q;
393 
394 	for (q = &pci_quirks[0]; q->devid; q++) {
395 		if (q->devid == devid && q->type == quirk)
396 			return (1);
397 	}
398 	return (0);
399 }
400 
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience
 * wrapper around pci_find_dbsf() for the common single-domain case.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
409 
410 /* Find a device_t by domain/bus/slot/function */
411 
412 device_t
413 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
414 {
415 	struct pci_devinfo *dinfo;
416 
417 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
418 		if ((dinfo->cfg.domain == domain) &&
419 		    (dinfo->cfg.bus == bus) &&
420 		    (dinfo->cfg.slot == slot) &&
421 		    (dinfo->cfg.func == func)) {
422 			return (dinfo->cfg.dev);
423 		}
424 	}
425 
426 	return (NULL);
427 }
428 
429 /* Find a device_t by vendor/device ID */
430 
431 device_t
432 pci_find_device(uint16_t vendor, uint16_t device)
433 {
434 	struct pci_devinfo *dinfo;
435 
436 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
437 		if ((dinfo->cfg.vendor == vendor) &&
438 		    (dinfo->cfg.device == device)) {
439 			return (dinfo->cfg.dev);
440 		}
441 	}
442 
443 	return (NULL);
444 }
445 
446 device_t
447 pci_find_class(uint8_t class, uint8_t subclass)
448 {
449 	struct pci_devinfo *dinfo;
450 
451 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
452 		if (dinfo->cfg.baseclass == class &&
453 		    dinfo->cfg.subclass == subclass) {
454 			return (dinfo->cfg.dev);
455 		}
456 	}
457 
458 	return (NULL);
459 }
460 
/*
 * printf() prefixed with the "pci<domain>:<bus>:<slot>:<func>: "
 * identity of the function described by 'cfg'.  Returns the total
 * number of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
474 
475 /* return base address of memory or port map */
476 
477 static pci_addr_t
478 pci_mapbase(uint64_t mapreg)
479 {
480 
481 	if (PCI_BAR_MEM(mapreg))
482 		return (mapreg & PCIM_BAR_MEM_BASE);
483 	else
484 		return (mapreg & PCIM_BAR_IO_BASE);
485 }
486 
487 /* return map type of memory or port map */
488 
489 static const char *
490 pci_maptype(uint64_t mapreg)
491 {
492 
493 	if (PCI_BAR_IO(mapreg))
494 		return ("I/O Port");
495 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
496 		return ("Prefetchable Memory");
497 	return ("Memory");
498 }
499 
500 /* return log2 of map size decoded for memory or port map */
501 
502 int
503 pci_mapsize(uint64_t testval)
504 {
505 	int ln2size;
506 
507 	testval = pci_mapbase(testval);
508 	ln2size = 0;
509 	if (testval != 0) {
510 		while ((testval & 1) == 0)
511 		{
512 			ln2size++;
513 			testval >>= 1;
514 		}
515 	}
516 	return (ln2size);
517 }
518 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{
	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
527 
/* return log2 of map size decoded for device ROM */
529 
530 static int
531 pci_romsize(uint64_t testval)
532 {
533 	int ln2size;
534 
535 	testval = pci_rombase(testval);
536 	ln2size = 0;
537 	if (testval != 0) {
538 		while ((testval & 1) == 0)
539 		{
540 			ln2size++;
541 			testval >>= 1;
542 		}
543 	}
544 	return (ln2size);
545 }
546 
547 /* return log2 of address range supported by map register */
548 
549 static int
550 pci_maprange(uint64_t mapreg)
551 {
552 	int ln2range = 0;
553 
554 	if (PCI_BAR_IO(mapreg))
555 		ln2range = 32;
556 	else
557 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
558 		case PCIM_BAR_MEM_32:
559 			ln2range = 32;
560 			break;
561 		case PCIM_BAR_MEM_1MB:
562 			ln2range = 20;
563 			break;
564 		case PCIM_BAR_MEM_64:
565 			ln2range = 64;
566 			break;
567 		}
568 	return (ln2range);
569 }
570 
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type 0 (normal) headers can be mislabeled this way. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
583 
/*
 * extract header type specific config data
 *
 * Fills in the fields of 'cfg' whose config-space location depends on
 * the header type: subvendor/subdevice IDs, bridge registers, and the
 * number of BARs (nummaps).  Unknown header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:	/* type 0: ordinary function */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:	/* type 1: PCI-PCI bridge */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:	/* type 2: CardBus bridge */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
619 
/*
 * read configuration header into pcicfgregs structure
 *
 * Probes d:b:s:f for a device.  An all-ones vendor ID means no
 * function responded, so NULL is returned; otherwise a new devinfo
 * of 'size' bytes is allocated and populated via pci_fill_devinfo().
 * NOTE: the REG() macro defined here is reused by pci_fill_devinfo()
 * below; the matching #undef follows that function.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));

	return (NULL);
}
634 
/*
 * Allocate a devinfo of 'size' bytes (callers may pass a larger size
 * to embed it in a containing structure; the size is recorded in
 * cfg->devinfo_size), read the common config header fields for the
 * function at d:b:s:f, parse its capability list, and append the new
 * entry to the global pci_devq list.  Uses the REG() macro defined
 * above pci_read_device().
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
    uint16_t did, size_t size)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Record the multi-function bit separately and strip it off. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->devinfo_size	= size;
	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* Parse the capability list only if the status register says so. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the identity into the pci_conf view used by pciconf(8)-style consumers. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG
701 
702 static void
703 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
704 {
705 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
706     cfg->ea.ea_location + (n), w)
707 	int num_ent;
708 	int ptr;
709 	int a, b;
710 	uint32_t val;
711 	int ent_size;
712 	uint32_t dw[4];
713 	uint64_t base, max_offset;
714 	struct pci_ea_entry *eae;
715 
716 	if (cfg->ea.ea_location == 0)
717 		return;
718 
719 	STAILQ_INIT(&cfg->ea.ea_entries);
720 
721 	/* Determine the number of entries */
722 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
723 	num_ent &= PCIM_EA_NUM_ENT_MASK;
724 
725 	/* Find the first entry to care of */
726 	ptr = PCIR_EA_FIRST_ENT;
727 
728 	/* Skip DWORD 2 for type 1 functions */
729 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
730 		ptr += 4;
731 
732 	for (a = 0; a < num_ent; a++) {
733 
734 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
735 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
736 
737 		/* Read a number of dwords in the entry */
738 		val = REG(ptr, 4);
739 		ptr += 4;
740 		ent_size = (val & PCIM_EA_ES);
741 
742 		for (b = 0; b < ent_size; b++) {
743 			dw[b] = REG(ptr, 4);
744 			ptr += 4;
745 		}
746 
747 		eae->eae_flags = val;
748 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
749 
750 		base = dw[0] & PCIM_EA_FIELD_MASK;
751 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
752 		b = 2;
753 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
754 			base |= (uint64_t)dw[b] << 32UL;
755 			b++;
756 		}
757 		if (((dw[1] & PCIM_EA_IS_64) != 0)
758 		    && (b < ent_size)) {
759 			max_offset |= (uint64_t)dw[b] << 32UL;
760 			b++;
761 		}
762 
763 		eae->eae_base = base;
764 		eae->eae_max_offset = max_offset;
765 
766 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
767 
768 		if (bootverbose) {
769 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
770 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
771 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
772 		}
773 	}
774 }
775 #undef REG
776 
/*
 * Walk the PCI capability list of the function described by 'cfg' and
 * record the location (and key registers) of each capability of
 * interest: power management, HyperTransport, MSI, MSI-X, VPD,
 * subvendor, PCI-X, PCI-express and Enhanced Allocation.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Decode the BAR index and offset of the vector table and PBA. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
941 
942 /*
943  * PCI Vital Product Data
944  */
945 
946 #define	PCI_VPD_TIMEOUT		1000000
947 
948 static int
949 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
950 {
951 	int count = PCI_VPD_TIMEOUT;
952 
953 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
954 
955 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
956 
957 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
958 		if (--count < 0)
959 			return (ENXIO);
960 		DELAY(1);	/* limit looping */
961 	}
962 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
963 
964 	return (0);
965 }
966 
967 #if 0
/*
 * Write one aligned 32-bit word of VPD at offset 'reg' (currently unused,
 * compiled out).  Returns 0 on success or ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Stage the data word, then start the write by setting bit 15. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Hardware clears bit 15 once the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
985 #endif
986 
987 #undef PCI_VPD_TIMEOUT
988 
/* Cursor state for streaming VPD one byte at a time (see vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched from VPD */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running byte sum; valid VPD sums to 0 */
};
997 
998 static int
999 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1000 {
1001 	uint32_t reg;
1002 	uint8_t byte;
1003 
1004 	if (vrs->bytesinval == 0) {
1005 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1006 			return (ENXIO);
1007 		vrs->val = le32toh(reg);
1008 		vrs->off += 4;
1009 		byte = vrs->val & 0xff;
1010 		vrs->bytesinval = 3;
1011 	} else {
1012 		vrs->val = vrs->val >> 8;
1013 		byte = vrs->val & 0xff;
1014 		vrs->bytesinval--;
1015 	}
1016 
1017 	vrs->cksum += byte;
1018 	*data = byte;
1019 	return (0);
1020 }
1021 
/*
 * Parse the device's Vital Product Data into cfg->vpd.
 *
 * Bytes are consumed one at a time via vpd_nextbyte() and decoded with a
 * state machine:
 *   state 0  - expecting a resource tag (item name) header
 *   state 1  - copying the Identifier String
 *   state 2  - expecting a VPD-R keyword header
 *   state 3  - copying a VPD-R keyword value
 *   state 4  - skipping bytes of an unhandled item
 *   state 5  - expecting a VPD-W keyword header
 *   state 6  - copying a VPD-W keyword value
 *   state -1 - done (End tag, or a format error)
 *   state -2 - I/O error talking to the device
 * On error the partially-built arrays are freed at the bottom.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is limited to 0x7f 4-byte words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum; the running sum of all VPD bytes
			 * through it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: trim the array to size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip the remainder of an unhandled item */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the read/write array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where the value lives so it can be written. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: trim the array to size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we only ever try this once. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1293 
1294 int
1295 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1296 {
1297 	struct pci_devinfo *dinfo = device_get_ivars(child);
1298 	pcicfgregs *cfg = &dinfo->cfg;
1299 
1300 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1301 		pci_read_vpd(device_get_parent(dev), cfg);
1302 
1303 	*identptr = cfg->vpd.vpd_ident;
1304 
1305 	if (*identptr == NULL)
1306 		return (ENXIO);
1307 
1308 	return (0);
1309 }
1310 
1311 int
1312 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1313 	const char **vptr)
1314 {
1315 	struct pci_devinfo *dinfo = device_get_ivars(child);
1316 	pcicfgregs *cfg = &dinfo->cfg;
1317 	int i;
1318 
1319 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1320 		pci_read_vpd(device_get_parent(dev), cfg);
1321 
1322 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1323 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1324 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1325 			*vptr = cfg->vpd.vpd_ros[i].value;
1326 			return (0);
1327 		}
1328 
1329 	*vptr = NULL;
1330 	return (ENXIO);
1331 }
1332 
1333 struct pcicfg_vpd *
1334 pci_fetch_vpd_list(device_t dev)
1335 {
1336 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1337 	pcicfgregs *cfg = &dinfo->cfg;
1338 
1339 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1340 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1341 	return (&cfg->vpd);
1342 }
1343 
1344 /*
1345  * Find the requested HyperTransport capability and return the offset
1346  * in configuration space via the pointer provided.  The function
1347  * returns 0 on success and an error code otherwise.
1348  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HT capability; fail if the device has none. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * SLAVE and HOST use a narrower type field (top 3 bits of
		 * the command register) than the other HT capabilities.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			/*
			 * NOTE(review): when ptr reaches 0 here, the ID read
			 * below still happens at offset PCICAP_ID before the
			 * loop conditions terminate; harmless but redundant.
			 */
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1386 
1387 /*
1388  * Find the requested capability and return the offset in
1389  * configuration space via the pointer provided.  The function returns
1390  * 0 on success and an error code otherwise.
1391  */
1392 int
1393 pci_find_cap_method(device_t dev, device_t child, int capability,
1394     int *capreg)
1395 {
1396 	struct pci_devinfo *dinfo = device_get_ivars(child);
1397 	pcicfgregs *cfg = &dinfo->cfg;
1398 	u_int32_t status;
1399 	u_int8_t ptr;
1400 
1401 	/*
1402 	 * Check the CAP_LIST bit of the PCI status register first.
1403 	 */
1404 	status = pci_read_config(child, PCIR_STATUS, 2);
1405 	if (!(status & PCIM_STATUS_CAPPRESENT))
1406 		return (ENXIO);
1407 
1408 	/*
1409 	 * Determine the start pointer of the capabilities list.
1410 	 */
1411 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1412 	case PCIM_HDRTYPE_NORMAL:
1413 	case PCIM_HDRTYPE_BRIDGE:
1414 		ptr = PCIR_CAP_PTR;
1415 		break;
1416 	case PCIM_HDRTYPE_CARDBUS:
1417 		ptr = PCIR_CAP_PTR_2;
1418 		break;
1419 	default:
1420 		/* XXX: panic? */
1421 		return (ENXIO);		/* no extended capabilities support */
1422 	}
1423 	ptr = pci_read_config(child, ptr, 1);
1424 
1425 	/*
1426 	 * Traverse the capabilities list.
1427 	 */
1428 	while (ptr != 0) {
1429 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1430 			if (capreg != NULL)
1431 				*capreg = ptr;
1432 			return (0);
1433 		}
1434 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1435 	}
1436 
1437 	return (ENOENT);
1438 }
1439 
1440 /*
1441  * Find the requested extended capability and return the offset in
1442  * configuration space via the pointer provided.  The function returns
1443  * 0 on success and an error code otherwise.
1444  */
1445 int
1446 pci_find_extcap_method(device_t dev, device_t child, int capability,
1447     int *capreg)
1448 {
1449 	struct pci_devinfo *dinfo = device_get_ivars(child);
1450 	pcicfgregs *cfg = &dinfo->cfg;
1451 	uint32_t ecap;
1452 	uint16_t ptr;
1453 
1454 	/* Only supported for PCI-express devices. */
1455 	if (cfg->pcie.pcie_location == 0)
1456 		return (ENXIO);
1457 
1458 	ptr = PCIR_EXTCAP;
1459 	ecap = pci_read_config(child, ptr, 4);
1460 	if (ecap == 0xffffffff || ecap == 0)
1461 		return (ENOENT);
1462 	for (;;) {
1463 		if (PCI_EXTCAP_ID(ecap) == capability) {
1464 			if (capreg != NULL)
1465 				*capreg = ptr;
1466 			return (0);
1467 		}
1468 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1469 		if (ptr == 0)
1470 			break;
1471 		ecap = pci_read_config(child, ptr, 4);
1472 	}
1473 
1474 	return (ENOENT);
1475 }
1476 
1477 /*
1478  * Support for MSI-X message interrupts.
1479  */
1480 void
1481 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1482     uint64_t address, uint32_t data)
1483 {
1484 	struct pci_devinfo *dinfo = device_get_ivars(child);
1485 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1486 	uint32_t offset;
1487 
1488 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1489 	offset = msix->msix_table_offset + index * 16;
1490 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1491 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1492 	bus_write_4(msix->msix_table_res, offset + 8, data);
1493 
1494 	/* Enable MSI -> HT mapping. */
1495 	pci_ht_map_msi(child, address);
1496 }
1497 
1498 void
1499 pci_mask_msix(device_t dev, u_int index)
1500 {
1501 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1502 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1503 	uint32_t offset, val;
1504 
1505 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1506 	offset = msix->msix_table_offset + index * 16 + 12;
1507 	val = bus_read_4(msix->msix_table_res, offset);
1508 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1509 		val |= PCIM_MSIX_VCTRL_MASK;
1510 		bus_write_4(msix->msix_table_res, offset, val);
1511 	}
1512 }
1513 
1514 void
1515 pci_unmask_msix(device_t dev, u_int index)
1516 {
1517 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1518 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1519 	uint32_t offset, val;
1520 
1521 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1522 	offset = msix->msix_table_offset + index * 16 + 12;
1523 	val = bus_read_4(msix->msix_table_res, offset);
1524 	if (val & PCIM_MSIX_VCTRL_MASK) {
1525 		val &= ~PCIM_MSIX_VCTRL_MASK;
1526 		bus_write_4(msix->msix_table_res, offset, val);
1527 	}
1528 }
1529 
1530 int
1531 pci_pending_msix(device_t dev, u_int index)
1532 {
1533 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1534 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1535 	uint32_t offset, bit;
1536 
1537 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1538 	offset = msix->msix_pba_offset + (index / 32) * 4;
1539 	bit = 1 << index % 32;
1540 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1541 }
1542 
1543 /*
1544  * Restore MSI-X registers and table during resume.  If MSI-X is
1545  * enabled then walk the virtual table to restore the actual MSI-X
1546  * table.
1547  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; msix_vectors[] is 0-based. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1575 
1576 /*
1577  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1578  * returned in *count.  After this function returns, each message will be
1579  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1580  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, rle still points at it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate IRQs from the parent, stopping at the first failure. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 means "no vector assigned". */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1715 
1716 /*
1717  * By default, pci_alloc_msix() will assign the allocated IRQ
1718  * resources consecutively to the first N messages in the MSI-X table.
1719  * However, device drivers may want to use different layouts if they
1720  * either receive fewer messages than they asked for, or they wish to
1721  * populate the MSI-X table sparsely.  This method allows the driver
1722  * to specify what layout it wants.  It must be called after a
1723  * successful pci_alloc_msix() but before any of the associated
1724  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1725  *
1726  * The 'vectors' array contains 'count' message vectors.  The array
1727  * maps directly to the MSI-X table in that index 0 in the array
1728  * specifies the vector for the first message in the MSI-X table, etc.
1729  * The vector value in each array index can either be 0 to indicate
1730  * that no vector should be assigned to a message slot, or it can be a
1731  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1733  * vector (IRQ) to be used for the corresponding message.
1734  *
1735  * On successful return, each message with a non-zero vector will have
1736  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1737  * 1.  Additionally, if any of the IRQs allocated via the previous
1738  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1739  * will be freed back to the system automatically.
1740  *
1741  * For example, suppose a driver has a MSI-X table with 6 messages and
1742  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1743  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1744  * C.  After the call to pci_alloc_msix(), the device will be setup to
1745  * have an MSI-X table of ABC--- (where - means no vector assigned).
1746  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1747  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1748  * be freed back to the system.  This device will also have valid
1749  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1750  *
1751  * In any case, the SYS_RES_IRQ rid X will always map to the message
1752  * at MSI-X table index X - 1 and will only be valid if a vector is
1753  * assigned to that table entry.
1754  */
1755 int
1756 pci_remap_msix_method(device_t dev, device_t child, int count,
1757     const u_int *vectors)
1758 {
1759 	struct pci_devinfo *dinfo = device_get_ivars(child);
1760 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1761 	struct resource_list_entry *rle;
1762 	int i, irq, j, *used;
1763 
1764 	/*
1765 	 * Have to have at least one message in the table but the
1766 	 * table can't be bigger than the actual MSI-X table in the
1767 	 * device.
1768 	 */
1769 	if (count == 0 || count > msix->msix_msgnum)
1770 		return (EINVAL);
1771 
1772 	/* Sanity check the vectors. */
1773 	for (i = 0; i < count; i++)
1774 		if (vectors[i] > msix->msix_alloc)
1775 			return (EINVAL);
1776 
1777 	/*
1778 	 * Make sure there aren't any holes in the vectors to be used.
1779 	 * It's a big pain to support it, and it doesn't really make
1780 	 * sense anyway.  Also, at least one vector must be used.
1781 	 */
1782 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1783 	    M_ZERO);
1784 	for (i = 0; i < count; i++)
1785 		if (vectors[i] != 0)
1786 			used[vectors[i] - 1] = 1;
1787 	for (i = 0; i < msix->msix_alloc - 1; i++)
1788 		if (used[i] == 0 && used[i + 1] == 1) {
1789 			free(used, M_DEVBUF);
1790 			return (EINVAL);
1791 		}
1792 	if (used[0] != 1) {
1793 		free(used, M_DEVBUF);
1794 		return (EINVAL);
1795 	}
1796 
1797 	/* Make sure none of the resources are allocated. */
1798 	for (i = 0; i < msix->msix_table_len; i++) {
1799 		if (msix->msix_table[i].mte_vector == 0)
1800 			continue;
1801 		if (msix->msix_table[i].mte_handlers > 0) {
1802 			free(used, M_DEVBUF);
1803 			return (EBUSY);
1804 		}
1805 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1806 		KASSERT(rle != NULL, ("missing resource"));
1807 		if (rle->res != NULL) {
1808 			free(used, M_DEVBUF);
1809 			return (EBUSY);
1810 		}
1811 	}
1812 
1813 	/* Free the existing resource list entries. */
1814 	for (i = 0; i < msix->msix_table_len; i++) {
1815 		if (msix->msix_table[i].mte_vector == 0)
1816 			continue;
1817 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1818 	}
1819 
1820 	/*
1821 	 * Build the new virtual table keeping track of which vectors are
1822 	 * used.
1823 	 */
1824 	free(msix->msix_table, M_DEVBUF);
1825 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1826 	    M_DEVBUF, M_WAITOK | M_ZERO);
1827 	for (i = 0; i < count; i++)
1828 		msix->msix_table[i].mte_vector = vectors[i];
1829 	msix->msix_table_len = count;
1830 
1831 	/* Free any unused IRQs and resize the vectors array if necessary. */
1832 	j = msix->msix_alloc - 1;
1833 	if (used[j] == 0) {
1834 		struct msix_vector *vec;
1835 
1836 		while (used[j] == 0) {
1837 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1838 			    msix->msix_vectors[j].mv_irq);
1839 			j--;
1840 		}
1841 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1842 		    M_WAITOK);
1843 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1844 		    (j + 1));
1845 		free(msix->msix_vectors, M_DEVBUF);
1846 		msix->msix_vectors = vec;
1847 		msix->msix_alloc = j + 1;
1848 	}
1849 	free(used, M_DEVBUF);
1850 
1851 	/* Map the IRQs onto the rids. */
1852 	for (i = 0; i < count; i++) {
1853 		if (vectors[i] == 0)
1854 			continue;
1855 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1856 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1857 		    irq, 1);
1858 	}
1859 
1860 	if (bootverbose) {
1861 		device_printf(child, "Remapped MSI-X IRQs as: ");
1862 		for (i = 0; i < count; i++) {
1863 			if (i != 0)
1864 				printf(", ");
1865 			if (vectors[i] == 0)
1866 				printf("---");
1867 			else
1868 				printf("%d",
1869 				    msix->msix_vectors[vectors[i]].mv_irq);
1870 		}
1871 		printf("\n");
1872 	}
1873 
1874 	return (0);
1875 }
1876 
1877 static int
1878 pci_release_msix(device_t dev, device_t child)
1879 {
1880 	struct pci_devinfo *dinfo = device_get_ivars(child);
1881 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1882 	struct resource_list_entry *rle;
1883 	int i;
1884 
1885 	/* Do we have any messages to release? */
1886 	if (msix->msix_alloc == 0)
1887 		return (ENODEV);
1888 
1889 	/* Make sure none of the resources are allocated. */
1890 	for (i = 0; i < msix->msix_table_len; i++) {
1891 		if (msix->msix_table[i].mte_vector == 0)
1892 			continue;
1893 		if (msix->msix_table[i].mte_handlers > 0)
1894 			return (EBUSY);
1895 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1896 		KASSERT(rle != NULL, ("missing resource"));
1897 		if (rle->res != NULL)
1898 			return (EBUSY);
1899 	}
1900 
1901 	/* Update control register to disable MSI-X. */
1902 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1903 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1904 	    msix->msix_ctrl, 2);
1905 
1906 	/* Free the resource list entries. */
1907 	for (i = 0; i < msix->msix_table_len; i++) {
1908 		if (msix->msix_table[i].mte_vector == 0)
1909 			continue;
1910 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1911 	}
1912 	free(msix->msix_table, M_DEVBUF);
1913 	msix->msix_table_len = 0;
1914 
1915 	/* Release the IRQs. */
1916 	for (i = 0; i < msix->msix_alloc; i++)
1917 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1918 		    msix->msix_vectors[i].mv_irq);
1919 	free(msix->msix_vectors, M_DEVBUF);
1920 	msix->msix_alloc = 0;
1921 	return (0);
1922 }
1923 
1924 /*
1925  * Return the max supported MSI-X messages this device supports.
1926  * Basically, assuming the MD code can alloc messages, this function
1927  * should return the maximum value that pci_alloc_msix() can return.
1928  * Thus, it is subject to the tunables, etc.
1929  */
1930 int
1931 pci_msix_count_method(device_t dev, device_t child)
1932 {
1933 	struct pci_devinfo *dinfo = device_get_ivars(child);
1934 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1935 
1936 	if (pci_do_msix && msix->msix_location != 0)
1937 		return (msix->msix_msgnum);
1938 	return (0);
1939 }
1940 
1941 int
1942 pci_msix_pba_bar_method(device_t dev, device_t child)
1943 {
1944 	struct pci_devinfo *dinfo = device_get_ivars(child);
1945 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1946 
1947 	if (pci_do_msix && msix->msix_location != 0)
1948 		return (msix->msix_pba_bar);
1949 	return (-1);
1950 }
1951 
1952 int
1953 pci_msix_table_bar_method(device_t dev, device_t child)
1954 {
1955 	struct pci_devinfo *dinfo = device_get_ivars(child);
1956 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1957 
1958 	if (pci_do_msix && msix->msix_location != 0)
1959 		return (msix->msix_table_bar);
1960 	return (-1);
1961 }
1962 
1963 /*
1964  * HyperTransport MSI mapping control
1965  */
1966 void
1967 pci_ht_map_msi(device_t dev, uint64_t addr)
1968 {
1969 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1970 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1971 
1972 	if (!ht->ht_msimap)
1973 		return;
1974 
1975 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1976 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1977 		/* Enable MSI -> HT mapping. */
1978 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1979 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1980 		    ht->ht_msictrl, 2);
1981 	}
1982 
1983 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1984 		/* Disable MSI -> HT mapping. */
1985 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1986 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1987 		    ht->ht_msictrl, 2);
1988 	}
1989 }
1990 
1991 int
1992 pci_get_max_read_req(device_t dev)
1993 {
1994 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1995 	int cap;
1996 	uint16_t val;
1997 
1998 	cap = dinfo->cfg.pcie.pcie_location;
1999 	if (cap == 0)
2000 		return (0);
2001 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2002 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2003 	val >>= 12;
2004 	return (1 << (val + 7));
2005 }
2006 
2007 int
2008 pci_set_max_read_req(device_t dev, int size)
2009 {
2010 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2011 	int cap;
2012 	uint16_t val;
2013 
2014 	cap = dinfo->cfg.pcie.pcie_location;
2015 	if (cap == 0)
2016 		return (0);
2017 	if (size < 128)
2018 		size = 128;
2019 	if (size > 4096)
2020 		size = 4096;
2021 	size = (1 << (fls(size) - 1));
2022 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2023 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2024 	val |= (fls(size) - 8) << 12;
2025 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2026 	return (size);
2027 }
2028 
2029 uint32_t
2030 pcie_read_config(device_t dev, int reg, int width)
2031 {
2032 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2033 	int cap;
2034 
2035 	cap = dinfo->cfg.pcie.pcie_location;
2036 	if (cap == 0) {
2037 		if (width == 2)
2038 			return (0xffff);
2039 		return (0xffffffff);
2040 	}
2041 
2042 	return (pci_read_config(dev, cap + reg, width));
2043 }
2044 
2045 void
2046 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2047 {
2048 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2049 	int cap;
2050 
2051 	cap = dinfo->cfg.pcie.pcie_location;
2052 	if (cap == 0)
2053 		return;
2054 	pci_write_config(dev, cap + reg, value, width);
2055 }
2056 
2057 /*
2058  * Adjusts a PCI-e capability register by clearing the bits in mask
2059  * and setting the bits in (value & mask).  Bits not set in mask are
2060  * not adjusted.
2061  *
2062  * Returns the old value on success or all ones on failure.
2063  */
2064 uint32_t
2065 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2066     int width)
2067 {
2068 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2069 	uint32_t old, new;
2070 	int cap;
2071 
2072 	cap = dinfo->cfg.pcie.pcie_location;
2073 	if (cap == 0) {
2074 		if (width == 2)
2075 			return (0xffff);
2076 		return (0xffffffff);
2077 	}
2078 
2079 	old = pci_read_config(dev, cap + reg, width);
2080 	new = old & ~mask;
2081 	new |= (value & mask);
2082 	pci_write_config(dev, cap + reg, new, width);
2083 	return (old);
2084 }
2085 
/*
 * Support for MSI message signalled interrupts.
 */
/*
 * Program the MSI address/data pair into 'child's MSI capability and
 * set the MSI enable bit.  The address and data registers are written
 * before the enable bit is set.
 */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/*
		 * 64-bit capable device: write the high address dword,
		 * and the data register at its 64-bit layout offset.
		 */
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
2116 
/*
 * Disable MSI for 'child': tear down any MSI -> HT mapping, then
 * clear the MSI enable bit in the control register.
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
2131 
/*
 * Restore MSI registers during resume.  If MSI is enabled then
 * restore the data and address registers in addition to the control
 * register.
 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the cached address/data pair first. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit layout: data sits at a higher offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is always restored, enabled or not. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
2162 
2163 static int
2164 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2165 {
2166 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2167 	pcicfgregs *cfg = &dinfo->cfg;
2168 	struct resource_list_entry *rle;
2169 	struct msix_table_entry *mte;
2170 	struct msix_vector *mv;
2171 	uint64_t addr;
2172 	uint32_t data;
2173 	int error, i, j;
2174 
2175 	/*
2176 	 * Handle MSI first.  We try to find this IRQ among our list
2177 	 * of MSI IRQs.  If we find it, we request updated address and
2178 	 * data registers and apply the results.
2179 	 */
2180 	if (cfg->msi.msi_alloc > 0) {
2181 
2182 		/* If we don't have any active handlers, nothing to do. */
2183 		if (cfg->msi.msi_handlers == 0)
2184 			return (0);
2185 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2186 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2187 			    i + 1);
2188 			if (rle->start == irq) {
2189 				error = PCIB_MAP_MSI(device_get_parent(bus),
2190 				    dev, irq, &addr, &data);
2191 				if (error)
2192 					return (error);
2193 				pci_disable_msi(dev);
2194 				dinfo->cfg.msi.msi_addr = addr;
2195 				dinfo->cfg.msi.msi_data = data;
2196 				pci_enable_msi(dev, addr, data);
2197 				return (0);
2198 			}
2199 		}
2200 		return (ENOENT);
2201 	}
2202 
2203 	/*
2204 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2205 	 * we request the updated mapping info.  If that works, we go
2206 	 * through all the slots that use this IRQ and update them.
2207 	 */
2208 	if (cfg->msix.msix_alloc > 0) {
2209 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2210 			mv = &cfg->msix.msix_vectors[i];
2211 			if (mv->mv_irq == irq) {
2212 				error = PCIB_MAP_MSI(device_get_parent(bus),
2213 				    dev, irq, &addr, &data);
2214 				if (error)
2215 					return (error);
2216 				mv->mv_address = addr;
2217 				mv->mv_data = data;
2218 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2219 					mte = &cfg->msix.msix_table[j];
2220 					if (mte->mte_vector != i + 1)
2221 						continue;
2222 					if (mte->mte_handlers == 0)
2223 						continue;
2224 					pci_mask_msix(dev, j);
2225 					pci_enable_msix(dev, j, addr, data);
2226 					pci_unmask_msix(dev, j);
2227 				}
2228 			}
2229 		}
2230 		return (ENOENT);
2231 	}
2232 
2233 	return (ENOENT);
2234 }
2235 
2236 /*
2237  * Returns true if the specified device is blacklisted because MSI
2238  * doesn't work.
2239  */
2240 int
2241 pci_msi_device_blacklisted(device_t dev)
2242 {
2243 
2244 	if (!pci_honor_msi_blacklist)
2245 		return (0);
2246 
2247 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2248 }
2249 
2250 /*
2251  * Determine if MSI is blacklisted globally on this system.  Currently,
2252  * we just check for blacklisted chipsets as represented by the
2253  * host-PCI bridge at device 0:0:0.  In the future, it may become
2254  * necessary to check other system attributes, such as the kenv values
2255  * that give the motherboard manufacturer and model number.
2256  */
2257 static int
2258 pci_msi_blacklisted(void)
2259 {
2260 	device_t dev;
2261 
2262 	if (!pci_honor_msi_blacklist)
2263 		return (0);
2264 
2265 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2266 	if (!(pcie_chipset || pcix_chipset)) {
2267 		if (vm_guest != VM_GUEST_NO) {
2268 			/*
2269 			 * Whitelist older chipsets in virtual
2270 			 * machines known to support MSI.
2271 			 */
2272 			dev = pci_find_bsf(0, 0, 0);
2273 			if (dev != NULL)
2274 				return (!pci_has_quirk(pci_get_devid(dev),
2275 					PCI_QUIRK_ENABLE_MSI_VM));
2276 		}
2277 		return (1);
2278 	}
2279 
2280 	dev = pci_find_bsf(0, 0, 0);
2281 	if (dev != NULL)
2282 		return (pci_msi_device_blacklisted(dev));
2283 	return (0);
2284 }
2285 
2286 /*
2287  * Returns true if the specified device is blacklisted because MSI-X
2288  * doesn't work.  Note that this assumes that if MSI doesn't work,
2289  * MSI-X doesn't either.
2290  */
2291 int
2292 pci_msix_device_blacklisted(device_t dev)
2293 {
2294 
2295 	if (!pci_honor_msi_blacklist)
2296 		return (0);
2297 
2298 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2299 		return (1);
2300 
2301 	return (pci_msi_device_blacklisted(dev));
2302 }
2303 
2304 /*
2305  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2306  * is blacklisted, assume that MSI-X is as well.  Check for additional
2307  * chipsets where MSI works but MSI-X does not.
2308  */
2309 static int
2310 pci_msix_blacklisted(void)
2311 {
2312 	device_t dev;
2313 
2314 	if (!pci_honor_msi_blacklist)
2315 		return (0);
2316 
2317 	dev = pci_find_bsf(0, 0, 0);
2318 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2319 	    PCI_QUIRK_DISABLE_MSIX))
2320 		return (1);
2321 
2322 	return (pci_msi_blacklisted());
2323 }
2324 
/*
 * Attempt to allocate *count MSI messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
 *
 * Returns 0 on success; EINVAL for a zero or non-power-of-two request,
 * ENXIO if messages are already allocated or MSI is blacklisted, ENODEV
 * if the device has no usable MSI capability, or an error from the
 * parent bridge's allocator.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME field encodes log2 of the message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2448 
/*
 * Release the MSI (or MSI-X) messages associated with this device.
 * Fails with EBUSY if any of the messages still has an interrupt
 * handler attached or its resource is still allocated.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2497 
2498 /*
2499  * Return the max supported MSI messages this device supports.
2500  * Basically, assuming the MD code can alloc messages, this function
2501  * should return the maximum value that pci_alloc_msi() can return.
2502  * Thus, it is subject to the tunables, etc.
2503  */
2504 int
2505 pci_msi_count_method(device_t dev, device_t child)
2506 {
2507 	struct pci_devinfo *dinfo = device_get_ivars(child);
2508 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2509 
2510 	if (pci_do_msi && msi->msi_location != 0)
2511 		return (msi->msi_msgnum);
2512 	return (0);
2513 }
2514 
/* free pcicfgregs structure and all depending data structures */

int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free VPD data: identifier plus read-only and writable keywords. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink the device from the global device list and free it. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2548 
/*
 * PCI power management
 */
/*
 * Put 'child' into PCI power state 'state' (PCI_POWERSTATE_D[0-3])
 * via its power-management capability.  Returns EOPNOTSUPP if the
 * device lacks the capability or does not implement the requested
 * optional state, EINVAL for an unknown state, and 0 on success.
 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the status register, replacing only the state field. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 is optional; the capability advertises support. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 is optional; the capability advertises support. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2623 
2624 int
2625 pci_get_powerstate_method(device_t dev, device_t child)
2626 {
2627 	struct pci_devinfo *dinfo = device_get_ivars(child);
2628 	pcicfgregs *cfg = &dinfo->cfg;
2629 	uint16_t status;
2630 	int result;
2631 
2632 	if (cfg->pp.pp_cap != 0) {
2633 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2634 		switch (status & PCIM_PSTAT_DMASK) {
2635 		case PCIM_PSTAT_D0:
2636 			result = PCI_POWERSTATE_D0;
2637 			break;
2638 		case PCIM_PSTAT_D1:
2639 			result = PCI_POWERSTATE_D1;
2640 			break;
2641 		case PCIM_PSTAT_D2:
2642 			result = PCI_POWERSTATE_D2;
2643 			break;
2644 		case PCIM_PSTAT_D3:
2645 			result = PCI_POWERSTATE_D3;
2646 			break;
2647 		default:
2648 			result = PCI_POWERSTATE_UNKNOWN;
2649 			break;
2650 		}
2651 	} else {
2652 		/* No support, device is always at D0 */
2653 		result = PCI_POWERSTATE_D0;
2654 	}
2655 	return (result);
2656 }
2657 
2658 /*
2659  * Some convenience functions for PCI device drivers.
2660  */
2661 
2662 static __inline void
2663 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2664 {
2665 	uint16_t	command;
2666 
2667 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2668 	command |= bit;
2669 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2670 }
2671 
2672 static __inline void
2673 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2674 {
2675 	uint16_t	command;
2676 
2677 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2678 	command &= ~bit;
2679 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2680 }
2681 
/* Enable bus mastering in the child's command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2688 
/* Disable bus mastering in the child's command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2695 
2696 int
2697 pci_enable_io_method(device_t dev, device_t child, int space)
2698 {
2699 	uint16_t bit;
2700 
2701 	switch(space) {
2702 	case SYS_RES_IOPORT:
2703 		bit = PCIM_CMD_PORTEN;
2704 		break;
2705 	case SYS_RES_MEMORY:
2706 		bit = PCIM_CMD_MEMEN;
2707 		break;
2708 	default:
2709 		return (EINVAL);
2710 	}
2711 	pci_set_command_bit(dev, child, bit);
2712 	return (0);
2713 }
2714 
2715 int
2716 pci_disable_io_method(device_t dev, device_t child, int space)
2717 {
2718 	uint16_t bit;
2719 
2720 	switch(space) {
2721 	case SYS_RES_IOPORT:
2722 		bit = PCIM_CMD_PORTEN;
2723 		break;
2724 	case SYS_RES_MEMORY:
2725 		bit = PCIM_CMD_MEMEN;
2726 		break;
2727 	default:
2728 		return (EINVAL);
2729 	}
2730 	pci_clear_command_bit(dev, child, bit);
2731 	return (0);
2732 }
2733 
2734 /*
2735  * New style pci driver.  Parent device is either a pci-host-bridge or a
2736  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2737  */
2738 
/*
 * When booting verbose, dump the interesting parts of a device's
 * config header to the console: IDs, location, class, command/status,
 * timing parameters, interrupt routing, and the power-management,
 * MSI and MSI-X capabilities when present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management: supported and current states. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2795 
2796 static int
2797 pci_porten(device_t dev)
2798 {
2799 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2800 }
2801 
2802 static int
2803 pci_memen(device_t dev)
2804 {
2805 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2806 }
2807 
/*
 * Read the current value and probe the size of BAR 'reg'.  *mapp
 * receives the BAR's contents, *testvalp the value read back after
 * writing all 1's (its low zero bits encode the BAR size), and
 * *bar64, if non-NULL, is set when the BAR is a 64-bit one.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding now that sizing is complete. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2876 
/*
 * Write 'base' into the BAR described by 'pm' and refresh the cached
 * pm_value from what the hardware actually latched.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	/* Program the new base; both dwords for a 64-bit BAR. */
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects the device's view. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2897 
2898 struct pci_map *
2899 pci_find_bar(device_t dev, int reg)
2900 {
2901 	struct pci_devinfo *dinfo;
2902 	struct pci_map *pm;
2903 
2904 	dinfo = device_get_ivars(dev);
2905 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2906 		if (pm->pm_reg == reg)
2907 			return (pm);
2908 	}
2909 	return (NULL);
2910 }
2911 
2912 int
2913 pci_bar_enabled(device_t dev, struct pci_map *pm)
2914 {
2915 	struct pci_devinfo *dinfo;
2916 	uint16_t cmd;
2917 
2918 	dinfo = device_get_ivars(dev);
2919 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2920 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2921 		return (0);
2922 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2923 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2924 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2925 	else
2926 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2927 }
2928 
/*
 * Record a new BAR for this device: allocate a pci_map entry for
 * register offset 'reg' with the given raw value and size, and insert
 * it into the device's map list, which is kept sorted by register
 * offset.  Returns the new entry.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after, keeping the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2953 
2954 static void
2955 pci_restore_bars(device_t dev)
2956 {
2957 	struct pci_devinfo *dinfo;
2958 	struct pci_map *pm;
2959 	int ln2range;
2960 
2961 	dinfo = device_get_ivars(dev);
2962 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2963 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2964 			ln2range = 32;
2965 		else
2966 			ln2range = pci_maprange(pm->pm_value);
2967 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2968 		if (ln2range == 64)
2969 			pci_write_config(dev, pm->pm_reg + 4,
2970 			    pm->pm_value >> 32, 4);
2971 	}
2972 }
2973 
2974 /*
2975  * Add a resource based on a pci map register. Return 1 if the map
2976  * register is a 32bit map register or 2 if it is a 64bit register.
2977  */
2978 static int
2979 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2980     int force, int prefetch)
2981 {
2982 	struct pci_map *pm;
2983 	pci_addr_t base, map, testval;
2984 	pci_addr_t start, end, count;
2985 	int barlen, basezero, flags, maprange, mapsize, type;
2986 	uint16_t cmd;
2987 	struct resource *res;
2988 
2989 	/*
2990 	 * The BAR may already exist if the device is a CardBus card
2991 	 * whose CIS is stored in this BAR.
2992 	 */
2993 	pm = pci_find_bar(dev, reg);
2994 	if (pm != NULL) {
2995 		maprange = pci_maprange(pm->pm_value);
2996 		barlen = maprange == 64 ? 2 : 1;
2997 		return (barlen);
2998 	}
2999 
3000 	pci_read_bar(dev, reg, &map, &testval, NULL);
3001 	if (PCI_BAR_MEM(map)) {
3002 		type = SYS_RES_MEMORY;
3003 		if (map & PCIM_BAR_MEM_PREFETCH)
3004 			prefetch = 1;
3005 	} else
3006 		type = SYS_RES_IOPORT;
3007 	mapsize = pci_mapsize(testval);
3008 	base = pci_mapbase(map);
3009 #ifdef __PCI_BAR_ZERO_VALID
3010 	basezero = 0;
3011 #else
3012 	basezero = base == 0;
3013 #endif
3014 	maprange = pci_maprange(map);
3015 	barlen = maprange == 64 ? 2 : 1;
3016 
3017 	/*
3018 	 * For I/O registers, if bottom bit is set, and the next bit up
3019 	 * isn't clear, we know we have a BAR that doesn't conform to the
3020 	 * spec, so ignore it.  Also, sanity check the size of the data
3021 	 * areas to the type of memory involved.  Memory must be at least
3022 	 * 16 bytes in size, while I/O ranges must be at least 4.
3023 	 */
3024 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3025 		return (barlen);
3026 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3027 	    (type == SYS_RES_IOPORT && mapsize < 2))
3028 		return (barlen);
3029 
3030 	/* Save a record of this BAR. */
3031 	pm = pci_add_bar(dev, reg, map, mapsize);
3032 	if (bootverbose) {
3033 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3034 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3035 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3036 			printf(", port disabled\n");
3037 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3038 			printf(", memory disabled\n");
3039 		else
3040 			printf(", enabled\n");
3041 	}
3042 
3043 	/*
3044 	 * If base is 0, then we have problems if this architecture does
3045 	 * not allow that.  It is best to ignore such entries for the
3046 	 * moment.  These will be allocated later if the driver specifically
3047 	 * requests them.  However, some removable busses look better when
3048 	 * all resources are allocated, so allow '0' to be overriden.
3049 	 *
3050 	 * Similarly treat maps whose values is the same as the test value
3051 	 * read back.  These maps have had all f's written to them by the
3052 	 * BIOS in an attempt to disable the resources.
3053 	 */
3054 	if (!force && (basezero || map == testval))
3055 		return (barlen);
3056 	if ((u_long)base != base) {
3057 		device_printf(bus,
3058 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3059 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3060 		    pci_get_function(dev), reg);
3061 		return (barlen);
3062 	}
3063 
3064 	/*
3065 	 * This code theoretically does the right thing, but has
3066 	 * undesirable side effects in some cases where peripherals
3067 	 * respond oddly to having these bits enabled.  Let the user
3068 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3069 	 * default).
3070 	 */
3071 	if (pci_enable_io_modes) {
3072 		/* Turn on resources that have been left off by a lazy BIOS */
3073 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3074 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3075 			cmd |= PCIM_CMD_PORTEN;
3076 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3077 		}
3078 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3079 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3080 			cmd |= PCIM_CMD_MEMEN;
3081 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3082 		}
3083 	} else {
3084 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3085 			return (barlen);
3086 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3087 			return (barlen);
3088 	}
3089 
3090 	count = (pci_addr_t)1 << mapsize;
3091 	flags = RF_ALIGNMENT_LOG2(mapsize);
3092 	if (prefetch)
3093 		flags |= RF_PREFETCHABLE;
3094 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3095 		start = 0;	/* Let the parent decide. */
3096 		end = ~0;
3097 	} else {
3098 		start = base;
3099 		end = base + count - 1;
3100 	}
3101 	resource_list_add(rl, type, reg, start, end, count);
3102 
3103 	/*
3104 	 * Try to allocate the resource for this BAR from our parent
3105 	 * so that this resource range is already reserved.  The
3106 	 * driver for this device will later inherit this resource in
3107 	 * pci_alloc_resource().
3108 	 */
3109 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3110 	    flags);
3111 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3112 		/*
3113 		 * If the allocation fails, try to allocate a resource for
3114 		 * this BAR using any available range.  The firmware felt
3115 		 * it was important enough to assign a resource, so don't
3116 		 * disable decoding if we can help it.
3117 		 */
3118 		resource_list_delete(rl, type, reg);
3119 		resource_list_add(rl, type, reg, 0, ~0, count);
3120 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3121 		    count, flags);
3122 	}
3123 	if (res == NULL) {
3124 		/*
3125 		 * If the allocation fails, delete the resource list entry
3126 		 * and disable decoding for this device.
3127 		 *
3128 		 * If the driver requests this resource in the future,
3129 		 * pci_reserve_map() will try to allocate a fresh
3130 		 * resource range.
3131 		 */
3132 		resource_list_delete(rl, type, reg);
3133 		pci_disable_io(dev, type);
3134 		if (bootverbose)
3135 			device_printf(bus,
3136 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3137 			    pci_get_domain(dev), pci_get_bus(dev),
3138 			    pci_get_slot(dev), pci_get_function(dev), reg);
3139 	} else {
3140 		start = rman_get_start(res);
3141 		pci_write_bar(dev, pm, start);
3142 	}
3143 	return (barlen);
3144 }
3145 
3146 /*
3147  * For ATA devices we need to decide early what addressing mode to use.
3148  * Legacy demands that the primary and secondary ATA ports sits on the
3149  * same addresses that old ISA hardware did. This dictates that we use
3150  * those addresses and ignore the BAR's if we cannot set PCI native
3151  * addressing mode.
3152  */
3153 static void
3154 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3155     uint32_t prefetchmask)
3156 {
3157 	int rid, type, progif;
3158 #if 0
3159 	/* if this device supports PCI native addressing use it */
3160 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3161 	if ((progif & 0x8a) == 0x8a) {
3162 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3163 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3164 			printf("Trying ATA native PCI addressing mode\n");
3165 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3166 		}
3167 	}
3168 #endif
3169 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3170 	type = SYS_RES_IOPORT;
3171 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3172 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3173 		    prefetchmask & (1 << 0));
3174 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3175 		    prefetchmask & (1 << 1));
3176 	} else {
3177 		rid = PCIR_BAR(0);
3178 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3179 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3180 		    0x1f7, 8, 0);
3181 		rid = PCIR_BAR(1);
3182 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3183 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3184 		    0x3f6, 1, 0);
3185 	}
3186 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3187 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3188 		    prefetchmask & (1 << 2));
3189 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3190 		    prefetchmask & (1 << 3));
3191 	} else {
3192 		rid = PCIR_BAR(2);
3193 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3194 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3195 		    0x177, 8, 0);
3196 		rid = PCIR_BAR(3);
3197 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3198 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3199 		    0x376, 1, 0);
3200 	}
3201 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3202 	    prefetchmask & (1 << 4));
3203 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3204 	    prefetchmask & (1 << 5));
3205 }
3206 
/*
 * Assign an IRQ for a device's INTx pin.  The IRQ is taken from, in
 * priority order: a user tunable, the bus's interrupt routing, or the
 * value firmware left in the intline register.  If force_route is
 * non-zero, routing via the bus is attempted even when intline already
 * holds a valid IRQ.  On success the IRQ is added as the rid 0
 * SYS_RES_IRQ entry of the device's resource list.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable.  Only values
	 * in the range 1..254 are accepted.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3254 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/*
		 * SMM firmware owns the controller: request an ownership
		 * change and poll up to ~100ms for it to let go.
		 */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never released ownership: reset the HC. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3291 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others.  We don't
	 * want legacy support to interfere with us.  XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts by zeroing UHCI_INTR in the I/O BAR. */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3315 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller: walk the
	 * extended capability list looking for USB legacy-support entries.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not claim ownership; nothing to do. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim OS ownership and poll up to ~100ms for release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3371 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* -1 makes the first loop's XHCI_XECP_NEXT() test non-zero. */
	eec = -1;

	/*
	 * Synchronise with the BIOS if it owns the controller: walk the
	 * extended capability list (dword offsets) for the legacy entry.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3433 
3434 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range decoded by a PCI-PCI or
 * CardBus bridge from our parent so the numbers are not reused.  If
 * the firmware-programmed range is invalid, cannot be reserved, or
 * pci_clear_buses is set, the secbus/subbus registers are cleared so a
 * fresh range can be assigned later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	/* No usable range: zero the registers pending later renumbering. */
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3539 
/*
 * Allocate the secondary bus number range (always rid 0) for a bridge
 * child.  If no range was reserved during enumeration, lazily reserve
 * one now and program the bridge's secbus/subbus registers to match
 * before handing the resource to the caller.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Bus number ranges only ever use rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without RF_ACTIVE; activation happens below. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3590 #endif
3591 
/*
 * Translate an Enhanced Allocation (EA) BAR Equivalent Indicator into
 * the config-space register offset (rid) it represents: a standard
 * BAR, the expansion ROM, or (with PCI_IOV) an SR-IOV VF BAR.
 * Returns -1 if the BEI has no corresponding register.
 */
static int
pci_ea_bei_to_rid(device_t dev, int bei)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
	int iov_pos;
	struct pcicfg_iov *iov;

	/* VF BAR offsets are relative to the SR-IOV capability. */
	dinfo = device_get_ivars(dev);
	iov = dinfo->cfg.iov;
	if (iov != NULL)
		iov_pos = iov->iov_pos;
	else
		iov_pos = 0;
#endif

	/* Check if matches BAR */
	if ((bei >= PCIM_EA_BEI_BAR_0) &&
	    (bei <= PCIM_EA_BEI_BAR_5))
		return (PCIR_BAR(bei));

	/* Check ROM */
	if (bei == PCIM_EA_BEI_ROM)
		return (PCIR_BIOS);

#ifdef PCI_IOV
	/* Check if matches VF_BAR */
	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
	    (bei <= PCIM_EA_BEI_VF_BAR_5))
		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
		    iov_pos);
#endif

	return (-1);
}
3627 
3628 int
3629 pci_ea_is_enabled(device_t dev, int rid)
3630 {
3631 	struct pci_ea_entry *ea;
3632 	struct pci_devinfo *dinfo;
3633 
3634 	dinfo = device_get_ivars(dev);
3635 
3636 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3637 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3638 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3639 	}
3640 
3641 	return (0);
3642 }
3643 
/*
 * Add and reserve resources described by the device's Enhanced
 * Allocation (EA) capability.  When alloc_iov is non-zero only VF BARs
 * are considered, otherwise only regular BARs and the expansion ROM.
 * Entries that cannot be reserved are disabled in config space so the
 * legacy BAR path can handle them instead.
 */
void
pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
{
	struct pci_ea_entry *ea;
	struct pci_devinfo *dinfo;
	pci_addr_t start, end, count;
	struct resource_list *rl;
	int type, flags, rid;
	struct resource *res;
	uint32_t tmp;
#ifdef PCI_IOV
	struct pcicfg_iov *iov;
#endif

	dinfo = device_get_ivars(dev);
	rl = &dinfo->resources;
	flags = 0;

#ifdef PCI_IOV
	iov = dinfo->cfg.iov;
#endif

	/* Bail if the device has no EA capability. */
	if (dinfo->cfg.ea.ea_location == 0)
		return;

	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {

		/*
		 * TODO: Ignore EA-BAR if is not enabled.
		 *   Currently the EA implementation supports
		 *   only situation, where EA structure contains
		 *   predefined entries. In case they are not enabled
		 *   leave them unallocated and proceed with
		 *   a legacy-BAR mechanism.
		 */
		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
			continue;

		/* Map the entry's properties field to a resource type. */
		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
		case PCIM_EA_P_MEM_PREFETCH:
		case PCIM_EA_P_VF_MEM_PREFETCH:
			flags = RF_PREFETCHABLE;
			/* FALLTHROUGH */
		case PCIM_EA_P_VF_MEM:
		case PCIM_EA_P_MEM:
			type = SYS_RES_MEMORY;
			break;
		case PCIM_EA_P_IO:
			type = SYS_RES_IOPORT;
			break;
		default:
			continue;
		}

		if (alloc_iov != 0) {
#ifdef PCI_IOV
			/* Allocating IOV, confirm BEI matches */
			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
				continue;
#else
			continue;
#endif
		} else {
			/* Allocating BAR, confirm BEI matches */
			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
			    (ea->eae_bei != PCIM_EA_BEI_ROM))
				continue;
		}

		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
		if (rid < 0)
			continue;

		/* Skip resources already allocated by EA */
		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
			continue;

		start = ea->eae_base;
		count = ea->eae_max_offset + 1;
#ifdef PCI_IOV
		/* VF BARs cover the windows of every configured VF. */
		if (iov != NULL)
			count = count * iov->iov_num_vfs;
#endif
		end = start + count - 1;
		if (count == 0)
			continue;

		resource_list_add(rl, type, rid, start, end, count);
		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
		    flags);
		if (res == NULL) {
			resource_list_delete(rl, type, rid);

			/*
			 * Failed to allocate using EA, disable entry.
			 * Another attempt to allocation will be performed
			 * further, but this time using legacy BAR registers
			 */
			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
			tmp &= ~PCIM_EA_ENABLE;
			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);

			/*
			 * Disabling entry might fail in case it is hardwired.
			 * Read flags again to match current status.
			 */
			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);

			continue;
		}

		/* As per specification, fill BAR with zeros */
		pci_write_config(dev, rid, 0, 4);
	}
}
3761 
/*
 * Discover and reserve all of a newly identified device's resources:
 * Enhanced Allocation entries, BARs (with ATA legacy-mode handling and
 * per-device quirks), the INTx interrupt, early USB takeover from SMM,
 * and bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices need special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* pci_add_map() returns 1, or 2 for a 64-bit BAR. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM firmware early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3854 
3855 static struct pci_devinfo *
3856 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3857     int slot, int func, size_t dinfo_size)
3858 {
3859 	struct pci_devinfo *dinfo;
3860 
3861 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3862 	if (dinfo != NULL)
3863 		pci_add_child(dev, dinfo);
3864 
3865 	return (dinfo);
3866 }
3867 
/*
 * Enumerate the given bus: probe every slot and function reachable
 * through the parent bridge and add a child device for each function
 * that responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots whose header type is out of range. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions beyond 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3914 
3915 #ifdef PCI_IOV
/*
 * Create an SR-IOV virtual function child device under the bus for the
 * given routing ID and vendor/device IDs.  'size' must equal the PF's
 * devinfo size so subclassed buses get a correctly sized structure.
 * Returns the new device_t, or NULL on a size mismatch.
 */
device_t
pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *pf_dinfo, *vf_dinfo;
	device_t pcib;
	int busno, slot, func;

	pf_dinfo = device_get_ivars(pf);

	/*
	 * Do a sanity check that we have been passed the correct size.  If this
	 * test fails then likely the pci subclass hasn't implemented the
	 * pci_create_iov_child method like it's supposed to.
	 */
	if (size != pf_dinfo->cfg.devinfo_size) {
		device_printf(pf,
		    "PCI subclass does not properly implement PCI_IOV\n");
		return (NULL);
	}

	pcib = device_get_parent(bus);

	/* Translate the routing ID into bus/slot/function numbers. */
	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);

	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
	    vid, did, size);

	/*
	 * NOTE(review): vf_dinfo is dereferenced without a NULL check;
	 * confirm pci_fill_devinfo() cannot fail here.
	 */
	vf_dinfo->cfg.flags |= PCICFG_VF;
	pci_add_child(bus, vf_dinfo);

	return (vf_dinfo->cfg.dev);
}
3949 
3950 device_t
3951 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
3952     uint16_t vid, uint16_t did)
3953 {
3954 
3955 	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
3956 	    did));
3957 }
3958 #endif
3959 
/*
 * Create a new device_t child of the bus for the given config data,
 * save and re-apply its config state, then discover its resources.
 *
 * NOTE(review): the return value of device_add_child() is used without
 * a NULL check; confirm it cannot fail in this context.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot the config registers, then write them back. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3972 
3973 void
3974 pci_child_added_method(device_t dev, device_t child)
3975 {
3976 
3977 }
3978 
3979 static int
3980 pci_probe(device_t dev)
3981 {
3982 
3983 	device_set_desc(dev, "PCI bus");
3984 
3985 	/* Allow other subclasses to override this driver. */
3986 	return (BUS_PROBE_GENERIC);
3987 }
3988 
/*
 * Attach work shared by pci and its subclasses: record the domain and
 * bus number, optionally reserve this bus number from the parent
 * (PCI_RES_BUS), and set up the bus DMA tag (bounded by
 * PCI_DMA_BOUNDARY unless an ancestor PCI bus already imposed it).
 * Returns 0 on success or ENXIO if the bus number cannot be reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create the boundary-limited DMA tag when the grandparent
	 * is not itself a pci bus (which would already have done so).
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4035 
/*
 * Attach method for the generic PCI bus driver: perform the common
 * setup, then enumerate and attach the devices on this bus.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
4056 
#ifdef PCI_RES_BUS
/*
 * Detach method: detach all children, then release the bus number
 * resource reserved in pci_attach_common().  Only present when bus
 * numbers are managed as resources.
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
#endif
4071 
/*
 * Move 'child' to power state 'state' (a PCI_POWERSTATE_* value),
 * letting the parent bridge's PCIB_POWER_FOR_SLEEP() override the
 * requested state with a firmware-suggested one.
 */
static void
pci_set_power_child(device_t dev, device_t child, int state)
{
	device_t pcib;
	int dstate;

	/*
	 * Set the device to the given state.  If the firmware suggests
	 * a different power state, use it instead.  If power management
	 * is not present, the firmware is responsible for managing
	 * device power.  Skip children who aren't attached since they
	 * are handled separately.
	 */
	pcib = device_get_parent(dev);
	dstate = state;
	if (device_is_attached(child) &&
	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
		pci_set_powerstate(child, dstate);
}
4091 
/*
 * Suspend a single child device: save its config space, run the
 * child's suspend method, and then, if pci_do_power_suspend is set,
 * place the device in D3.  Returns the child's suspend error, if any.
 */
int
pci_suspend_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	int error;

	dinfo = device_get_ivars(child);

	/*
	 * Save the PCI configuration space for the child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	pci_cfg_save(child, dinfo, 0);

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend_child(dev, child);

	if (error)
		return (error);

	if (pci_do_power_suspend)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);

	return (0);
}
4117 
/*
 * Resume a single child device: power it back to D0 (if
 * pci_do_power_resume is set), restore its saved config space, and run
 * the child's resume method.  A child with no driver attached has its
 * freshly restored state re-saved instead.  Always returns 0.
 */
int
pci_resume_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;

	if (pci_do_power_resume)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);

	dinfo = device_get_ivars(child);
	pci_cfg_restore(child, dinfo);
	if (!device_is_attached(child))
		pci_cfg_save(child, dinfo, 1);

	bus_generic_resume_child(dev, child);

	return (0);
}
4135 
4136 int
4137 pci_resume(device_t dev)
4138 {
4139 	device_t child, *devlist;
4140 	int error, i, numdevs;
4141 
4142 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4143 		return (error);
4144 
4145 	/*
4146 	 * Resume critical devices first, then everything else later.
4147 	 */
4148 	for (i = 0; i < numdevs; i++) {
4149 		child = devlist[i];
4150 		switch (pci_get_class(child)) {
4151 		case PCIC_DISPLAY:
4152 		case PCIC_MEMORY:
4153 		case PCIC_BRIDGE:
4154 		case PCIC_BASEPERIPH:
4155 			BUS_RESUME_CHILD(dev, child);
4156 			break;
4157 		}
4158 	}
4159 	for (i = 0; i < numdevs; i++) {
4160 		child = devlist[i];
4161 		switch (pci_get_class(child)) {
4162 		case PCIC_DISPLAY:
4163 		case PCIC_MEMORY:
4164 		case PCIC_BRIDGE:
4165 		case PCIC_BASEPERIPH:
4166 			break;
4167 		default:
4168 			BUS_RESUME_CHILD(dev, child);
4169 		}
4170 	}
4171 	free(devlist, M_TEMP);
4172 	return (0);
4173 }
4174 
4175 static void
4176 pci_load_vendor_data(void)
4177 {
4178 	caddr_t data;
4179 	void *ptr;
4180 	size_t sz;
4181 
4182 	data = preload_search_by_type("pci_vendor_data");
4183 	if (data != NULL) {
4184 		ptr = preload_fetch_addr(data);
4185 		sz = preload_fetch_size(data);
4186 		if (ptr != NULL && sz != 0) {
4187 			pci_vendordata = ptr;
4188 			pci_vendordata_size = sz;
4189 			/* terminate the database */
4190 			pci_vendordata[pci_vendordata_size] = '\n';
4191 		}
4192 	}
4193 }
4194 
/*
 * Bus method invoked when a new driver is added to the PCI devclass.
 * Give the driver a chance to identify new children, then restore
 * config state and re-probe every child that currently has no driver
 * attached; children that still fail to attach get their leaked
 * resources reclaimed via pci_child_detached().
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only driverless children are candidates for reprobe. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
4223 
/*
 * Bus method to set up an interrupt handler for a child device.  After
 * the generic setup succeeds, program the hardware side: re-enable
 * INTx for a legacy interrupt (rid 0), or map and enable the MSI or
 * MSI-X message for rid > 0.  Per-message handler counts ensure each
 * message is only programmed on first use; on any mapping failure the
 * freshly installed handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI message on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Undo the generic setup if MSI/MSI-X mapping failed. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4323 
4324 int
4325 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4326     void *cookie)
4327 {
4328 	struct msix_table_entry *mte;
4329 	struct resource_list_entry *rle;
4330 	struct pci_devinfo *dinfo;
4331 	int error, rid;
4332 
4333 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4334 		return (EINVAL);
4335 
4336 	/* If this isn't a direct child, just bail out */
4337 	if (device_get_parent(child) != dev)
4338 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4339 
4340 	rid = rman_get_rid(irq);
4341 	if (rid == 0) {
4342 		/* Mask INTx */
4343 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4344 	} else {
4345 		/*
4346 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4347 		 * decrement the appropriate handlers count and mask the
4348 		 * MSI-X message, or disable MSI messages if the count
4349 		 * drops to 0.
4350 		 */
4351 		dinfo = device_get_ivars(child);
4352 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4353 		if (rle->res != irq)
4354 			return (EINVAL);
4355 		if (dinfo->cfg.msi.msi_alloc > 0) {
4356 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4357 			    ("MSI-X index too high"));
4358 			if (dinfo->cfg.msi.msi_handlers == 0)
4359 				return (EINVAL);
4360 			dinfo->cfg.msi.msi_handlers--;
4361 			if (dinfo->cfg.msi.msi_handlers == 0)
4362 				pci_disable_msi(child);
4363 		} else {
4364 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4365 			    ("No MSI or MSI-X interrupts allocated"));
4366 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4367 			    ("MSI-X index too high"));
4368 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4369 			if (mte->mte_handlers == 0)
4370 				return (EINVAL);
4371 			mte->mte_handlers--;
4372 			if (mte->mte_handlers == 0)
4373 				pci_mask_msix(child, rid - 1);
4374 		}
4375 	}
4376 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4377 	if (rid > 0)
4378 		KASSERT(error == 0,
4379 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4380 	return (error);
4381 }
4382 
4383 int
4384 pci_print_child(device_t dev, device_t child)
4385 {
4386 	struct pci_devinfo *dinfo;
4387 	struct resource_list *rl;
4388 	int retval = 0;
4389 
4390 	dinfo = device_get_ivars(child);
4391 	rl = &dinfo->resources;
4392 
4393 	retval += bus_print_child_header(dev, child);
4394 
4395 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4396 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4397 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4398 	if (device_get_flags(dev))
4399 		retval += printf(" flags %#x", device_get_flags(dev));
4400 
4401 	retval += printf(" at device %d.%d", pci_get_slot(child),
4402 	    pci_get_function(child));
4403 
4404 	retval += bus_print_child_domain(dev, child);
4405 	retval += bus_print_child_footer(dev, child);
4406 
4407 	return (retval);
4408 }
4409 
/*
 * Table mapping PCI class/subclass codes to human-readable names, used
 * by pci_probe_nomatch() when no driver claims a device.  A subclass
 * of -1 is the wildcard entry for the whole class; 'report' selects
 * whether the device is announced always (1) or only under bootverbose
 * (0).  The table is scanned linearly and terminated by a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4504 
/*
 * Bus method invoked when no driver claims 'child'.  Print a one-line
 * description taken from the loaded vendor database if available,
 * otherwise a generic one derived from the class/subclass table;
 * entries marked report==0 are announced only under bootverbose.
 * Finally the device's config state is saved.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4551 
/*
 * Bus method invoked after a child's driver detaches.  Reclaim any
 * resources the driver leaked, complaining about each kind of leak,
 * and then save the device's config state.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4583 
4584 /*
4585  * Parse the PCI device database, if loaded, and return a pointer to a
4586  * description of the device.
4587  *
4588  * The database is flat text formatted as follows:
4589  *
4590  * Any line not in a valid format is ignored.
4591  * Lines are terminated with newline '\n' characters.
4592  *
4593  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4594  * the vendor name.
4595  *
4596  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4597  * - devices cannot be listed without a corresponding VENDOR line.
4598  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4599  * another TAB, then the device name.
4600  */
4601 
4602 /*
4603  * Assuming (ptr) points to the beginning of a line in the database,
4604  * return the vendor or device and description of the next entry.
4605  * The value of (vendor) or (device) inappropriate for the entry type
4606  * is set to -1.  Returns nonzero at the end of the database.
4607  *
4608  * Note that this is slightly unrobust in the face of corrupt data;
4609  * we attempt to safeguard against this by spamming the end of the
4610  * database with a newline when we initialise.
4611  */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining before the end of the database. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/*
		 * NOTE(review): the "%80[^\n]" conversions below can
		 * store up to 81 bytes (80 characters plus the NUL)
		 * into *desc, so callers must pass a buffer of at
		 * least 81 bytes.
		 */
		/* vendor entry? */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		/*
		 * The scans past the end rely on the '\n' appended by
		 * pci_load_vendor_data() terminating the database.
		 */
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
4657 
4658 static char *
4659 pci_describe_device(device_t dev)
4660 {
4661 	int	vendor, device;
4662 	char	*desc, *vp, *dp, *line;
4663 
4664 	desc = vp = dp = NULL;
4665 
4666 	/*
4667 	 * If we have no vendor data, we can't do anything.
4668 	 */
4669 	if (pci_vendordata == NULL)
4670 		goto out;
4671 
4672 	/*
4673 	 * Scan the vendor data looking for this device
4674 	 */
4675 	line = pci_vendordata;
4676 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4677 		goto out;
4678 	for (;;) {
4679 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4680 			goto out;
4681 		if (vendor == pci_get_vendor(dev))
4682 			break;
4683 	}
4684 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4685 		goto out;
4686 	for (;;) {
4687 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4688 			*dp = 0;
4689 			break;
4690 		}
4691 		if (vendor != -1) {
4692 			*dp = 0;
4693 			break;
4694 		}
4695 		if (device == pci_get_device(dev))
4696 			break;
4697 	}
4698 	if (dp[0] == '\0')
4699 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4700 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4701 	    NULL)
4702 		sprintf(desc, "%s, %s", vp, dp);
4703 out:
4704 	if (vp != NULL)
4705 		free(vp, M_DEVBUF);
4706 	if (dp != NULL)
4707 		free(dp, M_DEVBUF);
4708 	return(desc);
4709 }
4710 
/*
 * Bus method to read a PCI instance variable for a child device.  Most
 * values come straight from the config registers cached in the child's
 * pci_devinfo.  Returns ENOENT for unknown ivars and EINVAL for ivars
 * that do not apply to this device.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* Only type-0 (normal) headers have this register. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* Only type-0 (normal) headers have this register. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4801 
/*
 * Bus method to write a PCI instance variable.  Only the interrupt pin
 * may be changed; the identity-related ivars are explicitly read-only
 * for now, and anything else is unknown.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4834 
4835 #include "opt_ddb.h"
4836 #ifdef DDB
4837 #include <ddb/ddb.h>
4838 #include <sys/cons.h>
4839 
4840 /*
4841  * List resources based on pci map registers, used for within ddb
4842  */
4843 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one identification line per device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Use the attached driver's name if there is one. */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4883 #endif /* DDB */
4884 
/*
 * Lazily reserve the resource backing a BAR that was not reserved at
 * enumeration time.  Sizes the BAR (or reuses a previously recorded
 * size), validates that the requested resource type matches the BAR
 * type, reserves a range of the BAR's true size and alignment, and
 * programs the BAR with the assigned address.  Returns the reserved
 * (inactive) resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The BAR type must agree with the requested resource type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was reserved. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4988 
/*
 * Allocate 'num' contiguous copies of resource 'type'/'*rid' for
 * 'child', lazily reserving BAR-backed memory/port resources and
 * lazily routing the legacy interrupt before handing the request to
 * the child's resource list.  Returns the resource or NULL on failure.
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5064 
/*
 * BUS_ALLOC_RESOURCE() method: allocate a resource on behalf of a
 * child.  Requests from grandchildren are passed straight up the
 * tree; SR-IOV VF memory BARs go through the VF allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5096 
/*
 * BUS_RELEASE_RESOURCE() method: release a resource held by a child.
 * Grandchild requests, VF memory BARs, and bridge window registers
 * each take their own path; everything else is released through the
 * child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5147 
5148 int
5149 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5150     struct resource *r)
5151 {
5152 	struct pci_devinfo *dinfo;
5153 	int error;
5154 
5155 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5156 	if (error)
5157 		return (error);
5158 
5159 	/* Enable decoding in the command register when activating BARs. */
5160 	if (device_get_parent(child) == dev) {
5161 		/* Device ROMs need their decoding explicitly enabled. */
5162 		dinfo = device_get_ivars(child);
5163 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5164 			pci_write_bar(child, pci_find_bar(child, rid),
5165 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5166 		switch (type) {
5167 		case SYS_RES_IOPORT:
5168 		case SYS_RES_MEMORY:
5169 			error = PCI_ENABLE_IO(dev, child, type);
5170 			break;
5171 		}
5172 	}
5173 	return (error);
5174 }
5175 
5176 int
5177 pci_deactivate_resource(device_t dev, device_t child, int type,
5178     int rid, struct resource *r)
5179 {
5180 	struct pci_devinfo *dinfo;
5181 	int error;
5182 
5183 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5184 	if (error)
5185 		return (error);
5186 
5187 	/* Disable decoding for device ROMs. */
5188 	if (device_get_parent(child) == dev) {
5189 		dinfo = device_get_ivars(child);
5190 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5191 			pci_write_bar(child, pci_find_bar(child, rid),
5192 			    rman_get_start(r));
5193 	}
5194 	return (0);
5195 }
5196 
5197 void
5198 pci_child_deleted(device_t dev, device_t child)
5199 {
5200 	struct resource_list_entry *rle;
5201 	struct resource_list *rl;
5202 	struct pci_devinfo *dinfo;
5203 
5204 	dinfo = device_get_ivars(child);
5205 	rl = &dinfo->resources;
5206 
5207 	/* Turn off access to resources we're about to free */
5208 	if (bus_child_present(child) != 0) {
5209 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5210 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5211 
5212 		pci_disable_busmaster(child);
5213 	}
5214 
5215 	/* Free all allocated resources */
5216 	STAILQ_FOREACH(rle, rl, link) {
5217 		if (rle->res) {
5218 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5219 			    resource_list_busy(rl, rle->type, rle->rid)) {
5220 				pci_printf(&dinfo->cfg,
5221 				    "Resource still owned, oops. "
5222 				    "(type=%d, rid=%d, addr=%lx)\n",
5223 				    rle->type, rle->rid,
5224 				    rman_get_start(rle->res));
5225 				bus_release_resource(child, rle->type, rle->rid,
5226 				    rle->res);
5227 			}
5228 			resource_list_unreserve(rl, dev, child, rle->type,
5229 			    rle->rid);
5230 		}
5231 	}
5232 	resource_list_free(rl);
5233 
5234 	pci_freecfg(dinfo);
5235 }
5236 
5237 void
5238 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5239 {
5240 	struct pci_devinfo *dinfo;
5241 	struct resource_list *rl;
5242 	struct resource_list_entry *rle;
5243 
5244 	if (device_get_parent(child) != dev)
5245 		return;
5246 
5247 	dinfo = device_get_ivars(child);
5248 	rl = &dinfo->resources;
5249 	rle = resource_list_find(rl, type, rid);
5250 	if (rle == NULL)
5251 		return;
5252 
5253 	if (rle->res) {
5254 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5255 		    resource_list_busy(rl, type, rid)) {
5256 			device_printf(dev, "delete_resource: "
5257 			    "Resource still owned by child, oops. "
5258 			    "(type=%d, rid=%d, addr=%jx)\n",
5259 			    type, rid, rman_get_start(rle->res));
5260 			return;
5261 		}
5262 		resource_list_unreserve(rl, dev, child, type, rid);
5263 	}
5264 	resource_list_delete(rl, type, rid);
5265 }
5266 
5267 struct resource_list *
5268 pci_get_resource_list (device_t dev, device_t child)
5269 {
5270 	struct pci_devinfo *dinfo = device_get_ivars(child);
5271 
5272 	return (&dinfo->resources);
5273 }
5274 
5275 bus_dma_tag_t
5276 pci_get_dma_tag(device_t bus, device_t dev)
5277 {
5278 	struct pci_softc *sc = device_get_softc(bus);
5279 
5280 	return (sc->sc_dma_tag);
5281 }
5282 
/*
 * PCI_READ_CONFIG() method: read a register from a child's config
 * space via the parent bridge.  SR-IOV VFs don't implement the
 * vendor/device ID registers, so those reads are answered from the
 * cached copies instead of hitting the hardware.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* 32-bit read returns DID in the high word. */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5323 
5324 void
5325 pci_write_config_method(device_t dev, device_t child, int reg,
5326     uint32_t val, int width)
5327 {
5328 	struct pci_devinfo *dinfo = device_get_ivars(child);
5329 	pcicfgregs *cfg = &dinfo->cfg;
5330 
5331 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5332 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5333 }
5334 
5335 int
5336 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5337     size_t buflen)
5338 {
5339 
5340 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5341 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5342 	return (0);
5343 }
5344 
5345 int
5346 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5347     size_t buflen)
5348 {
5349 	struct pci_devinfo *dinfo;
5350 	pcicfgregs *cfg;
5351 
5352 	dinfo = device_get_ivars(child);
5353 	cfg = &dinfo->cfg;
5354 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5355 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5356 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5357 	    cfg->progif);
5358 	return (0);
5359 }
5360 
5361 int
5362 pci_assign_interrupt_method(device_t dev, device_t child)
5363 {
5364 	struct pci_devinfo *dinfo = device_get_ivars(child);
5365 	pcicfgregs *cfg = &dinfo->cfg;
5366 
5367 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5368 	    cfg->intpin));
5369 }
5370 
/*
 * dev_lookup eventhandler: translate a pciconf-style device selector
 * ("pciD:B:S:F" or "pciB:S:F") into a device_t.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler already resolved the name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components were given: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/*
		 * Only three components: shift them down one place
		 * and default the domain to zero.
		 */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* Range-check; ARI permits functions above 7 only at slot 0. */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5419 
/*
 * Module event handler: create/destroy the global PCI device queue,
 * the /dev/pci control device, and the dev_lookup hook.  The cdev
 * and eventhandler tag persist across events in static storage.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5446 
/*
 * Rewrite the PCI Express control registers saved by
 * pci_cfg_save_pcie().  Registers that exist only for particular
 * port types, or only in capability version 2 and later, are
 * written conditionally; the conditions mirror the save side.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot registers exist on root/downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" registers exist only in capability version 2+. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5482 
5483 static void
5484 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5485 {
5486 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5487 	    dinfo->cfg.pcix.pcix_command,  2);
5488 }
5489 
/*
 * Restore a device's configuration space after a power transition or
 * resume: bring the device to D0, rewrite the saved header registers
 * (per header type), the BARs, and the PCIe/PCI-X/MSI/MSI-X
 * capability state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Header-type specific registers. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5557 
/*
 * Save the writable PCI Express control registers so they can be
 * rewritten by pci_cfg_restore_pcie().  The conditions mirror the
 * restore side: some registers exist only for particular port types
 * or only in capability version 2 and later.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot registers exist on root/downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" registers exist only in capability version 2+. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5595 
5596 static void
5597 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5598 {
5599 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5600 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5601 }
5602 
/*
 * Save a device's configuration registers so pci_cfg_restore() can
 * rewrite them later.  If 'setstate' is non-zero, additionally power
 * the device down according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Header-type specific registers. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5705 
5706 /* Wrapper APIs suitable for device driver use. */
5707 void
5708 pci_save_state(device_t dev)
5709 {
5710 	struct pci_devinfo *dinfo;
5711 
5712 	dinfo = device_get_ivars(dev);
5713 	pci_cfg_save(dev, dinfo, 0);
5714 }
5715 
5716 void
5717 pci_restore_state(device_t dev)
5718 {
5719 	struct pci_devinfo *dinfo;
5720 
5721 	dinfo = device_get_ivars(dev);
5722 	pci_cfg_restore(dev, dinfo);
5723 }
5724 
5725 static uint16_t
5726 pci_get_rid_method(device_t dev, device_t child)
5727 {
5728 
5729 	return (PCIB_GET_RID(device_get_parent(dev), child));
5730 }
5731 
/*
 * Find the upstream port of a given PCI device in a root complex.
 * Returns the PCI-e root port device, or NULL if the walk leaves the
 * PCI hierarchy before finding one.
 */
device_t
pci_find_pcie_root_port(device_t dev)
{
	struct pci_devinfo *dinfo;
	devclass_t pci_class;
	device_t pcib, bus;

	pci_class = devclass_find("pci");
	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));

	/*
	 * Walk the bridge hierarchy until we find a PCI-e root
	 * port or a non-PCI device.
	 */
	for (;;) {
		/* Step up: device -> pci bus -> bridge (pcib). */
		bus = device_get_parent(dev);
		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
		    device_get_nameunit(dev)));

		pcib = device_get_parent(bus);
		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
		    device_get_nameunit(bus)));

		/*
		 * pcib's parent must be a PCI bus for this to be a
		 * PCI-PCI bridge.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			return (NULL);

		dinfo = device_get_ivars(pcib);
		if (dinfo->cfg.pcie.pcie_location != 0 &&
		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
			return (pcib);

		/* Not a root port; continue the walk from the bridge. */
		dev = pcib;
	}
}
5772