xref: /freebsd/sys/dev/pci/pci.c (revision 88eb5c506d00e446dcfeb0f84b36d5132a8d9f6b)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
79 #define	PCIR_IS_BIOS(cfg, reg)						\
80 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
81 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
82 
83 static int		pci_has_quirk(uint32_t devid, int quirk);
84 static pci_addr_t	pci_mapbase(uint64_t mapreg);
85 static const char	*pci_maptype(uint64_t mapreg);
86 static int		pci_maprange(uint64_t mapreg);
87 static pci_addr_t	pci_rombase(uint64_t mapreg);
88 static int		pci_romsize(uint64_t testval);
89 static void		pci_fixancient(pcicfgregs *cfg);
90 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91 
92 static int		pci_porten(device_t dev);
93 static int		pci_memen(device_t dev);
94 static void		pci_assign_interrupt(device_t bus, device_t dev,
95 			    int force_route);
96 static int		pci_add_map(device_t bus, device_t dev, int reg,
97 			    struct resource_list *rl, int force, int prefetch);
98 static int		pci_probe(device_t dev);
99 static int		pci_attach(device_t dev);
100 #ifdef PCI_RES_BUS
101 static int		pci_detach(device_t dev);
102 #endif
103 static void		pci_load_vendor_data(void);
104 static int		pci_describe_parse_line(char **ptr, int *vendor,
105 			    int *device, char **desc);
106 static char		*pci_describe_device(device_t dev);
107 static int		pci_modevent(module_t mod, int what, void *arg);
108 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
109 			    pcicfgregs *cfg);
110 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
111 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
112 			    int reg, uint32_t *data);
113 #if 0
114 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
115 			    int reg, uint32_t data);
116 #endif
117 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
118 static void		pci_mask_msix(device_t dev, u_int index);
119 static void		pci_unmask_msix(device_t dev, u_int index);
120 static int		pci_msi_blacklisted(void);
121 static int		pci_msix_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
127 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128 
129 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
130     int b, int s, int f, uint16_t vid, uint16_t did);
131 
/*
 * Method dispatch table: binds the generic device, bus, and PCI kobj
 * interfaces to this driver's implementations, falling back to the
 * bus_generic_* helpers where no PCI-specific handling is needed.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
210 
211 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
212 
213 static devclass_t pci_devclass;
214 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
215 MODULE_VERSION(pci, 1);
216 
217 static char	*pci_vendordata;
218 static size_t	pci_vendordata_size;
219 
/* One entry in the pci_quirks[] table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* type-specific argument (e.g. register offset) */
	int	arg2;	/* second type-specific argument */
};
232 
/* Table of known-broken devices, terminated by a zeroed entry. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
310 
311 /* map register information */
312 #define	PCI_MAPMEM	0x01	/* memory map */
313 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
314 #define	PCI_MAPPORT	0x04	/* port map */
315 
316 struct devlist pci_devq;
317 uint32_t pci_generation;
318 uint32_t pci_numdevs = 0;
319 static int pcie_chipset, pcix_chipset;
320 
321 /* sysctl vars */
322 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
323 
324 static int pci_enable_io_modes = 1;
325 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
326     &pci_enable_io_modes, 1,
327     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
328 enable these bits correctly.  We'd like to do this all the time, but there\n\
329 are some peripherals that this causes problems with.");
330 
331 static int pci_do_realloc_bars = 0;
332 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
333     &pci_do_realloc_bars, 0,
334     "Attempt to allocate a new range for any BARs whose original "
335     "firmware-assigned ranges fail to allocate during the initial device scan.");
336 
337 static int pci_do_power_nodriver = 0;
338 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
339     &pci_do_power_nodriver, 0,
340   "Place a function into D3 state when no driver attaches to it.  0 means\n\
341 disable.  1 means conservatively place devices into D3 state.  2 means\n\
342 agressively place devices into D3 state.  3 means put absolutely everything\n\
343 in D3 state.");
344 
345 int pci_do_power_resume = 1;
346 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
347     &pci_do_power_resume, 1,
348   "Transition from D3 -> D0 on resume.");
349 
350 int pci_do_power_suspend = 1;
351 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
352     &pci_do_power_suspend, 1,
353   "Transition from D0 -> D3 on suspend.");
354 
355 static int pci_do_msi = 1;
356 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
357     "Enable support for MSI interrupts");
358 
359 static int pci_do_msix = 1;
360 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
361     "Enable support for MSI-X interrupts");
362 
363 static int pci_honor_msi_blacklist = 1;
364 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
365     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
366 
367 #if defined(__i386__) || defined(__amd64__)
368 static int pci_usb_takeover = 1;
369 #else
370 static int pci_usb_takeover = 0;
371 #endif
372 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
373     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
374 Disable this if you depend on BIOS emulation of USB devices, that is\n\
375 you use USB devices (like keyboard or mouse) but do not load USB drivers");
376 
377 static int pci_clear_bars;
378 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
379     "Ignore firmware-assigned resources for BARs.");
380 
381 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
382 static int pci_clear_buses;
383 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
384     "Ignore firmware-assigned bus numbers.");
385 #endif
386 
387 static int pci_enable_ari = 1;
388 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
389     0, "Enable support for PCIe Alternative RID Interpretation");
390 
391 static int
392 pci_has_quirk(uint32_t devid, int quirk)
393 {
394 	const struct pci_quirk *q;
395 
396 	for (q = &pci_quirks[0]; q->devid; q++) {
397 		if (q->devid == devid && q->type == quirk)
398 			return (1);
399 	}
400 	return (0);
401 }
402 
403 /* Find a device_t by bus/slot/function in domain 0 */
404 
405 device_t
406 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
407 {
408 
409 	return (pci_find_dbsf(0, bus, slot, func));
410 }
411 
412 /* Find a device_t by domain/bus/slot/function */
413 
414 device_t
415 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
416 {
417 	struct pci_devinfo *dinfo;
418 
419 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
420 		if ((dinfo->cfg.domain == domain) &&
421 		    (dinfo->cfg.bus == bus) &&
422 		    (dinfo->cfg.slot == slot) &&
423 		    (dinfo->cfg.func == func)) {
424 			return (dinfo->cfg.dev);
425 		}
426 	}
427 
428 	return (NULL);
429 }
430 
431 /* Find a device_t by vendor/device ID */
432 
433 device_t
434 pci_find_device(uint16_t vendor, uint16_t device)
435 {
436 	struct pci_devinfo *dinfo;
437 
438 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
439 		if ((dinfo->cfg.vendor == vendor) &&
440 		    (dinfo->cfg.device == device)) {
441 			return (dinfo->cfg.dev);
442 		}
443 	}
444 
445 	return (NULL);
446 }
447 
448 device_t
449 pci_find_class(uint8_t class, uint8_t subclass)
450 {
451 	struct pci_devinfo *dinfo;
452 
453 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
454 		if (dinfo->cfg.baseclass == class &&
455 		    dinfo->cfg.subclass == subclass) {
456 			return (dinfo->cfg.dev);
457 		}
458 	}
459 
460 	return (NULL);
461 }
462 
463 static int
464 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
465 {
466 	va_list ap;
467 	int retval;
468 
469 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
470 	    cfg->func);
471 	va_start(ap, fmt);
472 	retval += vprintf(fmt, ap);
473 	va_end(ap);
474 	return (retval);
475 }
476 
477 /* return base address of memory or port map */
478 
479 static pci_addr_t
480 pci_mapbase(uint64_t mapreg)
481 {
482 
483 	if (PCI_BAR_MEM(mapreg))
484 		return (mapreg & PCIM_BAR_MEM_BASE);
485 	else
486 		return (mapreg & PCIM_BAR_IO_BASE);
487 }
488 
489 /* return map type of memory or port map */
490 
491 static const char *
492 pci_maptype(uint64_t mapreg)
493 {
494 
495 	if (PCI_BAR_IO(mapreg))
496 		return ("I/O Port");
497 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
498 		return ("Prefetchable Memory");
499 	return ("Memory");
500 }
501 
502 /* return log2 of map size decoded for memory or port map */
503 
504 int
505 pci_mapsize(uint64_t testval)
506 {
507 	int ln2size;
508 
509 	testval = pci_mapbase(testval);
510 	ln2size = 0;
511 	if (testval != 0) {
512 		while ((testval & 1) == 0)
513 		{
514 			ln2size++;
515 			testval >>= 1;
516 		}
517 	}
518 	return (ln2size);
519 }
520 
521 /* return base address of device ROM */
522 
523 static pci_addr_t
524 pci_rombase(uint64_t mapreg)
525 {
526 
527 	return (mapreg & PCIM_BIOS_ADDR_MASK);
528 }
529 
530 /* return log2 of map size decided for device ROM */
531 
532 static int
533 pci_romsize(uint64_t testval)
534 {
535 	int ln2size;
536 
537 	testval = pci_rombase(testval);
538 	ln2size = 0;
539 	if (testval != 0) {
540 		while ((testval & 1) == 0)
541 		{
542 			ln2size++;
543 			testval >>= 1;
544 		}
545 	}
546 	return (ln2size);
547 }
548 
549 /* return log2 of address range supported by map register */
550 
551 static int
552 pci_maprange(uint64_t mapreg)
553 {
554 	int ln2range = 0;
555 
556 	if (PCI_BAR_IO(mapreg))
557 		ln2range = 32;
558 	else
559 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
560 		case PCIM_BAR_MEM_32:
561 			ln2range = 32;
562 			break;
563 		case PCIM_BAR_MEM_1MB:
564 			ln2range = 20;
565 			break;
566 		case PCIM_BAR_MEM_64:
567 			ln2range = 64;
568 			break;
569 		}
570 	return (ln2range);
571 }
572 
573 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
574 
575 static void
576 pci_fixancient(pcicfgregs *cfg)
577 {
578 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
579 		return;
580 
581 	/* PCI to PCI bridges use header type 1 */
582 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
583 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
584 }
585 
/*
 * Extract header-type-specific config data for the function at b:s:f
 * into *cfg: subsystem IDs, min grant/max latency, and BAR count for
 * normal devices; secondary bus/latency/control registers for PCI-PCI
 * and CardBus bridges.  Unknown header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* CardBus bridges also carry subsystem IDs. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
621 
622 /* read configuration header into pcicfgregs structure */
623 struct pci_devinfo *
624 pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
625 {
626 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
627 	uint16_t vid, did;
628 
629 	vid = REG(PCIR_VENDOR, 2);
630 	did = REG(PCIR_DEVICE, 2);
631 	if (vid != 0xffff)
632 		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));
633 
634 	return (NULL);
635 }
636 
637 struct pci_devinfo *
638 pci_alloc_devinfo_method(device_t dev)
639 {
640 
641 	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
642 	    M_WAITOK | M_ZERO));
643 }
644 
/*
 * Build the pci_devinfo for the function at d:b:s:f (already known to
 * respond with vendor/device vid/did): read the common config header,
 * header-type-specific data, and capability list, then link the entry
 * onto the global pci_devq list and bump the device generation.
 *
 * Relies on the REG() macro #defined in pci_read_device() above and
 * #undef'd just after this function.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Split the multi-function flag out of the header type byte. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* Only walk the capability list if the status register says so. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the key config fields into the pci_conf user view. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
709 #undef REG
710 
/*
 * Parse the Enhanced Allocation (EA) capability for this function and
 * record each entry (BEI, flags, base, max offset) on the
 * cfg->ea.ea_entries list.  No-op if the capability was not located
 * (cfg->ea.ea_location == 0).
 */
static void
pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
    cfg->ea.ea_location + (n), w)
	int num_ent;
	int ptr;
	int a, b;
	uint32_t val;
	int ent_size;
	uint32_t dw[4];
	uint64_t base, max_offset;
	struct pci_ea_entry *eae;

	if (cfg->ea.ea_location == 0)
		return;

	STAILQ_INIT(&cfg->ea.ea_entries);

	/* Determine the number of entries */
	num_ent = REG(PCIR_EA_NUM_ENT, 2);
	num_ent &= PCIM_EA_NUM_ENT_MASK;

	/* Find the first entry to care of */
	ptr = PCIR_EA_FIRST_ENT;

	/* Skip DWORD 2 for type 1 functions */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
		ptr += 4;

	for (a = 0; a < num_ent; a++) {

		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;

		/* Read a number of dwords in the entry */
		val = REG(ptr, 4);
		ptr += 4;
		/* Entry size (in dwords following the header) from the header. */
		ent_size = (val & PCIM_EA_ES);

		for (b = 0; b < ent_size; b++) {
			dw[b] = REG(ptr, 4);
			ptr += 4;
		}

		eae->eae_flags = val;
		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;

		/* Low dwords; the non-field bits of max_offset read as ones. */
		base = dw[0] & PCIM_EA_FIELD_MASK;
		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
		b = 2;
		/* Optional upper 32 bits follow when the 64-bit flags are set. */
		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
			base |= (uint64_t)dw[b] << 32UL;
			b++;
		}
		if (((dw[1] & PCIM_EA_IS_64) != 0)
		    && (b < ent_size)) {
			max_offset |= (uint64_t)dw[b] << 32UL;
			b++;
		}

		eae->eae_base = base;
		eae->eae_max_offset = max_offset;

		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);

		if (bootverbose) {
			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
		}
	}
}
#undef REG
785 
/*
 * Walk this function's PCI capability list and cache the location and
 * key registers of each recognized capability (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCIe, EA) in
 * *cfg.  Also sets the file-global pcix_chipset/pcie_chipset hints.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Supported message count is encoded as a power of 2. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index plus offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
950 
951 /*
952  * PCI Vital Product Data
953  */
954 
955 #define	PCI_VPD_TIMEOUT		1000000
956 
957 static int
958 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
959 {
960 	int count = PCI_VPD_TIMEOUT;
961 
962 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
963 
964 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
965 
966 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
967 		if (--count < 0)
968 			return (ENXIO);
969 		DELAY(1);	/* limit looping */
970 	}
971 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
972 
973 	return (0);
974 }
975 
#if 0
/*
 * Unused counterpart of pci_read_vpd_reg(): write one 32-bit word of
 * VPD data and poll for completion (bit 15 of the address register
 * clears once the device has committed the write).  Disabled code,
 * kept for reference only.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
995 
996 #undef PCI_VPD_TIMEOUT
997 
/*
 * Cursor state for the VPD parser: buffers one 32-bit word read from
 * the device and hands it out one byte at a time while maintaining a
 * running checksum.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last word read, shifted as consumed */
	int		bytesinval;	/* bytes of 'val' not yet consumed */
	int		off;		/* offset of the next word to read */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
1006 
1007 static int
1008 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1009 {
1010 	uint32_t reg;
1011 	uint8_t byte;
1012 
1013 	if (vrs->bytesinval == 0) {
1014 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1015 			return (ENXIO);
1016 		vrs->val = le32toh(reg);
1017 		vrs->off += 4;
1018 		byte = vrs->val & 0xff;
1019 		vrs->bytesinval = 3;
1020 	} else {
1021 		vrs->val = vrs->val >> 8;
1022 		byte = vrs->val & 0xff;
1023 		vrs->bytesinval--;
1024 	}
1025 
1026 	vrs->cksum += byte;
1027 	*data = byte;
1028 	return (0);
1029 }
1030 
/*
 * Parse the device's Vital Product Data into cfg->vpd.
 *
 * The VPD is a stream of tagged resources: a small- or large-resource
 * tag byte, a length, and data.  This routine walks that stream with a
 * hand-rolled state machine:
 *
 *	state 0   - expecting a resource tag
 *	state 1   - copying the Identifier String into vpd_ident
 *	state 2/3 - VPD-R (read-only) keyword header / value bytes
 *	state 4   - skipping bytes
 *	state 5/6 - VPD-W (read/write) keyword header / value bytes
 *	state -1  - normal termination
 *	state -2  - I/O error reported by vpd_nextbyte()
 *
 * On an I/O error or checksum failure the partially-built arrays are
 * freed again; in every case vpd_cached is set so the (slow) parse is
 * attempted only once per device.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian len. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * VPD is at most 0x7f words; a length
				 * running past that cannot be valid.
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			/* Two-character keyword followed by a length byte. */
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the checksum of
			 * everything up to and including it come out to 0.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Whole VPD-R consumed: trim to fit. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the device offset for later writeback. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the parse done (successful or not) so we never retry. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1302 
1303 int
1304 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1305 {
1306 	struct pci_devinfo *dinfo = device_get_ivars(child);
1307 	pcicfgregs *cfg = &dinfo->cfg;
1308 
1309 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1310 		pci_read_vpd(device_get_parent(dev), cfg);
1311 
1312 	*identptr = cfg->vpd.vpd_ident;
1313 
1314 	if (*identptr == NULL)
1315 		return (ENXIO);
1316 
1317 	return (0);
1318 }
1319 
1320 int
1321 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1322 	const char **vptr)
1323 {
1324 	struct pci_devinfo *dinfo = device_get_ivars(child);
1325 	pcicfgregs *cfg = &dinfo->cfg;
1326 	int i;
1327 
1328 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1329 		pci_read_vpd(device_get_parent(dev), cfg);
1330 
1331 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1332 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1333 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1334 			*vptr = cfg->vpd.vpd_ros[i].value;
1335 			return (0);
1336 		}
1337 
1338 	*vptr = NULL;
1339 	return (ENXIO);
1340 }
1341 
1342 struct pcicfg_vpd *
1343 pci_fetch_vpd_list(device_t dev)
1344 {
1345 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1346 	pcicfgregs *cfg = &dinfo->cfg;
1347 
1348 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1349 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1350 	return (&cfg->vpd);
1351 }
1352 
1353 /*
1354  * Find the requested HyperTransport capability and return the offset
1355  * in configuration space via the pointer provided.  The function
1356  * returns 0 on success and an error code otherwise.
1357  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* All HyperTransport capabilities share the PCIY_HT cap ID. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Slave and host capabilities are identified by only the
		 * top three bits of the command register; all other HT
		 * capability types use the wider PCIM_HTCMD_CAP_MASK
		 * field.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1395 
1396 /*
1397  * Find the requested capability and return the offset in
1398  * configuration space via the pointer provided.  The function returns
1399  * 0 on success and an error code otherwise.
1400  */
1401 int
1402 pci_find_cap_method(device_t dev, device_t child, int capability,
1403     int *capreg)
1404 {
1405 	struct pci_devinfo *dinfo = device_get_ivars(child);
1406 	pcicfgregs *cfg = &dinfo->cfg;
1407 	u_int32_t status;
1408 	u_int8_t ptr;
1409 
1410 	/*
1411 	 * Check the CAP_LIST bit of the PCI status register first.
1412 	 */
1413 	status = pci_read_config(child, PCIR_STATUS, 2);
1414 	if (!(status & PCIM_STATUS_CAPPRESENT))
1415 		return (ENXIO);
1416 
1417 	/*
1418 	 * Determine the start pointer of the capabilities list.
1419 	 */
1420 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1421 	case PCIM_HDRTYPE_NORMAL:
1422 	case PCIM_HDRTYPE_BRIDGE:
1423 		ptr = PCIR_CAP_PTR;
1424 		break;
1425 	case PCIM_HDRTYPE_CARDBUS:
1426 		ptr = PCIR_CAP_PTR_2;
1427 		break;
1428 	default:
1429 		/* XXX: panic? */
1430 		return (ENXIO);		/* no extended capabilities support */
1431 	}
1432 	ptr = pci_read_config(child, ptr, 1);
1433 
1434 	/*
1435 	 * Traverse the capabilities list.
1436 	 */
1437 	while (ptr != 0) {
1438 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1439 			if (capreg != NULL)
1440 				*capreg = ptr;
1441 			return (0);
1442 		}
1443 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1444 	}
1445 
1446 	return (ENOENT);
1447 }
1448 
1449 /*
1450  * Find the requested extended capability and return the offset in
1451  * configuration space via the pointer provided.  The function returns
1452  * 0 on success and an error code otherwise.
1453  */
1454 int
1455 pci_find_extcap_method(device_t dev, device_t child, int capability,
1456     int *capreg)
1457 {
1458 	struct pci_devinfo *dinfo = device_get_ivars(child);
1459 	pcicfgregs *cfg = &dinfo->cfg;
1460 	uint32_t ecap;
1461 	uint16_t ptr;
1462 
1463 	/* Only supported for PCI-express devices. */
1464 	if (cfg->pcie.pcie_location == 0)
1465 		return (ENXIO);
1466 
1467 	ptr = PCIR_EXTCAP;
1468 	ecap = pci_read_config(child, ptr, 4);
1469 	if (ecap == 0xffffffff || ecap == 0)
1470 		return (ENOENT);
1471 	for (;;) {
1472 		if (PCI_EXTCAP_ID(ecap) == capability) {
1473 			if (capreg != NULL)
1474 				*capreg = ptr;
1475 			return (0);
1476 		}
1477 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1478 		if (ptr == 0)
1479 			break;
1480 		ecap = pci_read_config(child, ptr, 4);
1481 	}
1482 
1483 	return (ENOENT);
1484 }
1485 
1486 /*
1487  * Support for MSI-X message interrupts.
1488  */
1489 void
1490 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1491     uint64_t address, uint32_t data)
1492 {
1493 	struct pci_devinfo *dinfo = device_get_ivars(child);
1494 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1495 	uint32_t offset;
1496 
1497 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1498 	offset = msix->msix_table_offset + index * 16;
1499 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1500 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1501 	bus_write_4(msix->msix_table_res, offset + 8, data);
1502 
1503 	/* Enable MSI -> HT mapping. */
1504 	pci_ht_map_msi(child, address);
1505 }
1506 
1507 void
1508 pci_mask_msix(device_t dev, u_int index)
1509 {
1510 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1511 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1512 	uint32_t offset, val;
1513 
1514 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1515 	offset = msix->msix_table_offset + index * 16 + 12;
1516 	val = bus_read_4(msix->msix_table_res, offset);
1517 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1518 		val |= PCIM_MSIX_VCTRL_MASK;
1519 		bus_write_4(msix->msix_table_res, offset, val);
1520 	}
1521 }
1522 
1523 void
1524 pci_unmask_msix(device_t dev, u_int index)
1525 {
1526 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1527 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1528 	uint32_t offset, val;
1529 
1530 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1531 	offset = msix->msix_table_offset + index * 16 + 12;
1532 	val = bus_read_4(msix->msix_table_res, offset);
1533 	if (val & PCIM_MSIX_VCTRL_MASK) {
1534 		val &= ~PCIM_MSIX_VCTRL_MASK;
1535 		bus_write_4(msix->msix_table_res, offset, val);
1536 	}
1537 }
1538 
1539 int
1540 pci_pending_msix(device_t dev, u_int index)
1541 {
1542 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1543 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1544 	uint32_t offset, bit;
1545 
1546 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1547 	offset = msix->msix_pba_offset + (index / 32) * 4;
1548 	bit = 1 << index % 32;
1549 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1550 }
1551 
1552 /*
1553  * Restore MSI-X registers and table during resume.  If MSI-X is
1554  * enabled then walk the virtual table to restore the actual MSI-X
1555  * table.
1556  */
1557 static void
1558 pci_resume_msix(device_t dev)
1559 {
1560 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1561 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1562 	struct msix_table_entry *mte;
1563 	struct msix_vector *mv;
1564 	int i;
1565 
1566 	if (msix->msix_alloc > 0) {
1567 		/* First, mask all vectors. */
1568 		for (i = 0; i < msix->msix_msgnum; i++)
1569 			pci_mask_msix(dev, i);
1570 
1571 		/* Second, program any messages with at least one handler. */
1572 		for (i = 0; i < msix->msix_table_len; i++) {
1573 			mte = &msix->msix_table[i];
1574 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1575 				continue;
1576 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1577 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1578 			pci_unmask_msix(dev, i);
1579 		}
1580 	}
1581 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1582 	    msix->msix_ctrl, 2);
1583 }
1584 
1585 /*
1586  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1587  * returned in *count.  After this function returns, each message will be
1588  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1589  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The table and
	 * PBA may live in the same BAR or in two different ones.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the BARs match, 'rle' is still the table BAR's entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more messages than the device has. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Partial allocations are fine past the first one. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Initially message i is backed by vector i + 1 (1-based).
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1724 
1725 /*
1726  * By default, pci_alloc_msix() will assign the allocated IRQ
1727  * resources consecutively to the first N messages in the MSI-X table.
1728  * However, device drivers may want to use different layouts if they
1729  * either receive fewer messages than they asked for, or they wish to
1730  * populate the MSI-X table sparsely.  This method allows the driver
1731  * to specify what layout it wants.  It must be called after a
1732  * successful pci_alloc_msix() but before any of the associated
1733  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1734  *
1735  * The 'vectors' array contains 'count' message vectors.  The array
1736  * maps directly to the MSI-X table in that index 0 in the array
1737  * specifies the vector for the first message in the MSI-X table, etc.
1738  * The vector value in each array index can either be 0 to indicate
1739  * that no vector should be assigned to a message slot, or it can be a
1740  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1742  * vector (IRQ) to be used for the corresponding message.
1743  *
1744  * On successful return, each message with a non-zero vector will have
1745  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1746  * 1.  Additionally, if any of the IRQs allocated via the previous
1747  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1748  * will be freed back to the system automatically.
1749  *
1750  * For example, suppose a driver has a MSI-X table with 6 messages and
1751  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1752  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1753  * C.  After the call to pci_alloc_msix(), the device will be setup to
1754  * have an MSI-X table of ABC--- (where - means no vector assigned).
1755  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1756  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1757  * be freed back to the system.  This device will also have valid
1758  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1759  *
1760  * In any case, the SYS_RES_IRQ rid X will always map to the message
1761  * at MSI-X table index X - 1 and will only be valid if a vector is
1762  * assigned to that table entry.
1763  */
1764 int
1765 pci_remap_msix_method(device_t dev, device_t child, int count,
1766     const u_int *vectors)
1767 {
1768 	struct pci_devinfo *dinfo = device_get_ivars(child);
1769 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1770 	struct resource_list_entry *rle;
1771 	int i, irq, j, *used;
1772 
1773 	/*
1774 	 * Have to have at least one message in the table but the
1775 	 * table can't be bigger than the actual MSI-X table in the
1776 	 * device.
1777 	 */
1778 	if (count == 0 || count > msix->msix_msgnum)
1779 		return (EINVAL);
1780 
1781 	/* Sanity check the vectors. */
1782 	for (i = 0; i < count; i++)
1783 		if (vectors[i] > msix->msix_alloc)
1784 			return (EINVAL);
1785 
1786 	/*
1787 	 * Make sure there aren't any holes in the vectors to be used.
1788 	 * It's a big pain to support it, and it doesn't really make
1789 	 * sense anyway.  Also, at least one vector must be used.
1790 	 */
1791 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1792 	    M_ZERO);
1793 	for (i = 0; i < count; i++)
1794 		if (vectors[i] != 0)
1795 			used[vectors[i] - 1] = 1;
1796 	for (i = 0; i < msix->msix_alloc - 1; i++)
1797 		if (used[i] == 0 && used[i + 1] == 1) {
1798 			free(used, M_DEVBUF);
1799 			return (EINVAL);
1800 		}
1801 	if (used[0] != 1) {
1802 		free(used, M_DEVBUF);
1803 		return (EINVAL);
1804 	}
1805 
1806 	/* Make sure none of the resources are allocated. */
1807 	for (i = 0; i < msix->msix_table_len; i++) {
1808 		if (msix->msix_table[i].mte_vector == 0)
1809 			continue;
1810 		if (msix->msix_table[i].mte_handlers > 0) {
1811 			free(used, M_DEVBUF);
1812 			return (EBUSY);
1813 		}
1814 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1815 		KASSERT(rle != NULL, ("missing resource"));
1816 		if (rle->res != NULL) {
1817 			free(used, M_DEVBUF);
1818 			return (EBUSY);
1819 		}
1820 	}
1821 
1822 	/* Free the existing resource list entries. */
1823 	for (i = 0; i < msix->msix_table_len; i++) {
1824 		if (msix->msix_table[i].mte_vector == 0)
1825 			continue;
1826 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1827 	}
1828 
1829 	/*
1830 	 * Build the new virtual table keeping track of which vectors are
1831 	 * used.
1832 	 */
1833 	free(msix->msix_table, M_DEVBUF);
1834 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1835 	    M_DEVBUF, M_WAITOK | M_ZERO);
1836 	for (i = 0; i < count; i++)
1837 		msix->msix_table[i].mte_vector = vectors[i];
1838 	msix->msix_table_len = count;
1839 
1840 	/* Free any unused IRQs and resize the vectors array if necessary. */
1841 	j = msix->msix_alloc - 1;
1842 	if (used[j] == 0) {
1843 		struct msix_vector *vec;
1844 
1845 		while (used[j] == 0) {
1846 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1847 			    msix->msix_vectors[j].mv_irq);
1848 			j--;
1849 		}
1850 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1851 		    M_WAITOK);
1852 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1853 		    (j + 1));
1854 		free(msix->msix_vectors, M_DEVBUF);
1855 		msix->msix_vectors = vec;
1856 		msix->msix_alloc = j + 1;
1857 	}
1858 	free(used, M_DEVBUF);
1859 
1860 	/* Map the IRQs onto the rids. */
1861 	for (i = 0; i < count; i++) {
1862 		if (vectors[i] == 0)
1863 			continue;
1864 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1865 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1866 		    irq, 1);
1867 	}
1868 
1869 	if (bootverbose) {
1870 		device_printf(child, "Remapped MSI-X IRQs as: ");
1871 		for (i = 0; i < count; i++) {
1872 			if (i != 0)
1873 				printf(", ");
1874 			if (vectors[i] == 0)
1875 				printf("---");
1876 			else
1877 				printf("%d",
1878 				    msix->msix_vectors[vectors[i]].mv_irq);
1879 		}
1880 		printf("\n");
1881 	}
1882 
1883 	return (0);
1884 }
1885 
1886 static int
1887 pci_release_msix(device_t dev, device_t child)
1888 {
1889 	struct pci_devinfo *dinfo = device_get_ivars(child);
1890 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1891 	struct resource_list_entry *rle;
1892 	int i;
1893 
1894 	/* Do we have any messages to release? */
1895 	if (msix->msix_alloc == 0)
1896 		return (ENODEV);
1897 
1898 	/* Make sure none of the resources are allocated. */
1899 	for (i = 0; i < msix->msix_table_len; i++) {
1900 		if (msix->msix_table[i].mte_vector == 0)
1901 			continue;
1902 		if (msix->msix_table[i].mte_handlers > 0)
1903 			return (EBUSY);
1904 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1905 		KASSERT(rle != NULL, ("missing resource"));
1906 		if (rle->res != NULL)
1907 			return (EBUSY);
1908 	}
1909 
1910 	/* Update control register to disable MSI-X. */
1911 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1912 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1913 	    msix->msix_ctrl, 2);
1914 
1915 	/* Free the resource list entries. */
1916 	for (i = 0; i < msix->msix_table_len; i++) {
1917 		if (msix->msix_table[i].mte_vector == 0)
1918 			continue;
1919 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1920 	}
1921 	free(msix->msix_table, M_DEVBUF);
1922 	msix->msix_table_len = 0;
1923 
1924 	/* Release the IRQs. */
1925 	for (i = 0; i < msix->msix_alloc; i++)
1926 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1927 		    msix->msix_vectors[i].mv_irq);
1928 	free(msix->msix_vectors, M_DEVBUF);
1929 	msix->msix_alloc = 0;
1930 	return (0);
1931 }
1932 
1933 /*
1934  * Return the max supported MSI-X messages this device supports.
1935  * Basically, assuming the MD code can alloc messages, this function
1936  * should return the maximum value that pci_alloc_msix() can return.
1937  * Thus, it is subject to the tunables, etc.
1938  */
1939 int
1940 pci_msix_count_method(device_t dev, device_t child)
1941 {
1942 	struct pci_devinfo *dinfo = device_get_ivars(child);
1943 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1944 
1945 	if (pci_do_msix && msix->msix_location != 0)
1946 		return (msix->msix_msgnum);
1947 	return (0);
1948 }
1949 
1950 int
1951 pci_msix_pba_bar_method(device_t dev, device_t child)
1952 {
1953 	struct pci_devinfo *dinfo = device_get_ivars(child);
1954 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1955 
1956 	if (pci_do_msix && msix->msix_location != 0)
1957 		return (msix->msix_pba_bar);
1958 	return (-1);
1959 }
1960 
1961 int
1962 pci_msix_table_bar_method(device_t dev, device_t child)
1963 {
1964 	struct pci_devinfo *dinfo = device_get_ivars(child);
1965 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1966 
1967 	if (pci_do_msix && msix->msix_location != 0)
1968 		return (msix->msix_table_bar);
1969 	return (-1);
1970 }
1971 
1972 /*
1973  * HyperTransport MSI mapping control
1974  */
/*
 * Enable or disable the HT MSI mapping for 'dev': a non-zero 'addr'
 * requests enable (if the address falls within the mapping window),
 * an 'addr' of zero requests disable.
 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do if the device has no HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Only enable when not already enabled and the MSI address
	 * lies in the same 1MB-aligned region as the mapping window
	 * base (the low 20 bits are ignored by the comparison).
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1999 
2000 int
2001 pci_get_max_read_req(device_t dev)
2002 {
2003 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2004 	int cap;
2005 	uint16_t val;
2006 
2007 	cap = dinfo->cfg.pcie.pcie_location;
2008 	if (cap == 0)
2009 		return (0);
2010 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2011 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2012 	val >>= 12;
2013 	return (1 << (val + 7));
2014 }
2015 
2016 int
2017 pci_set_max_read_req(device_t dev, int size)
2018 {
2019 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2020 	int cap;
2021 	uint16_t val;
2022 
2023 	cap = dinfo->cfg.pcie.pcie_location;
2024 	if (cap == 0)
2025 		return (0);
2026 	if (size < 128)
2027 		size = 128;
2028 	if (size > 4096)
2029 		size = 4096;
2030 	size = (1 << (fls(size) - 1));
2031 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2032 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2033 	val |= (fls(size) - 8) << 12;
2034 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2035 	return (size);
2036 }
2037 
2038 uint32_t
2039 pcie_read_config(device_t dev, int reg, int width)
2040 {
2041 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2042 	int cap;
2043 
2044 	cap = dinfo->cfg.pcie.pcie_location;
2045 	if (cap == 0) {
2046 		if (width == 2)
2047 			return (0xffff);
2048 		return (0xffffffff);
2049 	}
2050 
2051 	return (pci_read_config(dev, cap + reg, width));
2052 }
2053 
2054 void
2055 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2056 {
2057 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2058 	int cap;
2059 
2060 	cap = dinfo->cfg.pcie.pcie_location;
2061 	if (cap == 0)
2062 		return;
2063 	pci_write_config(dev, cap + reg, value, width);
2064 }
2065 
2066 /*
2067  * Adjusts a PCI-e capability register by clearing the bits in mask
2068  * and setting the bits in (value & mask).  Bits not set in mask are
2069  * not adjusted.
2070  *
2071  * Returns the old value on success or all ones on failure.
2072  */
2073 uint32_t
2074 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2075     int width)
2076 {
2077 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2078 	uint32_t old, new;
2079 	int cap;
2080 
2081 	cap = dinfo->cfg.pcie.pcie_location;
2082 	if (cap == 0) {
2083 		if (width == 2)
2084 			return (0xffff);
2085 		return (0xffffffff);
2086 	}
2087 
2088 	old = pci_read_config(dev, cap + reg, width);
2089 	new = old & ~mask;
2090 	new |= (value & mask);
2091 	pci_write_config(dev, cap + reg, new, width);
2092 	return (old);
2093 }
2094 
2095 /*
2096  * Support for MSI message signalled interrupts.
2097  */
/*
 * Program the MSI address/data registers of 'child' and then set the
 * MSI enable bit in the capability's control register.
 */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/*
		 * 64-bit capable devices have a high address word, and
		 * the data register sits at a different offset.
		 */
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
2125 
2126 void
2127 pci_disable_msi_method(device_t dev, device_t child)
2128 {
2129 	struct pci_devinfo *dinfo = device_get_ivars(child);
2130 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2131 
2132 	/* Disable MSI -> HT mapping. */
2133 	pci_ht_map_msi(child, 0);
2134 
2135 	/* Disable MSI in the control register. */
2136 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2137 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2138 	    msi->msi_ctrl, 2);
2139 }
2140 
2141 /*
2142  * Restore MSI registers during resume.  If MSI is enabled then
2143  * restore the data and address registers in addition to the control
2144  * register.
2145  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Restore the cached address/data before re-enabling. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
2171 
/*
 * Reprogram the address/data pair of an already-allocated MSI or
 * MSI-X IRQ after the interrupt has been moved (e.g. to another CPU).
 * Returns 0 on success, ENOENT if 'irq' does not belong to this
 * device, or an error from PCIB_MAP_MSI().
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram with MSI disabled, then re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* Only touch active slots using vector i+1. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the table entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2244 
2245 /*
2246  * Returns true if the specified device is blacklisted because MSI
2247  * doesn't work.
2248  */
2249 int
2250 pci_msi_device_blacklisted(device_t dev)
2251 {
2252 
2253 	if (!pci_honor_msi_blacklist)
2254 		return (0);
2255 
2256 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2257 }
2258 
2259 /*
2260  * Determine if MSI is blacklisted globally on this system.  Currently,
2261  * we just check for blacklisted chipsets as represented by the
2262  * host-PCI bridge at device 0:0:0.  In the future, it may become
2263  * necessary to check other system attributes, such as the kenv values
2264  * that give the motherboard manufacturer and model number.
2265  */
2266 static int
2267 pci_msi_blacklisted(void)
2268 {
2269 	device_t dev;
2270 
2271 	if (!pci_honor_msi_blacklist)
2272 		return (0);
2273 
2274 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2275 	if (!(pcie_chipset || pcix_chipset)) {
2276 		if (vm_guest != VM_GUEST_NO) {
2277 			/*
2278 			 * Whitelist older chipsets in virtual
2279 			 * machines known to support MSI.
2280 			 */
2281 			dev = pci_find_bsf(0, 0, 0);
2282 			if (dev != NULL)
2283 				return (!pci_has_quirk(pci_get_devid(dev),
2284 					PCI_QUIRK_ENABLE_MSI_VM));
2285 		}
2286 		return (1);
2287 	}
2288 
2289 	dev = pci_find_bsf(0, 0, 0);
2290 	if (dev != NULL)
2291 		return (pci_msi_device_blacklisted(dev));
2292 	return (0);
2293 }
2294 
2295 /*
2296  * Returns true if the specified device is blacklisted because MSI-X
2297  * doesn't work.  Note that this assumes that if MSI doesn't work,
2298  * MSI-X doesn't either.
2299  */
2300 int
2301 pci_msix_device_blacklisted(device_t dev)
2302 {
2303 
2304 	if (!pci_honor_msi_blacklist)
2305 		return (0);
2306 
2307 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2308 		return (1);
2309 
2310 	return (pci_msi_device_blacklisted(dev));
2311 }
2312 
2313 /*
2314  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2315  * is blacklisted, assume that MSI-X is as well.  Check for additional
2316  * chipsets where MSI works but MSI-X does not.
2317  */
2318 static int
2319 pci_msix_blacklisted(void)
2320 {
2321 	device_t dev;
2322 
2323 	if (!pci_honor_msi_blacklist)
2324 		return (0);
2325 
2326 	dev = pci_find_bsf(0, 0, 0);
2327 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2328 	    PCI_QUIRK_DISABLE_MSIX))
2329 		return (1);
2330 
2331 	return (pci_msi_blacklisted());
2332 }
2333 
2334 /*
2335  * Attempt to allocate *count MSI messages.  The actual number allocated is
2336  * returned in *count.  After this function returns, each message will be
2337  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2338  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the bridge for 'actual' messages, halving the request on
	 * each failure.  Halving preserves the power-of-2 invariant.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* The MME field holds log2(actual) in bits 6:4. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2457 
2458 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2506 
2507 /*
2508  * Return the max supported MSI messages this device supports.
2509  * Basically, assuming the MD code can alloc messages, this function
2510  * should return the maximum value that pci_alloc_msi() can return.
2511  * Thus, it is subject to the tunables, etc.
2512  */
2513 int
2514 pci_msi_count_method(device_t dev, device_t child)
2515 {
2516 	struct pci_devinfo *dinfo = device_get_ivars(child);
2517 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2518 
2519 	if (pci_do_msi && msi->msi_location != 0)
2520 		return (msi->msi_msgnum);
2521 	return (0);
2522 }
2523 
2524 /* free pcicfgregs structure and all depending data structures */
2525 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Release any cached VPD identifier and keyword values. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* SAFE variant: entries are freed while iterating. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2557 
2558 /*
2559  * PCI power manangement
2560  */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	/* No power management capability, nothing we can do. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; refuse if not advertised. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is optional; refuse if not advertised. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2632 
2633 int
2634 pci_get_powerstate_method(device_t dev, device_t child)
2635 {
2636 	struct pci_devinfo *dinfo = device_get_ivars(child);
2637 	pcicfgregs *cfg = &dinfo->cfg;
2638 	uint16_t status;
2639 	int result;
2640 
2641 	if (cfg->pp.pp_cap != 0) {
2642 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2643 		switch (status & PCIM_PSTAT_DMASK) {
2644 		case PCIM_PSTAT_D0:
2645 			result = PCI_POWERSTATE_D0;
2646 			break;
2647 		case PCIM_PSTAT_D1:
2648 			result = PCI_POWERSTATE_D1;
2649 			break;
2650 		case PCIM_PSTAT_D2:
2651 			result = PCI_POWERSTATE_D2;
2652 			break;
2653 		case PCIM_PSTAT_D3:
2654 			result = PCI_POWERSTATE_D3;
2655 			break;
2656 		default:
2657 			result = PCI_POWERSTATE_UNKNOWN;
2658 			break;
2659 		}
2660 	} else {
2661 		/* No support, device is always at D0 */
2662 		result = PCI_POWERSTATE_D0;
2663 	}
2664 	return (result);
2665 }
2666 
2667 /*
2668  * Some convenience functions for PCI device drivers.
2669  */
2670 
2671 static __inline void
2672 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2673 {
2674 	uint16_t	command;
2675 
2676 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2677 	command |= bit;
2678 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2679 }
2680 
2681 static __inline void
2682 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2683 {
2684 	uint16_t	command;
2685 
2686 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2687 	command &= ~bit;
2688 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2689 }
2690 
2691 int
2692 pci_enable_busmaster_method(device_t dev, device_t child)
2693 {
2694 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2695 	return (0);
2696 }
2697 
2698 int
2699 pci_disable_busmaster_method(device_t dev, device_t child)
2700 {
2701 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2702 	return (0);
2703 }
2704 
2705 int
2706 pci_enable_io_method(device_t dev, device_t child, int space)
2707 {
2708 	uint16_t bit;
2709 
2710 	switch(space) {
2711 	case SYS_RES_IOPORT:
2712 		bit = PCIM_CMD_PORTEN;
2713 		break;
2714 	case SYS_RES_MEMORY:
2715 		bit = PCIM_CMD_MEMEN;
2716 		break;
2717 	default:
2718 		return (EINVAL);
2719 	}
2720 	pci_set_command_bit(dev, child, bit);
2721 	return (0);
2722 }
2723 
2724 int
2725 pci_disable_io_method(device_t dev, device_t child, int space)
2726 {
2727 	uint16_t bit;
2728 
2729 	switch(space) {
2730 	case SYS_RES_IOPORT:
2731 		bit = PCIM_CMD_PORTEN;
2732 		break;
2733 	case SYS_RES_MEMORY:
2734 		bit = PCIM_CMD_MEMEN;
2735 		break;
2736 	default:
2737 		return (EINVAL);
2738 	}
2739 	pci_clear_command_bit(dev, child, bit);
2740 	return (0);
2741 }
2742 
2743 /*
2744  * New style pci driver.  Parent device is either a pci-host-bridge or a
2745  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2746  */
2747 
/*
 * Dump the device's config header, power management, MSI and MSI-X
 * details to the console when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability summary. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2804 
2805 static int
2806 pci_porten(device_t dev)
2807 {
2808 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2809 }
2810 
2811 static int
2812 pci_memen(device_t dev)
2813 {
2814 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2815 }
2816 
/*
 * Read a BAR's current value into *mapp and its sizing probe value
 * (all 1's written, then read back) into *testvalp.  *bar64, if not
 * NULL, is set non-zero for a 64-bit memory BAR.  The BAR's original
 * contents are restored before returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2885 
/*
 * Program a BAR with 'base' and refresh the cached pm_value from the
 * hardware (the device may not implement all address bits).
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the device actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2906 
2907 struct pci_map *
2908 pci_find_bar(device_t dev, int reg)
2909 {
2910 	struct pci_devinfo *dinfo;
2911 	struct pci_map *pm;
2912 
2913 	dinfo = device_get_ivars(dev);
2914 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2915 		if (pm->pm_reg == reg)
2916 			return (pm);
2917 	}
2918 	return (NULL);
2919 }
2920 
2921 int
2922 pci_bar_enabled(device_t dev, struct pci_map *pm)
2923 {
2924 	struct pci_devinfo *dinfo;
2925 	uint16_t cmd;
2926 
2927 	dinfo = device_get_ivars(dev);
2928 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2929 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2930 		return (0);
2931 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2932 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2933 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2934 	else
2935 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2936 }
2937 
/*
 * Record a new BAR for 'dev', keeping the device's BAR list sorted by
 * register offset.  Returns the new pci_map record.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2962 
2963 static void
2964 pci_restore_bars(device_t dev)
2965 {
2966 	struct pci_devinfo *dinfo;
2967 	struct pci_map *pm;
2968 	int ln2range;
2969 
2970 	dinfo = device_get_ivars(dev);
2971 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2972 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2973 			ln2range = 32;
2974 		else
2975 			ln2range = pci_maprange(pm->pm_value);
2976 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2977 		if (ln2range == 64)
2978 			pci_write_config(dev, pm->pm_reg + 4,
2979 			    pm->pm_value >> 32, 4);
2980 	}
2981 }
2982 
2983 /*
2984  * Add a resource based on a pci map register. Return 1 if the map
2985  * register is a 32bit map register or 2 if it is a 64bit register.
2986  */
2987 static int
2988 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2989     int force, int prefetch)
2990 {
2991 	struct pci_map *pm;
2992 	pci_addr_t base, map, testval;
2993 	pci_addr_t start, end, count;
2994 	int barlen, basezero, flags, maprange, mapsize, type;
2995 	uint16_t cmd;
2996 	struct resource *res;
2997 
2998 	/*
2999 	 * The BAR may already exist if the device is a CardBus card
3000 	 * whose CIS is stored in this BAR.
3001 	 */
3002 	pm = pci_find_bar(dev, reg);
3003 	if (pm != NULL) {
3004 		maprange = pci_maprange(pm->pm_value);
3005 		barlen = maprange == 64 ? 2 : 1;
3006 		return (barlen);
3007 	}
3008 
3009 	pci_read_bar(dev, reg, &map, &testval, NULL);
3010 	if (PCI_BAR_MEM(map)) {
3011 		type = SYS_RES_MEMORY;
3012 		if (map & PCIM_BAR_MEM_PREFETCH)
3013 			prefetch = 1;
3014 	} else
3015 		type = SYS_RES_IOPORT;
3016 	mapsize = pci_mapsize(testval);
3017 	base = pci_mapbase(map);
3018 #ifdef __PCI_BAR_ZERO_VALID
3019 	basezero = 0;
3020 #else
3021 	basezero = base == 0;
3022 #endif
3023 	maprange = pci_maprange(map);
3024 	barlen = maprange == 64 ? 2 : 1;
3025 
3026 	/*
3027 	 * For I/O registers, if bottom bit is set, and the next bit up
3028 	 * isn't clear, we know we have a BAR that doesn't conform to the
3029 	 * spec, so ignore it.  Also, sanity check the size of the data
3030 	 * areas to the type of memory involved.  Memory must be at least
3031 	 * 16 bytes in size, while I/O ranges must be at least 4.
3032 	 */
3033 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3034 		return (barlen);
3035 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3036 	    (type == SYS_RES_IOPORT && mapsize < 2))
3037 		return (barlen);
3038 
3039 	/* Save a record of this BAR. */
3040 	pm = pci_add_bar(dev, reg, map, mapsize);
3041 	if (bootverbose) {
3042 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3043 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3044 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3045 			printf(", port disabled\n");
3046 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3047 			printf(", memory disabled\n");
3048 		else
3049 			printf(", enabled\n");
3050 	}
3051 
3052 	/*
3053 	 * If base is 0, then we have problems if this architecture does
3054 	 * not allow that.  It is best to ignore such entries for the
3055 	 * moment.  These will be allocated later if the driver specifically
3056 	 * requests them.  However, some removable busses look better when
3057 	 * all resources are allocated, so allow '0' to be overriden.
3058 	 *
3059 	 * Similarly treat maps whose values is the same as the test value
3060 	 * read back.  These maps have had all f's written to them by the
3061 	 * BIOS in an attempt to disable the resources.
3062 	 */
3063 	if (!force && (basezero || map == testval))
3064 		return (barlen);
3065 	if ((u_long)base != base) {
3066 		device_printf(bus,
3067 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3068 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3069 		    pci_get_function(dev), reg);
3070 		return (barlen);
3071 	}
3072 
3073 	/*
3074 	 * This code theoretically does the right thing, but has
3075 	 * undesirable side effects in some cases where peripherals
3076 	 * respond oddly to having these bits enabled.  Let the user
3077 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3078 	 * default).
3079 	 */
3080 	if (pci_enable_io_modes) {
3081 		/* Turn on resources that have been left off by a lazy BIOS */
3082 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3083 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3084 			cmd |= PCIM_CMD_PORTEN;
3085 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3086 		}
3087 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3088 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3089 			cmd |= PCIM_CMD_MEMEN;
3090 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3091 		}
3092 	} else {
3093 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3094 			return (barlen);
3095 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3096 			return (barlen);
3097 	}
3098 
3099 	count = (pci_addr_t)1 << mapsize;
3100 	flags = RF_ALIGNMENT_LOG2(mapsize);
3101 	if (prefetch)
3102 		flags |= RF_PREFETCHABLE;
3103 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3104 		start = 0;	/* Let the parent decide. */
3105 		end = ~0;
3106 	} else {
3107 		start = base;
3108 		end = base + count - 1;
3109 	}
3110 	resource_list_add(rl, type, reg, start, end, count);
3111 
3112 	/*
3113 	 * Try to allocate the resource for this BAR from our parent
3114 	 * so that this resource range is already reserved.  The
3115 	 * driver for this device will later inherit this resource in
3116 	 * pci_alloc_resource().
3117 	 */
3118 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3119 	    flags);
3120 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3121 		/*
3122 		 * If the allocation fails, try to allocate a resource for
3123 		 * this BAR using any available range.  The firmware felt
3124 		 * it was important enough to assign a resource, so don't
3125 		 * disable decoding if we can help it.
3126 		 */
3127 		resource_list_delete(rl, type, reg);
3128 		resource_list_add(rl, type, reg, 0, ~0, count);
3129 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3130 		    count, flags);
3131 	}
3132 	if (res == NULL) {
3133 		/*
3134 		 * If the allocation fails, delete the resource list entry
3135 		 * and disable decoding for this device.
3136 		 *
3137 		 * If the driver requests this resource in the future,
3138 		 * pci_reserve_map() will try to allocate a fresh
3139 		 * resource range.
3140 		 */
3141 		resource_list_delete(rl, type, reg);
3142 		pci_disable_io(dev, type);
3143 		if (bootverbose)
3144 			device_printf(bus,
3145 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3146 			    pci_get_domain(dev), pci_get_bus(dev),
3147 			    pci_get_slot(dev), pci_get_function(dev), reg);
3148 	} else {
3149 		start = rman_get_start(res);
3150 		pci_write_bar(dev, pm, start);
3151 	}
3152 	return (barlen);
3153 }
3154 
3155 /*
3156  * For ATA devices we need to decide early what addressing mode to use.
3157  * Legacy demands that the primary and secondary ATA ports sits on the
3158  * same addresses that old ISA hardware did. This dictates that we use
3159  * those addresses and ignore the BAR's if we cannot set PCI native
3160  * addressing mode.
3161  */
3162 static void
3163 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3164     uint32_t prefetchmask)
3165 {
3166 	int rid, type, progif;
3167 #if 0
3168 	/* if this device supports PCI native addressing use it */
3169 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3170 	if ((progif & 0x8a) == 0x8a) {
3171 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3172 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3173 			printf("Trying ATA native PCI addressing mode\n");
3174 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3175 		}
3176 	}
3177 #endif
3178 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3179 	type = SYS_RES_IOPORT;
3180 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3181 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3182 		    prefetchmask & (1 << 0));
3183 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3184 		    prefetchmask & (1 << 1));
3185 	} else {
3186 		rid = PCIR_BAR(0);
3187 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3188 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3189 		    0x1f7, 8, 0);
3190 		rid = PCIR_BAR(1);
3191 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3192 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3193 		    0x3f6, 1, 0);
3194 	}
3195 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3196 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3197 		    prefetchmask & (1 << 2));
3198 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3199 		    prefetchmask & (1 << 3));
3200 	} else {
3201 		rid = PCIR_BAR(2);
3202 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3203 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3204 		    0x177, 8, 0);
3205 		rid = PCIR_BAR(3);
3206 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3207 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3208 		    0x376, 1, 0);
3209 	}
3210 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3211 	    prefetchmask & (1 << 4));
3212 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3213 	    prefetchmask & (1 << 5));
3214 }
3215 
/*
 * Determine the IRQ for a device's INTx pin and register it as rid 0
 * in the device's resource list.  Order of preference: a user tunable
 * (hw.pci<D>.<B>.<S>.INT<P>.irq), then bus routing, then the intline
 * register.  If 'force_route' is set the bus is asked to route even
 * when intline already holds a valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only IRQs in (0, 255) are accepted from the tunable. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3263 
3264 /* Perform early OHCI takeover from SMM. */
3265 static void
3266 ohci_early_takeover(device_t self)
3267 {
3268 	struct resource *res;
3269 	uint32_t ctl;
3270 	int rid;
3271 	int i;
3272 
3273 	rid = PCIR_BAR(0);
3274 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3275 	if (res == NULL)
3276 		return;
3277 
3278 	ctl = bus_read_4(res, OHCI_CONTROL);
3279 	if (ctl & OHCI_IR) {
3280 		if (bootverbose)
3281 			printf("ohci early: "
3282 			    "SMM active, request owner change\n");
3283 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3284 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3285 			DELAY(1000);
3286 			ctl = bus_read_4(res, OHCI_CONTROL);
3287 		}
3288 		if (ctl & OHCI_IR) {
3289 			if (bootverbose)
3290 				printf("ohci early: "
3291 				    "SMM does not respond, resetting\n");
3292 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3293 		}
3294 		/* Disable interrupts */
3295 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3296 	}
3297 
3298 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3299 }
3300 
3301 /* Perform early UHCI takeover from SMM. */
3302 static void
3303 uhci_early_takeover(device_t self)
3304 {
3305 	struct resource *res;
3306 	int rid;
3307 
3308 	/*
3309 	 * Set the PIRQD enable bit and switch off all the others. We don't
3310 	 * want legacy support to interfere with us XXX Does this also mean
3311 	 * that the BIOS won't touch the keyboard anymore if it is connected
3312 	 * to the ports of the root hub?
3313 	 */
3314 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3315 
3316 	/* Disable interrupts */
3317 	rid = PCI_UHCI_BASE_REG;
3318 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3319 	if (res != NULL) {
3320 		bus_write_2(res, UHCI_INTR, 0);
3321 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3322 	}
3323 }
3324 
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walks the controller's extended-capability list looking for the
 * legacy-support capability and, if the BIOS semaphore is held,
 * requests ownership via the OS semaphore and polls until the BIOS
 * releases the controller (up to ~100ms per capability).
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own this controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore and wait for the BIOS to yield. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3380 
/*
 * Perform early XHCI takeover from SMM.
 *
 * Like the EHCI variant, but the extended capabilities live in MMIO
 * space (offsets are in 32-bit units) and the poll timeout is longer.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			/* BIOS does not own this controller. */
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore and wait for the BIOS to yield. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS to flush the command write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3442 
3443 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range decoded by a PCI-PCI or
 * CardBus bridge from the parent bus.  If the range is invalid or
 * cannot be reserved, the secbus/subbus registers are cleared so the
 * bridge gets renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers have secbus/subbus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Invalid range, reservation failure, or forced clear land here. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3548 
/*
 * Allocate the secondary bus range (rid 0) for a bridge child.  If
 * the range has not been reserved yet, lazily reserve one from the
 * parent and program the bridge's secbus/subbus registers to match.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers have secbus/subbus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* The secondary bus range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3599 #endif
3600 
3601 static int
3602 pci_ea_bei_to_rid(device_t dev, int bei)
3603 {
3604 #ifdef PCI_IOV
3605 	struct pci_devinfo *dinfo;
3606 	int iov_pos;
3607 	struct pcicfg_iov *iov;
3608 
3609 	dinfo = device_get_ivars(dev);
3610 	iov = dinfo->cfg.iov;
3611 	if (iov != NULL)
3612 		iov_pos = iov->iov_pos;
3613 	else
3614 		iov_pos = 0;
3615 #endif
3616 
3617 	/* Check if matches BAR */
3618 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3619 	    (bei <= PCIM_EA_BEI_BAR_5))
3620 		return (PCIR_BAR(bei));
3621 
3622 	/* Check ROM */
3623 	if (bei == PCIM_EA_BEI_ROM)
3624 		return (PCIR_BIOS);
3625 
3626 #ifdef PCI_IOV
3627 	/* Check if matches VF_BAR */
3628 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3629 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3630 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3631 		    iov_pos);
3632 #endif
3633 
3634 	return (-1);
3635 }
3636 
3637 int
3638 pci_ea_is_enabled(device_t dev, int rid)
3639 {
3640 	struct pci_ea_entry *ea;
3641 	struct pci_devinfo *dinfo;
3642 
3643 	dinfo = device_get_ivars(dev);
3644 
3645 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3646 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3647 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3648 	}
3649 
3650 	return (0);
3651 }
3652 
/*
 * Add resources described by the device's Enhanced Allocation (EA)
 * capability.  When 'alloc_iov' is non-zero only VF BAR entries are
 * considered (SR-IOV path); otherwise only regular BAR/ROM entries.
 * Entries that cannot be reserved are disabled in config space so the
 * legacy BAR path can try them instead.
 */
void
pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
{
	struct pci_ea_entry *ea;
	struct pci_devinfo *dinfo;
	pci_addr_t start, end, count;
	struct resource_list *rl;
	int type, flags, rid;
	struct resource *res;
	uint32_t tmp;
#ifdef PCI_IOV
	struct pcicfg_iov *iov;
#endif

	dinfo = device_get_ivars(dev);
	rl = &dinfo->resources;
	flags = 0;

#ifdef PCI_IOV
	iov = dinfo->cfg.iov;
#endif

	/* No EA capability on this device. */
	if (dinfo->cfg.ea.ea_location == 0)
		return;

	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {

		/*
		 * TODO: Ignore EA-BAR if is not enabled.
		 *   Currently the EA implementation supports
		 *   only situation, where EA structure contains
		 *   predefined entries. In case they are not enabled
		 *   leave them unallocated and proceed with
		 *   a legacy-BAR mechanism.
		 */
		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
			continue;

		/* Map the entry's property field to a resource type. */
		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
		case PCIM_EA_P_MEM_PREFETCH:
		case PCIM_EA_P_VF_MEM_PREFETCH:
			flags = RF_PREFETCHABLE;
			/* FALLTHROUGH */
		case PCIM_EA_P_VF_MEM:
		case PCIM_EA_P_MEM:
			type = SYS_RES_MEMORY;
			break;
		case PCIM_EA_P_IO:
			type = SYS_RES_IOPORT;
			break;
		default:
			continue;
		}

		if (alloc_iov != 0) {
#ifdef PCI_IOV
			/* Allocating IOV, confirm BEI matches */
			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
				continue;
#else
			continue;
#endif
		} else {
			/* Allocating BAR, confirm BEI matches */
			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
			    (ea->eae_bei != PCIM_EA_BEI_ROM))
				continue;
		}

		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
		if (rid < 0)
			continue;

		/* Skip resources already allocated by EA */
		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
			continue;

		start = ea->eae_base;
		count = ea->eae_max_offset + 1;
#ifdef PCI_IOV
		/* A VF BAR covers one window per virtual function. */
		if (iov != NULL)
			count = count * iov->iov_num_vfs;
#endif
		end = start + count - 1;
		if (count == 0)
			continue;

		resource_list_add(rl, type, rid, start, end, count);
		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
		    flags);
		if (res == NULL) {
			resource_list_delete(rl, type, rid);

			/*
			 * Failed to allocate using EA, disable entry.
			 * Another attempt to allocation will be performed
			 * further, but this time using legacy BAR registers
			 */
			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
			tmp &= ~PCIM_EA_ENABLE;
			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);

			/*
			 * Disabling entry might fail in case it is hardwired.
			 * Read flags again to match current status.
			 */
			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);

			continue;
		}

		/* As per specification, fill BAR with zeros */
		pci_write_config(dev, rid, 0, 4);
	}
}
3771 
/*
 * Populate the device's resource list: EA entries first, then BARs
 * (with ATA legacy and per-device quirk handling), the INTx interrupt,
 * USB controller takeover from SMM, and bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* 'i' advances by 1 or 2 depending on the BAR's width. */
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3864 
3865 static struct pci_devinfo *
3866 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3867     int slot, int func)
3868 {
3869 	struct pci_devinfo *dinfo;
3870 
3871 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3872 	if (dinfo != NULL)
3873 		pci_add_child(dev, dinfo);
3874 
3875 	return (dinfo);
3876 }
3877 
/*
 * Enumerate every slot/function on a bus and add a child device for
 * each function found.  Slot 0 function 0 is probed first so ARI can
 * be enabled before the rest of the bus is scanned.
 */
void
pci_add_children(device_t dev, int domain, int busno)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): brief settle delay before the first config
		 * read of each slot — purpose not documented here. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function bit widens the function scan range. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f);
	}
#undef REG
}
3920 
3921 int
3922 pci_rescan_method(device_t dev)
3923 {
3924 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3925 	device_t pcib = device_get_parent(dev);
3926 	struct pci_softc *sc;
3927 	device_t child, *devlist, *unchanged;
3928 	int devcount, error, i, j, maxslots, oldcount;
3929 	int busno, domain, s, f, pcifunchigh;
3930 	uint8_t hdrtype;
3931 
3932 	/* No need to check for ARI on a rescan. */
3933 	error = device_get_children(dev, &devlist, &devcount);
3934 	if (error)
3935 		return (error);
3936 	if (devcount != 0) {
3937 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3938 		    M_NOWAIT | M_ZERO);
3939 		if (unchanged == NULL) {
3940 			free(devlist, M_TEMP);
3941 			return (ENOMEM);
3942 		}
3943 	} else
3944 		unchanged = NULL;
3945 
3946 	sc = device_get_softc(dev);
3947 	domain = pcib_get_domain(dev);
3948 	busno = pcib_get_bus(dev);
3949 	maxslots = PCIB_MAXSLOTS(pcib);
3950 	for (s = 0; s <= maxslots; s++) {
3951 		/* If function 0 is not present, skip to the next slot. */
3952 		f = 0;
3953 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3954 			continue;
3955 		pcifunchigh = 0;
3956 		hdrtype = REG(PCIR_HDRTYPE, 1);
3957 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3958 			continue;
3959 		if (hdrtype & PCIM_MFDEV)
3960 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3961 		for (f = 0; f <= pcifunchigh; f++) {
3962 			if (REG(PCIR_VENDOR, 2) == 0xfff)
3963 				continue;
3964 
3965 			/*
3966 			 * Found a valid function.  Check if a
3967 			 * device_t for this device already exists.
3968 			 */
3969 			for (i = 0; i < devcount; i++) {
3970 				child = devlist[i];
3971 				if (child == NULL)
3972 					continue;
3973 				if (pci_get_slot(child) == s &&
3974 				    pci_get_function(child) == f) {
3975 					unchanged[i] = child;
3976 					goto next_func;
3977 				}
3978 			}
3979 
3980 			pci_identify_function(pcib, dev, domain, busno, s, f);
3981 		next_func:;
3982 		}
3983 	}
3984 
3985 	/* Remove devices that are no longer present. */
3986 	for (i = 0; i < devcount; i++) {
3987 		if (unchanged[i] != NULL)
3988 			continue;
3989 		device_delete_child(dev, devlist[i]);
3990 	}
3991 
3992 	free(devlist, M_TEMP);
3993 	oldcount = devcount;
3994 
3995 	/* Try to attach the devices just added. */
3996 	error = device_get_children(dev, &devlist, &devcount);
3997 	if (error) {
3998 		free(unchanged, M_TEMP);
3999 		return (error);
4000 	}
4001 
4002 	for (i = 0; i < devcount; i++) {
4003 		for (j = 0; j < oldcount; j++) {
4004 			if (devlist[i] == unchanged[j])
4005 				goto next_device;
4006 		}
4007 
4008 		device_probe_and_attach(devlist[i]);
4009 	next_device:;
4010 	}
4011 
4012 	free(unchanged, M_TEMP);
4013 	free(devlist, M_TEMP);
4014 	return (0);
4015 #undef REG
4016 }
4017 
4018 #ifdef PCI_IOV
4019 device_t
4020 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4021     uint16_t did)
4022 {
4023 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4024 	device_t pcib;
4025 	int busno, slot, func;
4026 
4027 	pf_dinfo = device_get_ivars(pf);
4028 
4029 	pcib = device_get_parent(bus);
4030 
4031 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4032 
4033 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4034 	    slot, func, vid, did);
4035 
4036 	vf_dinfo->cfg.flags |= PCICFG_VF;
4037 	pci_add_child(bus, vf_dinfo);
4038 
4039 	return (vf_dinfo->cfg.dev);
4040 }
4041 
/*
 * Default PCI_CREATE_IOV_CHILD bus method: delegate directly to
 * pci_add_iov_child().  Subclasses may override to customize VF
 * creation.
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, rid, vid, did));
}
4049 #endif
4050 
/*
 * Attach a freshly discovered PCI function to the bus: create the
 * device_t, hang the devinfo off it as ivars, snapshot and then
 * restore its config registers, and enumerate its BAR resources.
 *
 * NOTE(review): device_add_child() can return NULL on allocation
 * failure; the result is used unchecked here — confirm callers can
 * tolerate a panic in that (rare) case.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save config state now, then re-apply it to the hardware. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
4063 
/*
 * Default PCI_CHILD_ADDED bus method: intentionally a no-op.
 * Subclasses override this to be notified of new children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4069 
/*
 * Probe method for the generic PCI bus driver.  Always succeeds, but
 * at BUS_PROBE_GENERIC priority so more specific subclasses win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
4079 
/*
 * Common attach work shared by pci and its subclasses: reserve this
 * bus number from the parent (when PCI_RES_BUS is configured) and set
 * up the DMA tag the children will inherit.  Returns 0 on success or
 * ENXIO if the bus number cannot be allocated.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Claim our own bus number so rescans/hotplug can't reuse it. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only the top-level PCI bus (one whose grandparent is not
	 * itself a pci device) creates the boundary-restricted tag;
	 * nested buses inherit it.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or simply use) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4126 
/*
 * Attach method for the generic PCI bus: perform the common setup,
 * enumerate the children on this bus, and attach drivers to them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno);
	return (bus_generic_attach(dev));
}
4147 
4148 #ifdef PCI_RES_BUS
/*
 * Detach method: detach all children first, then give the bus number
 * reserved in pci_attach_common() back to the parent bridge.
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
4161 #endif
4162 
4163 static void
4164 pci_set_power_child(device_t dev, device_t child, int state)
4165 {
4166 	device_t pcib;
4167 	int dstate;
4168 
4169 	/*
4170 	 * Set the device to the given state.  If the firmware suggests
4171 	 * a different power state, use it instead.  If power management
4172 	 * is not present, the firmware is responsible for managing
4173 	 * device power.  Skip children who aren't attached since they
4174 	 * are handled separately.
4175 	 */
4176 	pcib = device_get_parent(dev);
4177 	dstate = state;
4178 	if (device_is_attached(child) &&
4179 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4180 		pci_set_powerstate(child, dstate);
4181 }
4182 
4183 int
4184 pci_suspend_child(device_t dev, device_t child)
4185 {
4186 	struct pci_devinfo *dinfo;
4187 	int error;
4188 
4189 	dinfo = device_get_ivars(child);
4190 
4191 	/*
4192 	 * Save the PCI configuration space for the child and set the
4193 	 * device in the appropriate power state for this sleep state.
4194 	 */
4195 	pci_cfg_save(child, dinfo, 0);
4196 
4197 	/* Suspend devices before potentially powering them down. */
4198 	error = bus_generic_suspend_child(dev, child);
4199 
4200 	if (error)
4201 		return (error);
4202 
4203 	if (pci_do_power_suspend)
4204 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4205 
4206 	return (0);
4207 }
4208 
/*
 * Resume a single child: power it back up to D0 (if enabled by the
 * pci_do_power_resume tunable), restore its saved configuration
 * space, and run the generic resume.  For children with no attached
 * driver the config state is re-saved so a later attach sees fresh
 * values.  Always returns 0.
 */
int
pci_resume_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;

	if (pci_do_power_resume)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);

	dinfo = device_get_ivars(child);
	pci_cfg_restore(child, dinfo);
	if (!device_is_attached(child))
		pci_cfg_save(child, dinfo, 1);

	bus_generic_resume_child(dev, child);

	return (0);
}
4226 
4227 int
4228 pci_resume(device_t dev)
4229 {
4230 	device_t child, *devlist;
4231 	int error, i, numdevs;
4232 
4233 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4234 		return (error);
4235 
4236 	/*
4237 	 * Resume critical devices first, then everything else later.
4238 	 */
4239 	for (i = 0; i < numdevs; i++) {
4240 		child = devlist[i];
4241 		switch (pci_get_class(child)) {
4242 		case PCIC_DISPLAY:
4243 		case PCIC_MEMORY:
4244 		case PCIC_BRIDGE:
4245 		case PCIC_BASEPERIPH:
4246 			BUS_RESUME_CHILD(dev, child);
4247 			break;
4248 		}
4249 	}
4250 	for (i = 0; i < numdevs; i++) {
4251 		child = devlist[i];
4252 		switch (pci_get_class(child)) {
4253 		case PCIC_DISPLAY:
4254 		case PCIC_MEMORY:
4255 		case PCIC_BRIDGE:
4256 		case PCIC_BASEPERIPH:
4257 			break;
4258 		default:
4259 			BUS_RESUME_CHILD(dev, child);
4260 		}
4261 	}
4262 	free(devlist, M_TEMP);
4263 	return (0);
4264 }
4265 
/*
 * Locate a preloaded "pci_vendor_data" module (the PCI vendor/device
 * name database) and record its address and size in the globals used
 * by pci_describe_device().
 *
 * NOTE(review): the terminating '\n' is stored at index
 * pci_vendordata_size, i.e. one byte past the reported size — this
 * relies on the preloaded image having at least one spare byte;
 * confirm against the loader's allocation.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
4285 
/*
 * Bus method invoked when a new driver is loaded: give the driver a
 * chance to identify new children, then reprobe every child that has
 * no driver attached (DS_NOTPRESENT).  Config state is restored first
 * in case the device was powered down; if the probe still fails, the
 * device is treated as detached so its state is re-saved.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children without an attached driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
4314 
/*
 * Set up an interrupt handler for a child device.  After the generic
 * setup succeeds, direct children get their interrupt routing fixed
 * up: rid 0 is legacy INTx (which is unmasked), while rid > 0 is an
 * MSI or MSI-X message that must be mapped by the parent bridge,
 * programmed into the device, and accounted via per-message handler
 * counts.  On a mapping failure the just-installed handler is torn
 * down again.  Returns 0 on success or an errno value.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI lazily, on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the device on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4414 
4415 int
4416 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4417     void *cookie)
4418 {
4419 	struct msix_table_entry *mte;
4420 	struct resource_list_entry *rle;
4421 	struct pci_devinfo *dinfo;
4422 	int error, rid;
4423 
4424 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4425 		return (EINVAL);
4426 
4427 	/* If this isn't a direct child, just bail out */
4428 	if (device_get_parent(child) != dev)
4429 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4430 
4431 	rid = rman_get_rid(irq);
4432 	if (rid == 0) {
4433 		/* Mask INTx */
4434 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4435 	} else {
4436 		/*
4437 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4438 		 * decrement the appropriate handlers count and mask the
4439 		 * MSI-X message, or disable MSI messages if the count
4440 		 * drops to 0.
4441 		 */
4442 		dinfo = device_get_ivars(child);
4443 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4444 		if (rle->res != irq)
4445 			return (EINVAL);
4446 		if (dinfo->cfg.msi.msi_alloc > 0) {
4447 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4448 			    ("MSI-X index too high"));
4449 			if (dinfo->cfg.msi.msi_handlers == 0)
4450 				return (EINVAL);
4451 			dinfo->cfg.msi.msi_handlers--;
4452 			if (dinfo->cfg.msi.msi_handlers == 0)
4453 				pci_disable_msi(child);
4454 		} else {
4455 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4456 			    ("No MSI or MSI-X interrupts allocated"));
4457 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4458 			    ("MSI-X index too high"));
4459 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4460 			if (mte->mte_handlers == 0)
4461 				return (EINVAL);
4462 			mte->mte_handlers--;
4463 			if (mte->mte_handlers == 0)
4464 				pci_mask_msix(child, rid - 1);
4465 		}
4466 	}
4467 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4468 	if (rid > 0)
4469 		KASSERT(error == 0,
4470 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4471 	return (error);
4472 }
4473 
/*
 * Print a one-line description of a child device, including its
 * resource assignments (I/O ports, memory, IRQs), flags, and PCI
 * slot/function location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_domain(dev, child);
	retval += bus_print_child_footer(dev, child);

	return (retval);
}
4500 
/*
 * Table mapping PCI class/subclass codes to human-readable names,
 * consulted by pci_probe_nomatch() to describe devices for which no
 * driver attached.  A subclass of -1 is the default entry for the
 * whole class; 'report' selects whether the device is announced
 * unconditionally (1) or only when booting verbosely (0).  The table
 * is terminated by an entry with a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4595 
/*
 * Bus method called when no driver attaches to a child.  Print a
 * description from the loaded vendor database when available,
 * otherwise from the pci_nomatch_tab class/subclass table, then save
 * the device's config state and treat it as detached (powered down
 * per the pci_do_power_nodriver policy inside pci_cfg_save).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4642 
/*
 * Bus method called after a child's driver detaches.  Release any
 * resources the driver leaked (warning about each kind), then save
 * the device's config state.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4674 
4675 /*
4676  * Parse the PCI device database, if loaded, and return a pointer to a
4677  * description of the device.
4678  *
4679  * The database is flat text formatted as follows:
4680  *
4681  * Any line not in a valid format is ignored.
4682  * Lines are terminated with newline '\n' characters.
4683  *
4684  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4685  * the vendor name.
4686  *
4687  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4688  * - devices cannot be listed without a corresponding VENDOR line.
4689  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4690  * another TAB, then the device name.
4691  */
4692 
4693 /*
4694  * Assuming (ptr) points to the beginning of a line in the database,
4695  * return the vendor or device and description of the next entry.
4696  * The value of (vendor) or (device) inappropriate for the entry type
4697  * is set to -1.  Returns nonzero at the end of the database.
4698  *
4699  * Note that this is slightly unrobust in the face of corrupt data;
4700  * we attempt to safeguard against this by spamming the end of the
4701  * database with a newline when we initialise.
4702  */
4703 static int
4704 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4705 {
4706 	char	*cp = *ptr;
4707 	int	left;
4708 
4709 	*device = -1;
4710 	*vendor = -1;
4711 	**desc = '\0';
4712 	for (;;) {
4713 		left = pci_vendordata_size - (cp - pci_vendordata);
4714 		if (left <= 0) {
4715 			*ptr = cp;
4716 			return(1);
4717 		}
4718 
4719 		/* vendor entry? */
4720 		if (*cp != '\t' &&
4721 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4722 			break;
4723 		/* device entry? */
4724 		if (*cp == '\t' &&
4725 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4726 			break;
4727 
4728 		/* skip to next line */
4729 		while (*cp != '\n' && left > 0) {
4730 			cp++;
4731 			left--;
4732 		}
4733 		if (*cp == '\n') {
4734 			cp++;
4735 			left--;
4736 		}
4737 	}
4738 	/* skip to next line */
4739 	while (*cp != '\n' && left > 0) {
4740 		cp++;
4741 		left--;
4742 	}
4743 	if (*cp == '\n' && left > 0)
4744 		cp++;
4745 	*ptr = cp;
4746 	return(0);
4747 }
4748 
/*
 * Look up a device in the loaded vendor database and return a
 * malloc'd "vendor, device" description string, or NULL if the
 * database is absent, the vendor is unknown, or allocation fails.
 * The caller must free the result with M_DEVBUF.
 *
 * The 80-byte vp/dp buffers match the line-length limit enforced by
 * pci_describe_parse_line().
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: no device entry found. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the numeric device ID when no name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4801 
/*
 * Bus method returning instance variables (PCI config identity,
 * location, and header fields) for a child.  Returns 0 on success,
 * EINVAL for values not applicable to the device's header type, and
 * ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* MIN_GNT only exists in type 0 (normal) headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* MAX_LAT only exists in type 0 (normal) headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4892 
/*
 * Bus method setting instance variables for a child.  Only the
 * interrupt pin is writable; the remaining identity/location ivars
 * are read-only and return EINVAL.  Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4925 
4926 #include "opt_ddb.h"
4927 #ifdef DDB
4928 #include <ddb/ddb.h>
4929 #include <sys/cons.h>
4930 
4931 /*
4932  * List resources based on pci map registers, used for within ddb
4933  */
4934 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line per device (name/unit or "none<N>", selector,
 * class, subsystem/device IDs, revision, and header type).
 *
 * NOTE(review): 'error' is set to 0 and never modified, so the
 * (error == 0) loop guard is vestigial.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4974 #endif /* DDB */
4975 
/*
 * Lazily size and reserve the BAR backing a resource request: probe
 * (or re-use the recorded) BAR size, validate that the requested
 * resource type matches the BAR type, force the allocation size and
 * alignment to what the BAR actually decodes, reserve the range on
 * the child's resource list, and program the allocated address back
 * into the BAR.  Returns the reserved resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The BAR type must agree with the requested resource type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the allocated address into the device's BAR. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
5079 
/*
 * Common allocator behind pci_alloc_resource().  Performs lazy resource
 * allocation: secondary bus numbers, legacy IRQ routing, and BAR-backed
 * memory/I/O ranges are reserved on first use before the request is
 * satisfied from the child's resource list.  "num" is the number of
 * contiguous BAR-sized windows to reserve for a BAR rid.
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Satisfy the request from the (now populated) resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5155 
/*
 * BUS_ALLOC_RESOURCE() method for the PCI bus.  Requests made on behalf
 * of grandchildren are passed straight up to our own parent.  For SR-IOV
 * virtual functions, I/O port requests are refused and memory requests
 * are routed to pci_vf_alloc_mem_resource(); all remaining requests go
 * through the lazy allocator with a single window (num = 1).
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5187 
/*
 * BUS_RELEASE_RESOURCE() method for the PCI bus, the inverse of
 * pci_alloc_resource().  Grandchildren's resources, SR-IOV VF memory
 * resources, and PCI-PCI bridge window resources are each routed back
 * to the code that allocated them; everything else is released through
 * the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5238 
5239 int
5240 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5241     struct resource *r)
5242 {
5243 	struct pci_devinfo *dinfo;
5244 	int error;
5245 
5246 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5247 	if (error)
5248 		return (error);
5249 
5250 	/* Enable decoding in the command register when activating BARs. */
5251 	if (device_get_parent(child) == dev) {
5252 		/* Device ROMs need their decoding explicitly enabled. */
5253 		dinfo = device_get_ivars(child);
5254 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5255 			pci_write_bar(child, pci_find_bar(child, rid),
5256 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5257 		switch (type) {
5258 		case SYS_RES_IOPORT:
5259 		case SYS_RES_MEMORY:
5260 			error = PCI_ENABLE_IO(dev, child, type);
5261 			break;
5262 		}
5263 	}
5264 	return (error);
5265 }
5266 
5267 int
5268 pci_deactivate_resource(device_t dev, device_t child, int type,
5269     int rid, struct resource *r)
5270 {
5271 	struct pci_devinfo *dinfo;
5272 	int error;
5273 
5274 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5275 	if (error)
5276 		return (error);
5277 
5278 	/* Disable decoding for device ROMs. */
5279 	if (device_get_parent(child) == dev) {
5280 		dinfo = device_get_ivars(child);
5281 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5282 			pci_write_bar(child, pci_find_bar(child, rid),
5283 			    rman_get_start(r));
5284 	}
5285 	return (0);
5286 }
5287 
5288 void
5289 pci_child_deleted(device_t dev, device_t child)
5290 {
5291 	struct resource_list_entry *rle;
5292 	struct resource_list *rl;
5293 	struct pci_devinfo *dinfo;
5294 
5295 	dinfo = device_get_ivars(child);
5296 	rl = &dinfo->resources;
5297 
5298 	/* Turn off access to resources we're about to free */
5299 	if (bus_child_present(child) != 0) {
5300 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5301 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5302 
5303 		pci_disable_busmaster(child);
5304 	}
5305 
5306 	/* Free all allocated resources */
5307 	STAILQ_FOREACH(rle, rl, link) {
5308 		if (rle->res) {
5309 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5310 			    resource_list_busy(rl, rle->type, rle->rid)) {
5311 				pci_printf(&dinfo->cfg,
5312 				    "Resource still owned, oops. "
5313 				    "(type=%d, rid=%d, addr=%lx)\n",
5314 				    rle->type, rle->rid,
5315 				    rman_get_start(rle->res));
5316 				bus_release_resource(child, rle->type, rle->rid,
5317 				    rle->res);
5318 			}
5319 			resource_list_unreserve(rl, dev, child, rle->type,
5320 			    rle->rid);
5321 		}
5322 	}
5323 	resource_list_free(rl);
5324 
5325 	pci_freecfg(dinfo);
5326 }
5327 
5328 void
5329 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5330 {
5331 	struct pci_devinfo *dinfo;
5332 	struct resource_list *rl;
5333 	struct resource_list_entry *rle;
5334 
5335 	if (device_get_parent(child) != dev)
5336 		return;
5337 
5338 	dinfo = device_get_ivars(child);
5339 	rl = &dinfo->resources;
5340 	rle = resource_list_find(rl, type, rid);
5341 	if (rle == NULL)
5342 		return;
5343 
5344 	if (rle->res) {
5345 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5346 		    resource_list_busy(rl, type, rid)) {
5347 			device_printf(dev, "delete_resource: "
5348 			    "Resource still owned by child, oops. "
5349 			    "(type=%d, rid=%d, addr=%jx)\n",
5350 			    type, rid, rman_get_start(rle->res));
5351 			return;
5352 		}
5353 		resource_list_unreserve(rl, dev, child, type, rid);
5354 	}
5355 	resource_list_delete(rl, type, rid);
5356 }
5357 
5358 struct resource_list *
5359 pci_get_resource_list (device_t dev, device_t child)
5360 {
5361 	struct pci_devinfo *dinfo = device_get_ivars(child);
5362 
5363 	return (&dinfo->resources);
5364 }
5365 
5366 bus_dma_tag_t
5367 pci_get_dma_tag(device_t bus, device_t dev)
5368 {
5369 	struct pci_softc *sc = device_get_softc(bus);
5370 
5371 	return (sc->sc_dma_tag);
5372 }
5373 
5374 uint32_t
5375 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5376 {
5377 	struct pci_devinfo *dinfo = device_get_ivars(child);
5378 	pcicfgregs *cfg = &dinfo->cfg;
5379 
5380 #ifdef PCI_IOV
5381 	/*
5382 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5383 	 * emulate them here.
5384 	 */
5385 	if (cfg->flags & PCICFG_VF) {
5386 		if (reg == PCIR_VENDOR) {
5387 			switch (width) {
5388 			case 4:
5389 				return (cfg->device << 16 | cfg->vendor);
5390 			case 2:
5391 				return (cfg->vendor);
5392 			case 1:
5393 				return (cfg->vendor & 0xff);
5394 			default:
5395 				return (0xffffffff);
5396 			}
5397 		} else if (reg == PCIR_DEVICE) {
5398 			switch (width) {
5399 			/* Note that an unaligned 4-byte read is an error. */
5400 			case 2:
5401 				return (cfg->device);
5402 			case 1:
5403 				return (cfg->device & 0xff);
5404 			default:
5405 				return (0xffffffff);
5406 			}
5407 		}
5408 	}
5409 #endif
5410 
5411 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5412 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5413 }
5414 
5415 void
5416 pci_write_config_method(device_t dev, device_t child, int reg,
5417     uint32_t val, int width)
5418 {
5419 	struct pci_devinfo *dinfo = device_get_ivars(child);
5420 	pcicfgregs *cfg = &dinfo->cfg;
5421 
5422 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5423 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5424 }
5425 
5426 int
5427 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5428     size_t buflen)
5429 {
5430 
5431 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5432 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5433 	return (0);
5434 }
5435 
5436 int
5437 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5438     size_t buflen)
5439 {
5440 	struct pci_devinfo *dinfo;
5441 	pcicfgregs *cfg;
5442 
5443 	dinfo = device_get_ivars(child);
5444 	cfg = &dinfo->cfg;
5445 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5446 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5447 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5448 	    cfg->progif);
5449 	return (0);
5450 }
5451 
5452 int
5453 pci_assign_interrupt_method(device_t dev, device_t child)
5454 {
5455 	struct pci_devinfo *dinfo = device_get_ivars(child);
5456 	pcicfgregs *cfg = &dinfo->cfg;
5457 
5458 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5459 	    cfg->intpin));
5460 }
5461 
/*
 * dev_lookup eventhandler: translate a pciconf-style selector string
 * ("pciD:B:S:F" or "pciB:S:F") into the matching device_t.  *dev is
 * left untouched if it is already set, the name does not parse, or a
 * component is out of range.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: D:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift to B:S:F with domain 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* ARI allows function numbers above PCI_FUNCMAX only on slot 0. */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5510 
/*
 * Module event handler.  On load: initialize the global device queue,
 * create the /dev/pci control device, load vendor data, and register
 * the dev_lookup hook.  On unload: deregister the hook and destroy
 * /dev/pci.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* State created on MOD_LOAD and torn down on MOD_UNLOAD. */
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5537 
/*
 * Rewrite the writable PCI Express capability control registers from
 * the copies saved by pci_cfg_save_pcie().  Registers that only exist
 * on particular port types, or only in version 2+ of the capability,
 * are skipped on devices that lack them.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: root ports and (legacy) endpoints, or v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: root ports and downstream ports with a slot, or v2+. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: root ports and root complex event collectors, or v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" registers exist only in version 2+ of the capability. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5573 
5574 static void
5575 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5576 {
5577 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5578 	    dinfo->cfg.pcix.pcix_command,  2);
5579 }
5580 
/*
 * Restore a device's configuration space from the state captured by
 * pci_cfg_save(): return the device to D0, rewrite the common header
 * registers, the header-type specific (bridge/cardbus) registers and
 * the BARs, and then the PCIe, PCI-X, MSI and MSI-X capability state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Common header registers shared by all header types. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Header-type specific registers. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5648 
/*
 * Save the writable PCI Express capability control registers so that
 * pci_cfg_restore_pcie() can rewrite them later.  The set of registers
 * read mirrors the restore side: it depends on the capability version
 * and the port type.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: root ports and (legacy) endpoints, or v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: root ports and downstream ports with a slot, or v2+. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and root complex event collectors, or v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" registers exist only in version 2+ of the capability. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5686 
5687 static void
5688 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5689 {
5690 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5691 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5692 }
5693 
/*
 * Snapshot the writable portions of a device's configuration header
 * and its PCIe/PCI-X capability registers so pci_cfg_restore() can
 * rewrite them later.  If setstate is non-zero, the device may also be
 * powered down to D3 according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Header-type specific registers. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5796 
5797 /* Wrapper APIs suitable for device driver use. */
5798 void
5799 pci_save_state(device_t dev)
5800 {
5801 	struct pci_devinfo *dinfo;
5802 
5803 	dinfo = device_get_ivars(dev);
5804 	pci_cfg_save(dev, dinfo, 0);
5805 }
5806 
5807 void
5808 pci_restore_state(device_t dev)
5809 {
5810 	struct pci_devinfo *dinfo;
5811 
5812 	dinfo = device_get_ivars(dev);
5813 	pci_cfg_restore(dev, dinfo);
5814 }
5815 
5816 static uint16_t
5817 pci_get_rid_method(device_t dev, device_t child)
5818 {
5819 
5820 	return (PCIB_GET_RID(device_get_parent(dev), child));
5821 }
5822 
/*
 * Find the upstream port of a given PCI device in a root complex.
 * Walks up the bridge hierarchy and returns the first bridge that is a
 * PCI Express root port, or NULL once a bridge whose own parent is not
 * a PCI bus is reached.  The device itself must sit on a PCI bus
 * (asserted).
 */
device_t
pci_find_pcie_root_port(device_t dev)
{
	struct pci_devinfo *dinfo;
	devclass_t pci_class;
	device_t pcib, bus;

	pci_class = devclass_find("pci");
	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));

	/*
	 * Walk the bridge hierarchy until we find a PCI-e root
	 * port or a non-PCI device.
	 */
	for (;;) {
		bus = device_get_parent(dev);
		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
		    device_get_nameunit(dev)));

		pcib = device_get_parent(bus);
		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
		    device_get_nameunit(bus)));

		/*
		 * pcib's parent must be a PCI bus for this to be a
		 * PCI-PCI bridge.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			return (NULL);

		dinfo = device_get_ivars(pcib);
		if (dinfo->cfg.pcie.pcie_location != 0 &&
		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
			return (pcib);

		/* Continue the walk one bridge further up. */
		dev = pcib;
	}
}
5863