xref: /freebsd/sys/dev/pci/pci.c (revision 884d26c84cba3ffc3d4e626306098fcdfe6a0c2b)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
/*
 * Evaluates to non-zero when config register offset 'reg' equals PCIR_BIOS
 * for a function whose header type is PCIM_HDRTYPE_NORMAL, or PCIR_BIOS_1
 * for one whose header type is PCIM_HDRTYPE_BRIDGE.
 *
 * 'cfg' must point at something with an 'hdrtype' member.  Both arguments
 * are parenthesized in the expansion so that callers may pass arbitrary
 * expressions (e.g. a conditional) without operator-precedence surprises.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
82 
83 static int		pci_has_quirk(uint32_t devid, int quirk);
84 static pci_addr_t	pci_mapbase(uint64_t mapreg);
85 static const char	*pci_maptype(uint64_t mapreg);
86 static int		pci_maprange(uint64_t mapreg);
87 static pci_addr_t	pci_rombase(uint64_t mapreg);
88 static int		pci_romsize(uint64_t testval);
89 static void		pci_fixancient(pcicfgregs *cfg);
90 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91 
92 static int		pci_porten(device_t dev);
93 static int		pci_memen(device_t dev);
94 static void		pci_assign_interrupt(device_t bus, device_t dev,
95 			    int force_route);
96 static int		pci_add_map(device_t bus, device_t dev, int reg,
97 			    struct resource_list *rl, int force, int prefetch);
98 static int		pci_probe(device_t dev);
99 static int		pci_attach(device_t dev);
100 static int		pci_detach(device_t dev);
101 static void		pci_load_vendor_data(void);
102 static int		pci_describe_parse_line(char **ptr, int *vendor,
103 			    int *device, char **desc);
104 static char		*pci_describe_device(device_t dev);
105 static int		pci_modevent(module_t mod, int what, void *arg);
106 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
107 			    pcicfgregs *cfg);
108 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
109 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t *data);
111 #if 0
112 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
113 			    int reg, uint32_t data);
114 #endif
115 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
116 static void		pci_mask_msix(device_t dev, u_int index);
117 static void		pci_unmask_msix(device_t dev, u_int index);
118 static int		pci_msi_blacklisted(void);
119 static int		pci_msix_blacklisted(void);
120 static void		pci_resume_msi(device_t dev);
121 static void		pci_resume_msix(device_t dev);
122 static int		pci_remap_intr_method(device_t bus, device_t dev,
123 			    u_int irq);
124 
125 static int		pci_get_id_method(device_t dev, device_t child,
126 			    enum pci_id_type type, uintptr_t *rid);
127 
128 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
129     int b, int s, int f, uint16_t vid, uint16_t did);
130 
/*
 * Method table for the "pci" bus driver.  Each DEVMETHOD() entry binds one
 * interface method (device, bus or PCI interface) to its implementation.
 * Entries naming bus_generic_* use the generic bus code; the pci_* handlers
 * are implemented by the PCI bus code (several are only declared, not
 * defined, in the part of the file visible here).  The SR-IOV methods are
 * only present when the kernel is built with PCI_IOV.  DEVMETHOD_END
 * terminates the table.
 */
131 static device_method_t pci_methods[] = {
132 	/* Device interface */
133 	DEVMETHOD(device_probe,		pci_probe),
134 	DEVMETHOD(device_attach,	pci_attach),
135 	DEVMETHOD(device_detach,	pci_detach),
136 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
137 	DEVMETHOD(device_suspend,	bus_generic_suspend),
138 	DEVMETHOD(device_resume,	pci_resume),
139 
140 	/* Bus interface */
141 	DEVMETHOD(bus_print_child,	pci_print_child),
142 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
143 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
144 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
145 	DEVMETHOD(bus_driver_added,	pci_driver_added),
146 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
147 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
148 
149 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
150 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
151 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
152 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
153 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
154 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
155 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
156 	DEVMETHOD(bus_release_resource,	pci_release_resource),
157 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
158 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
159 	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
160 	DEVMETHOD(bus_child_detached,	pci_child_detached),
161 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
162 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
163 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
164 	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
165 	DEVMETHOD(bus_resume_child,	pci_resume_child),
166 	DEVMETHOD(bus_rescan,		pci_rescan_method),
167 
168 	/* PCI interface */
169 	DEVMETHOD(pci_read_config,	pci_read_config_method),
170 	DEVMETHOD(pci_write_config,	pci_write_config_method),
171 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
172 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
173 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
174 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
175 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
176 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
177 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
178 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
179 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
180 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
181 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
182 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
183 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
184 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
185 	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
186 	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
187 	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
188 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
189 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
190 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
191 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
192 	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
193 	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
194 	DEVMETHOD(pci_get_id,		pci_get_id_method),
195 	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
196 	DEVMETHOD(pci_child_added,	pci_child_added_method),
197 #ifdef PCI_IOV
198 	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
199 	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
200 	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
201 #endif
202 
203 	DEVMETHOD_END
204 };
205 
/*
 * Define the "pci" driver class (pci_driver) from the pci_methods table;
 * the per-instance softc is sized as struct pci_softc.
 */
206 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
207 
208 static devclass_t pci_devclass;
/*
 * Register the driver as a child of "pcib" with pci_modevent as the module
 * event handler, and publish module version 1.
 */
209 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
210 MODULE_VERSION(pci, 1);
211 
/*
 * Vendor description data and its size.  NOTE(review): presumably filled in
 * by pci_load_vendor_data(), which is outside the visible part of the file.
 */
212 static char	*pci_vendordata;
213 static size_t	pci_vendordata_size;
214 
/* Values for pci_quirk.type. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */

/*
 * One entry of the device quirk table: a quirk of the given type applies
 * to the device identified by devid.  arg1/arg2 carry quirk-specific data
 * (the table in this file uses arg1 for a config register offset).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of PCI_QUIRK_* */
	int	arg1;
	int	arg2;
};
227 
228 static const struct pci_quirk pci_quirks[] = {
229 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
230 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
231 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
232 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
233 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
234 
235 	/*
236 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
237 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
238 	 */
239 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 
242 	/*
243 	 * MSI doesn't work on earlier Intel chipsets including
244 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
245 	 */
246 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
248 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
249 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
250 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
251 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
252 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
253 
254 	/*
255 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
256 	 * bridge.
257 	 */
258 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
259 
260 	/*
261 	 * MSI-X allocation doesn't work properly for devices passed through
262 	 * by VMware up to at least ESXi 5.1.
263 	 */
264 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
265 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
266 
267 	/*
268 	 * Some virtualization environments emulate an older chipset
269 	 * but support MSI just fine.  QEMU uses the Intel 82440.
270 	 */
271 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
272 
273 	/*
274 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
275 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
276 	 * It prevents us from attaching hpet(4) when the bit is unset.
277 	 * Note this quirk only affects SB600 revision A13 and earlier.
278 	 * For SB600 A21 and later, firmware must set the bit to hide it.
279 	 * For SB700 and later, it is unused and hardcoded to zero.
280 	 */
281 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
282 
283 	/*
284 	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
285 	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
286 	 * command register is set.
287 	 */
288 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
289 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
290 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
291 
292 	/*
293 	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
294 	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
295 	 */
296 	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
297 	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
298 	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
299 	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
300 	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
301 	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
302 
303 	{ 0 }
304 };
305 
306 /* map register information */
307 #define	PCI_MAPMEM	0x01	/* memory map */
308 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
309 #define	PCI_MAPPORT	0x04	/* port map */
310 
311 struct devlist pci_devq;
312 uint32_t pci_generation;
313 uint32_t pci_numdevs = 0;
314 static int pcie_chipset, pcix_chipset;
315 
316 /* sysctl vars */
317 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
318 
319 static int pci_enable_io_modes = 1;
320 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
321     &pci_enable_io_modes, 1,
322     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
323 enable these bits correctly.  We'd like to do this all the time, but there\n\
324 are some peripherals that this causes problems with.");
325 
326 static int pci_do_realloc_bars = 0;
327 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
328     &pci_do_realloc_bars, 0,
329     "Attempt to allocate a new range for any BARs whose original "
330     "firmware-assigned ranges fail to allocate during the initial device scan.");
331 
332 static int pci_do_power_nodriver = 0;
333 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
334     &pci_do_power_nodriver, 0,
335   "Place a function into D3 state when no driver attaches to it.  0 means\n\
336 disable.  1 means conservatively place devices into D3 state.  2 means\n\
337 aggressively place devices into D3 state.  3 means put absolutely everything\n\
338 in D3 state.");
339 
340 int pci_do_power_resume = 1;
341 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
342     &pci_do_power_resume, 1,
343   "Transition from D3 -> D0 on resume.");
344 
345 int pci_do_power_suspend = 1;
346 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
347     &pci_do_power_suspend, 1,
348   "Transition from D0 -> D3 on suspend.");
349 
350 static int pci_do_msi = 1;
351 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
352     "Enable support for MSI interrupts");
353 
354 static int pci_do_msix = 1;
355 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
356     "Enable support for MSI-X interrupts");
357 
358 static int pci_honor_msi_blacklist = 1;
359 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
360     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
361 
362 #if defined(__i386__) || defined(__amd64__)
363 static int pci_usb_takeover = 1;
364 #else
365 static int pci_usb_takeover = 0;
366 #endif
367 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
368     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
369 Disable this if you depend on BIOS emulation of USB devices, that is\n\
370 you use USB devices (like keyboard or mouse) but do not load USB drivers");
371 
372 static int pci_clear_bars;
373 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
374     "Ignore firmware-assigned resources for BARs.");
375 
376 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
377 static int pci_clear_buses;
378 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
379     "Ignore firmware-assigned bus numbers.");
380 #endif
381 
382 static int pci_enable_ari = 1;
383 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
384     0, "Enable support for PCIe Alternative RID Interpretation");
385 
386 static int
387 pci_has_quirk(uint32_t devid, int quirk)
388 {
389 	const struct pci_quirk *q;
390 
391 	for (q = &pci_quirks[0]; q->devid; q++) {
392 		if (q->devid == devid && q->type == quirk)
393 			return (1);
394 	}
395 	return (0);
396 }
397 
398 /* Find a device_t by bus/slot/function in domain 0 */
399 
400 device_t
401 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
402 {
403 
404 	return (pci_find_dbsf(0, bus, slot, func));
405 }
406 
407 /* Find a device_t by domain/bus/slot/function */
408 
409 device_t
410 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
411 {
412 	struct pci_devinfo *dinfo;
413 
414 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
415 		if ((dinfo->cfg.domain == domain) &&
416 		    (dinfo->cfg.bus == bus) &&
417 		    (dinfo->cfg.slot == slot) &&
418 		    (dinfo->cfg.func == func)) {
419 			return (dinfo->cfg.dev);
420 		}
421 	}
422 
423 	return (NULL);
424 }
425 
426 /* Find a device_t by vendor/device ID */
427 
428 device_t
429 pci_find_device(uint16_t vendor, uint16_t device)
430 {
431 	struct pci_devinfo *dinfo;
432 
433 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
434 		if ((dinfo->cfg.vendor == vendor) &&
435 		    (dinfo->cfg.device == device)) {
436 			return (dinfo->cfg.dev);
437 		}
438 	}
439 
440 	return (NULL);
441 }
442 
443 device_t
444 pci_find_class(uint8_t class, uint8_t subclass)
445 {
446 	struct pci_devinfo *dinfo;
447 
448 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
449 		if (dinfo->cfg.baseclass == class &&
450 		    dinfo->cfg.subclass == subclass) {
451 			return (dinfo->cfg.dev);
452 		}
453 	}
454 
455 	return (NULL);
456 }
457 
458 static int
459 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
460 {
461 	va_list ap;
462 	int retval;
463 
464 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
465 	    cfg->func);
466 	va_start(ap, fmt);
467 	retval += vprintf(fmt, ap);
468 	va_end(ap);
469 	return (retval);
470 }
471 
472 /* return base address of memory or port map */
473 
474 static pci_addr_t
475 pci_mapbase(uint64_t mapreg)
476 {
477 
478 	if (PCI_BAR_MEM(mapreg))
479 		return (mapreg & PCIM_BAR_MEM_BASE);
480 	else
481 		return (mapreg & PCIM_BAR_IO_BASE);
482 }
483 
484 /* return map type of memory or port map */
485 
486 static const char *
487 pci_maptype(uint64_t mapreg)
488 {
489 
490 	if (PCI_BAR_IO(mapreg))
491 		return ("I/O Port");
492 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
493 		return ("Prefetchable Memory");
494 	return ("Memory");
495 }
496 
/*
 * Return log2 of the size decoded by a memory or port map, given the value
 * read back from the map register ('testval').  This is the bit position of
 * the lowest set bit of the base-address field; 0 is returned when that
 * field is entirely zero.
 */
int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
515 
516 /* return base address of device ROM */
517 
518 static pci_addr_t
519 pci_rombase(uint64_t mapreg)
520 {
521 
522 	return (mapreg & PCIM_BIOS_ADDR_MASK);
523 }
524 
/*
 * Return log2 of the size decoded for the device ROM, given the value read
 * back from the ROM register ('testval').  This is the bit position of the
 * lowest set bit of the ROM address field; 0 is returned when that field is
 * entirely zero.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_rombase(testval);
	shift = 0;
	while (base != 0 && (base & 1) == 0) {
		base >>= 1;
		shift++;
	}
	return (shift);
}
543 
544 /* return log2 of address range supported by map register */
545 
546 static int
547 pci_maprange(uint64_t mapreg)
548 {
549 	int ln2range = 0;
550 
551 	if (PCI_BAR_IO(mapreg))
552 		ln2range = 32;
553 	else
554 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
555 		case PCIM_BAR_MEM_32:
556 			ln2range = 32;
557 			break;
558 		case PCIM_BAR_MEM_1MB:
559 			ln2range = 20;
560 			break;
561 		case PCIM_BAR_MEM_64:
562 			ln2range = 64;
563 			break;
564 		}
565 	return (ln2range);
566 }
567 
568 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
569 
570 static void
571 pci_fixancient(pcicfgregs *cfg)
572 {
573 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
574 		return;
575 
576 	/* PCI to PCI bridges use header type 1 */
577 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
578 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
579 }
580 
581 /* extract header type specific config data */
582 
583 static void
584 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
585 {
586 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
587 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
588 	case PCIM_HDRTYPE_NORMAL:
589 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
590 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
591 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
592 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
593 		cfg->nummaps	    = PCI_MAXMAPS_0;
594 		break;
595 	case PCIM_HDRTYPE_BRIDGE:
596 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
597 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
598 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
599 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
600 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
601 		cfg->nummaps	    = PCI_MAXMAPS_1;
602 		break;
603 	case PCIM_HDRTYPE_CARDBUS:
604 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
605 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
606 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
607 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
608 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
609 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
610 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
611 		cfg->nummaps	    = PCI_MAXMAPS_2;
612 		break;
613 	}
614 #undef REG
615 }
616 
617 /* read configuration header into pcicfgregs structure */
618 struct pci_devinfo *
619 pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
620 {
621 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
622 	uint16_t vid, did;
623 
624 	vid = REG(PCIR_VENDOR, 2);
625 	did = REG(PCIR_DEVICE, 2);
626 	if (vid != 0xffff)
627 		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));
628 
629 	return (NULL);
630 }
631 
632 struct pci_devinfo *
633 pci_alloc_devinfo_method(device_t dev)
634 {
635 
636 	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
637 	    M_WAITOK | M_ZERO));
638 }
639 
640 static struct pci_devinfo *
641 pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
642     uint16_t vid, uint16_t did)
643 {
644 	struct pci_devinfo *devlist_entry;
645 	pcicfgregs *cfg;
646 
647 	devlist_entry = PCI_ALLOC_DEVINFO(bus);
648 
649 	cfg = &devlist_entry->cfg;
650 
651 	cfg->domain		= d;
652 	cfg->bus		= b;
653 	cfg->slot		= s;
654 	cfg->func		= f;
655 	cfg->vendor		= vid;
656 	cfg->device		= did;
657 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
658 	cfg->statreg		= REG(PCIR_STATUS, 2);
659 	cfg->baseclass		= REG(PCIR_CLASS, 1);
660 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
661 	cfg->progif		= REG(PCIR_PROGIF, 1);
662 	cfg->revid		= REG(PCIR_REVID, 1);
663 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
664 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
665 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
666 	cfg->intpin		= REG(PCIR_INTPIN, 1);
667 	cfg->intline		= REG(PCIR_INTLINE, 1);
668 
669 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
670 	cfg->hdrtype		&= ~PCIM_MFDEV;
671 	STAILQ_INIT(&cfg->maps);
672 
673 	cfg->iov		= NULL;
674 
675 	pci_fixancient(cfg);
676 	pci_hdrtypedata(pcib, b, s, f, cfg);
677 
678 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
679 		pci_read_cap(pcib, cfg);
680 
681 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
682 
683 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
684 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
685 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
686 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
687 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
688 
689 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
690 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
691 	devlist_entry->conf.pc_vendor = cfg->vendor;
692 	devlist_entry->conf.pc_device = cfg->device;
693 
694 	devlist_entry->conf.pc_class = cfg->baseclass;
695 	devlist_entry->conf.pc_subclass = cfg->subclass;
696 	devlist_entry->conf.pc_progif = cfg->progif;
697 	devlist_entry->conf.pc_revid = cfg->revid;
698 
699 	pci_numdevs++;
700 	pci_generation++;
701 
702 	return (devlist_entry);
703 }
704 #undef REG
705 
706 static void
707 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
708 {
709 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
710     cfg->ea.ea_location + (n), w)
711 	int num_ent;
712 	int ptr;
713 	int a, b;
714 	uint32_t val;
715 	int ent_size;
716 	uint32_t dw[4];
717 	uint64_t base, max_offset;
718 	struct pci_ea_entry *eae;
719 
720 	if (cfg->ea.ea_location == 0)
721 		return;
722 
723 	STAILQ_INIT(&cfg->ea.ea_entries);
724 
725 	/* Determine the number of entries */
726 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
727 	num_ent &= PCIM_EA_NUM_ENT_MASK;
728 
729 	/* Find the first entry to care of */
730 	ptr = PCIR_EA_FIRST_ENT;
731 
732 	/* Skip DWORD 2 for type 1 functions */
733 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
734 		ptr += 4;
735 
736 	for (a = 0; a < num_ent; a++) {
737 
738 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
739 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
740 
741 		/* Read a number of dwords in the entry */
742 		val = REG(ptr, 4);
743 		ptr += 4;
744 		ent_size = (val & PCIM_EA_ES);
745 
746 		for (b = 0; b < ent_size; b++) {
747 			dw[b] = REG(ptr, 4);
748 			ptr += 4;
749 		}
750 
751 		eae->eae_flags = val;
752 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
753 
754 		base = dw[0] & PCIM_EA_FIELD_MASK;
755 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
756 		b = 2;
757 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
758 			base |= (uint64_t)dw[b] << 32UL;
759 			b++;
760 		}
761 		if (((dw[1] & PCIM_EA_IS_64) != 0)
762 		    && (b < ent_size)) {
763 			max_offset |= (uint64_t)dw[b] << 32UL;
764 			b++;
765 		}
766 
767 		eae->eae_base = base;
768 		eae->eae_max_offset = max_offset;
769 
770 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
771 
772 		if (bootverbose) {
773 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
774 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
775 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
776 		}
777 	}
778 }
779 #undef REG
780 
781 static void
782 pci_read_cap(device_t pcib, pcicfgregs *cfg)
783 {
784 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
785 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
786 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
787 	uint64_t addr;
788 #endif
789 	uint32_t val;
790 	int	ptr, nextptr, ptrptr;
791 
792 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
793 	case PCIM_HDRTYPE_NORMAL:
794 	case PCIM_HDRTYPE_BRIDGE:
795 		ptrptr = PCIR_CAP_PTR;
796 		break;
797 	case PCIM_HDRTYPE_CARDBUS:
798 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
799 		break;
800 	default:
801 		return;		/* no extended capabilities support */
802 	}
803 	nextptr = REG(ptrptr, 1);	/* sanity check? */
804 
805 	/*
806 	 * Read capability entries.
807 	 */
808 	while (nextptr != 0) {
809 		/* Sanity check */
810 		if (nextptr > 255) {
811 			printf("illegal PCI extended capability offset %d\n",
812 			    nextptr);
813 			return;
814 		}
815 		/* Find the next entry */
816 		ptr = nextptr;
817 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
818 
819 		/* Process this entry */
820 		switch (REG(ptr + PCICAP_ID, 1)) {
821 		case PCIY_PMG:		/* PCI power management */
822 			if (cfg->pp.pp_cap == 0) {
823 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
824 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
825 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
826 				if ((nextptr - ptr) > PCIR_POWER_DATA)
827 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
828 			}
829 			break;
830 		case PCIY_HT:		/* HyperTransport */
831 			/* Determine HT-specific capability type. */
832 			val = REG(ptr + PCIR_HT_COMMAND, 2);
833 
834 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
835 				cfg->ht.ht_slave = ptr;
836 
837 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
838 			switch (val & PCIM_HTCMD_CAP_MASK) {
839 			case PCIM_HTCAP_MSI_MAPPING:
840 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
841 					/* Sanity check the mapping window. */
842 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
843 					    4);
844 					addr <<= 32;
845 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
846 					    4);
847 					if (addr != MSI_INTEL_ADDR_BASE)
848 						device_printf(pcib,
849 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
850 						    cfg->domain, cfg->bus,
851 						    cfg->slot, cfg->func,
852 						    (long long)addr);
853 				} else
854 					addr = MSI_INTEL_ADDR_BASE;
855 
856 				cfg->ht.ht_msimap = ptr;
857 				cfg->ht.ht_msictrl = val;
858 				cfg->ht.ht_msiaddr = addr;
859 				break;
860 			}
861 #endif
862 			break;
863 		case PCIY_MSI:		/* PCI MSI */
864 			cfg->msi.msi_location = ptr;
865 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
866 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
867 						     PCIM_MSICTRL_MMC_MASK)>>1);
868 			break;
869 		case PCIY_MSIX:		/* PCI MSI-X */
870 			cfg->msix.msix_location = ptr;
871 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
872 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
873 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
874 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
875 			cfg->msix.msix_table_bar = PCIR_BAR(val &
876 			    PCIM_MSIX_BIR_MASK);
877 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
878 			val = REG(ptr + PCIR_MSIX_PBA, 4);
879 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
880 			    PCIM_MSIX_BIR_MASK);
881 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
882 			break;
883 		case PCIY_VPD:		/* PCI Vital Product Data */
884 			cfg->vpd.vpd_reg = ptr;
885 			break;
886 		case PCIY_SUBVENDOR:
887 			/* Should always be true. */
888 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
889 			    PCIM_HDRTYPE_BRIDGE) {
890 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
891 				cfg->subvendor = val & 0xffff;
892 				cfg->subdevice = val >> 16;
893 			}
894 			break;
895 		case PCIY_PCIX:		/* PCI-X */
896 			/*
897 			 * Assume we have a PCI-X chipset if we have
898 			 * at least one PCI-PCI bridge with a PCI-X
899 			 * capability.  Note that some systems with
900 			 * PCI-express or HT chipsets might match on
901 			 * this check as well.
902 			 */
903 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
904 			    PCIM_HDRTYPE_BRIDGE)
905 				pcix_chipset = 1;
906 			cfg->pcix.pcix_location = ptr;
907 			break;
908 		case PCIY_EXPRESS:	/* PCI-express */
909 			/*
910 			 * Assume we have a PCI-express chipset if we have
911 			 * at least one PCI-express device.
912 			 */
913 			pcie_chipset = 1;
914 			cfg->pcie.pcie_location = ptr;
915 			val = REG(ptr + PCIER_FLAGS, 2);
916 			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
917 			break;
918 		case PCIY_EA:		/* Enhanced Allocation */
919 			cfg->ea.ea_location = ptr;
920 			pci_ea_fill_info(pcib, cfg);
921 			break;
922 		default:
923 			break;
924 		}
925 	}
926 
927 #if defined(__powerpc__)
928 	/*
929 	 * Enable the MSI mapping window for all HyperTransport
930 	 * slaves.  PCI-PCI bridges have their windows enabled via
931 	 * PCIB_MAP_MSI().
932 	 */
933 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
934 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
935 		device_printf(pcib,
936 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
937 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
938 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
939 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
940 		     2);
941 	}
942 #endif
/* REG and WREG stay defined; the VPD functions below use them as well. */
944 }
945 
946 /*
947  * PCI Vital Product Data
948  */
949 
950 #define	PCI_VPD_TIMEOUT		1000000
951 
952 static int
953 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
954 {
955 	int count = PCI_VPD_TIMEOUT;
956 
957 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
958 
959 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
960 
961 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
962 		if (--count < 0)
963 			return (ENXIO);
964 		DELAY(1);	/* limit looping */
965 	}
966 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
967 
968 	return (0);
969 }
970 
971 #if 0
972 static int
973 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
974 {
975 	int count = PCI_VPD_TIMEOUT;
976 
977 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
978 
979 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
980 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
981 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
982 		if (--count < 0)
983 			return (ENXIO);
984 		DELAY(1);	/* limit looping */
985 	}
986 
987 	return (0);
988 }
989 #endif
990 
991 #undef PCI_VPD_TIMEOUT
992 
993 struct vpd_readstate {
994 	device_t	pcib;
995 	pcicfgregs	*cfg;
996 	uint32_t	val;
997 	int		bytesinval;
998 	int		off;
999 	uint8_t		cksum;
1000 };
1001 
1002 static int
1003 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1004 {
1005 	uint32_t reg;
1006 	uint8_t byte;
1007 
1008 	if (vrs->bytesinval == 0) {
1009 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1010 			return (ENXIO);
1011 		vrs->val = le32toh(reg);
1012 		vrs->off += 4;
1013 		byte = vrs->val & 0xff;
1014 		vrs->bytesinval = 3;
1015 	} else {
1016 		vrs->val = vrs->val >> 8;
1017 		byte = vrs->val & 0xff;
1018 		vrs->bytesinval--;
1019 	}
1020 
1021 	vrs->cksum += byte;
1022 	*data = byte;
1023 	return (0);
1024 }
1025 
1026 static void
1027 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1028 {
1029 	struct vpd_readstate vrs;
1030 	int state;
1031 	int name;
1032 	int remain;
1033 	int i;
1034 	int alloc, off;		/* alloc/off for RO/W arrays */
1035 	int cksumvalid;
1036 	int dflen;
1037 	uint8_t byte;
1038 	uint8_t byte2;
1039 
1040 	/* init vpd reader */
1041 	vrs.bytesinval = 0;
1042 	vrs.off = 0;
1043 	vrs.pcib = pcib;
1044 	vrs.cfg = cfg;
1045 	vrs.cksum = 0;
1046 
1047 	state = 0;
1048 	name = remain = i = 0;	/* shut up stupid gcc */
1049 	alloc = off = 0;	/* shut up stupid gcc */
1050 	dflen = 0;		/* shut up stupid gcc */
1051 	cksumvalid = -1;
1052 	while (state >= 0) {
1053 		if (vpd_nextbyte(&vrs, &byte)) {
1054 			state = -2;
1055 			break;
1056 		}
1057 #if 0
1058 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1059 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1060 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1061 #endif
1062 		switch (state) {
1063 		case 0:		/* item name */
1064 			if (byte & 0x80) {
1065 				if (vpd_nextbyte(&vrs, &byte2)) {
1066 					state = -2;
1067 					break;
1068 				}
1069 				remain = byte2;
1070 				if (vpd_nextbyte(&vrs, &byte2)) {
1071 					state = -2;
1072 					break;
1073 				}
1074 				remain |= byte2 << 8;
1075 				if (remain > (0x7f*4 - vrs.off)) {
1076 					state = -1;
1077 					pci_printf(cfg,
1078 					    "invalid VPD data, remain %#x\n",
1079 					    remain);
1080 				}
1081 				name = byte & 0x7f;
1082 			} else {
1083 				remain = byte & 0x7;
1084 				name = (byte >> 3) & 0xf;
1085 			}
1086 			switch (name) {
1087 			case 0x2:	/* String */
1088 				cfg->vpd.vpd_ident = malloc(remain + 1,
1089 				    M_DEVBUF, M_WAITOK);
1090 				i = 0;
1091 				state = 1;
1092 				break;
1093 			case 0xf:	/* End */
1094 				state = -1;
1095 				break;
1096 			case 0x10:	/* VPD-R */
1097 				alloc = 8;
1098 				off = 0;
1099 				cfg->vpd.vpd_ros = malloc(alloc *
1100 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1101 				    M_WAITOK | M_ZERO);
1102 				state = 2;
1103 				break;
1104 			case 0x11:	/* VPD-W */
1105 				alloc = 8;
1106 				off = 0;
1107 				cfg->vpd.vpd_w = malloc(alloc *
1108 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1109 				    M_WAITOK | M_ZERO);
1110 				state = 5;
1111 				break;
1112 			default:	/* Invalid data, abort */
1113 				state = -1;
1114 				break;
1115 			}
1116 			break;
1117 
1118 		case 1:	/* Identifier String */
1119 			cfg->vpd.vpd_ident[i++] = byte;
1120 			remain--;
1121 			if (remain == 0)  {
1122 				cfg->vpd.vpd_ident[i] = '\0';
1123 				state = 0;
1124 			}
1125 			break;
1126 
1127 		case 2:	/* VPD-R Keyword Header */
1128 			if (off == alloc) {
1129 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1130 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1131 				    M_DEVBUF, M_WAITOK | M_ZERO);
1132 			}
1133 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1134 			if (vpd_nextbyte(&vrs, &byte2)) {
1135 				state = -2;
1136 				break;
1137 			}
1138 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1139 			if (vpd_nextbyte(&vrs, &byte2)) {
1140 				state = -2;
1141 				break;
1142 			}
1143 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1144 			if (dflen == 0 &&
1145 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1146 			    2) == 0) {
1147 				/*
1148 				 * if this happens, we can't trust the rest
1149 				 * of the VPD.
1150 				 */
1151 				pci_printf(cfg, "bad keyword length: %d\n",
1152 				    dflen);
1153 				cksumvalid = 0;
1154 				state = -1;
1155 				break;
1156 			} else if (dflen == 0) {
1157 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1158 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1159 				    M_DEVBUF, M_WAITOK);
1160 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1161 			} else
1162 				cfg->vpd.vpd_ros[off].value = malloc(
1163 				    (dflen + 1) *
1164 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1165 				    M_DEVBUF, M_WAITOK);
1166 			remain -= 3;
1167 			i = 0;
1168 			/* keep in sync w/ state 3's transistions */
1169 			if (dflen == 0 && remain == 0)
1170 				state = 0;
1171 			else if (dflen == 0)
1172 				state = 2;
1173 			else
1174 				state = 3;
1175 			break;
1176 
1177 		case 3:	/* VPD-R Keyword Value */
1178 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1179 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1180 			    "RV", 2) == 0 && cksumvalid == -1) {
1181 				if (vrs.cksum == 0)
1182 					cksumvalid = 1;
1183 				else {
1184 					if (bootverbose)
1185 						pci_printf(cfg,
1186 					    "bad VPD cksum, remain %hhu\n",
1187 						    vrs.cksum);
1188 					cksumvalid = 0;
1189 					state = -1;
1190 					break;
1191 				}
1192 			}
1193 			dflen--;
1194 			remain--;
1195 			/* keep in sync w/ state 2's transistions */
1196 			if (dflen == 0)
1197 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1198 			if (dflen == 0 && remain == 0) {
1199 				cfg->vpd.vpd_rocnt = off;
1200 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1201 				    off * sizeof(*cfg->vpd.vpd_ros),
1202 				    M_DEVBUF, M_WAITOK | M_ZERO);
1203 				state = 0;
1204 			} else if (dflen == 0)
1205 				state = 2;
1206 			break;
1207 
1208 		case 4:
1209 			remain--;
1210 			if (remain == 0)
1211 				state = 0;
1212 			break;
1213 
1214 		case 5:	/* VPD-W Keyword Header */
1215 			if (off == alloc) {
1216 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1217 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1218 				    M_DEVBUF, M_WAITOK | M_ZERO);
1219 			}
1220 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1221 			if (vpd_nextbyte(&vrs, &byte2)) {
1222 				state = -2;
1223 				break;
1224 			}
1225 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1226 			if (vpd_nextbyte(&vrs, &byte2)) {
1227 				state = -2;
1228 				break;
1229 			}
1230 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1231 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1232 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1233 			    sizeof(*cfg->vpd.vpd_w[off].value),
1234 			    M_DEVBUF, M_WAITOK);
1235 			remain -= 3;
1236 			i = 0;
1237 			/* keep in sync w/ state 6's transistions */
1238 			if (dflen == 0 && remain == 0)
1239 				state = 0;
1240 			else if (dflen == 0)
1241 				state = 5;
1242 			else
1243 				state = 6;
1244 			break;
1245 
1246 		case 6:	/* VPD-W Keyword Value */
1247 			cfg->vpd.vpd_w[off].value[i++] = byte;
1248 			dflen--;
1249 			remain--;
1250 			/* keep in sync w/ state 5's transistions */
1251 			if (dflen == 0)
1252 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1253 			if (dflen == 0 && remain == 0) {
1254 				cfg->vpd.vpd_wcnt = off;
1255 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1256 				    off * sizeof(*cfg->vpd.vpd_w),
1257 				    M_DEVBUF, M_WAITOK | M_ZERO);
1258 				state = 0;
1259 			} else if (dflen == 0)
1260 				state = 5;
1261 			break;
1262 
1263 		default:
1264 			pci_printf(cfg, "invalid state: %d\n", state);
1265 			state = -1;
1266 			break;
1267 		}
1268 	}
1269 
1270 	if (cksumvalid == 0 || state < -1) {
1271 		/* read-only data bad, clean up */
1272 		if (cfg->vpd.vpd_ros != NULL) {
1273 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1274 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1275 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1276 			cfg->vpd.vpd_ros = NULL;
1277 		}
1278 	}
1279 	if (state < -1) {
1280 		/* I/O error, clean up */
1281 		pci_printf(cfg, "failed to read VPD data.\n");
1282 		if (cfg->vpd.vpd_ident != NULL) {
1283 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1284 			cfg->vpd.vpd_ident = NULL;
1285 		}
1286 		if (cfg->vpd.vpd_w != NULL) {
1287 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1288 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1289 			free(cfg->vpd.vpd_w, M_DEVBUF);
1290 			cfg->vpd.vpd_w = NULL;
1291 		}
1292 	}
1293 	cfg->vpd.vpd_cached = 1;
1294 #undef REG
1295 #undef WREG
1296 }
1297 
1298 int
1299 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1300 {
1301 	struct pci_devinfo *dinfo = device_get_ivars(child);
1302 	pcicfgregs *cfg = &dinfo->cfg;
1303 
1304 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1305 		pci_read_vpd(device_get_parent(dev), cfg);
1306 
1307 	*identptr = cfg->vpd.vpd_ident;
1308 
1309 	if (*identptr == NULL)
1310 		return (ENXIO);
1311 
1312 	return (0);
1313 }
1314 
1315 int
1316 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1317 	const char **vptr)
1318 {
1319 	struct pci_devinfo *dinfo = device_get_ivars(child);
1320 	pcicfgregs *cfg = &dinfo->cfg;
1321 	int i;
1322 
1323 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1324 		pci_read_vpd(device_get_parent(dev), cfg);
1325 
1326 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1327 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1328 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1329 			*vptr = cfg->vpd.vpd_ros[i].value;
1330 			return (0);
1331 		}
1332 
1333 	*vptr = NULL;
1334 	return (ENXIO);
1335 }
1336 
1337 struct pcicfg_vpd *
1338 pci_fetch_vpd_list(device_t dev)
1339 {
1340 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1341 	pcicfgregs *cfg = &dinfo->cfg;
1342 
1343 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1344 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1345 	return (&cfg->vpd);
1346 }
1347 
1348 /*
1349  * Find the requested HyperTransport capability and return the offset
1350  * in configuration space via the pointer provided.  The function
1351  * returns 0 on success and an error code otherwise.
1352  */
1353 int
1354 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1355 {
1356 	int ptr, error;
1357 	uint16_t val;
1358 
1359 	error = pci_find_cap(child, PCIY_HT, &ptr);
1360 	if (error)
1361 		return (error);
1362 
1363 	/*
1364 	 * Traverse the capabilities list checking each HT capability
1365 	 * to see if it matches the requested HT capability.
1366 	 */
1367 	while (ptr != 0) {
1368 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1369 		if (capability == PCIM_HTCAP_SLAVE ||
1370 		    capability == PCIM_HTCAP_HOST)
1371 			val &= 0xe000;
1372 		else
1373 			val &= PCIM_HTCMD_CAP_MASK;
1374 		if (val == capability) {
1375 			if (capreg != NULL)
1376 				*capreg = ptr;
1377 			return (0);
1378 		}
1379 
1380 		/* Skip to the next HT capability. */
1381 		while (ptr != 0) {
1382 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1383 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1384 			    PCIY_HT)
1385 				break;
1386 		}
1387 	}
1388 	return (ENOENT);
1389 }
1390 
1391 /*
1392  * Find the requested capability and return the offset in
1393  * configuration space via the pointer provided.  The function returns
1394  * 0 on success and an error code otherwise.
1395  */
1396 int
1397 pci_find_cap_method(device_t dev, device_t child, int capability,
1398     int *capreg)
1399 {
1400 	struct pci_devinfo *dinfo = device_get_ivars(child);
1401 	pcicfgregs *cfg = &dinfo->cfg;
1402 	u_int32_t status;
1403 	u_int8_t ptr;
1404 
1405 	/*
1406 	 * Check the CAP_LIST bit of the PCI status register first.
1407 	 */
1408 	status = pci_read_config(child, PCIR_STATUS, 2);
1409 	if (!(status & PCIM_STATUS_CAPPRESENT))
1410 		return (ENXIO);
1411 
1412 	/*
1413 	 * Determine the start pointer of the capabilities list.
1414 	 */
1415 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1416 	case PCIM_HDRTYPE_NORMAL:
1417 	case PCIM_HDRTYPE_BRIDGE:
1418 		ptr = PCIR_CAP_PTR;
1419 		break;
1420 	case PCIM_HDRTYPE_CARDBUS:
1421 		ptr = PCIR_CAP_PTR_2;
1422 		break;
1423 	default:
1424 		/* XXX: panic? */
1425 		return (ENXIO);		/* no extended capabilities support */
1426 	}
1427 	ptr = pci_read_config(child, ptr, 1);
1428 
1429 	/*
1430 	 * Traverse the capabilities list.
1431 	 */
1432 	while (ptr != 0) {
1433 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1434 			if (capreg != NULL)
1435 				*capreg = ptr;
1436 			return (0);
1437 		}
1438 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1439 	}
1440 
1441 	return (ENOENT);
1442 }
1443 
1444 /*
1445  * Find the requested extended capability and return the offset in
1446  * configuration space via the pointer provided.  The function returns
1447  * 0 on success and an error code otherwise.
1448  */
1449 int
1450 pci_find_extcap_method(device_t dev, device_t child, int capability,
1451     int *capreg)
1452 {
1453 	struct pci_devinfo *dinfo = device_get_ivars(child);
1454 	pcicfgregs *cfg = &dinfo->cfg;
1455 	uint32_t ecap;
1456 	uint16_t ptr;
1457 
1458 	/* Only supported for PCI-express devices. */
1459 	if (cfg->pcie.pcie_location == 0)
1460 		return (ENXIO);
1461 
1462 	ptr = PCIR_EXTCAP;
1463 	ecap = pci_read_config(child, ptr, 4);
1464 	if (ecap == 0xffffffff || ecap == 0)
1465 		return (ENOENT);
1466 	for (;;) {
1467 		if (PCI_EXTCAP_ID(ecap) == capability) {
1468 			if (capreg != NULL)
1469 				*capreg = ptr;
1470 			return (0);
1471 		}
1472 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1473 		if (ptr == 0)
1474 			break;
1475 		ecap = pci_read_config(child, ptr, 4);
1476 	}
1477 
1478 	return (ENOENT);
1479 }
1480 
1481 /*
1482  * Support for MSI-X message interrupts.
1483  */
1484 void
1485 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1486     uint64_t address, uint32_t data)
1487 {
1488 	struct pci_devinfo *dinfo = device_get_ivars(child);
1489 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1490 	uint32_t offset;
1491 
1492 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1493 	offset = msix->msix_table_offset + index * 16;
1494 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1495 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1496 	bus_write_4(msix->msix_table_res, offset + 8, data);
1497 
1498 	/* Enable MSI -> HT mapping. */
1499 	pci_ht_map_msi(child, address);
1500 }
1501 
1502 void
1503 pci_mask_msix(device_t dev, u_int index)
1504 {
1505 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1506 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1507 	uint32_t offset, val;
1508 
1509 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1510 	offset = msix->msix_table_offset + index * 16 + 12;
1511 	val = bus_read_4(msix->msix_table_res, offset);
1512 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1513 		val |= PCIM_MSIX_VCTRL_MASK;
1514 		bus_write_4(msix->msix_table_res, offset, val);
1515 	}
1516 }
1517 
1518 void
1519 pci_unmask_msix(device_t dev, u_int index)
1520 {
1521 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1522 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1523 	uint32_t offset, val;
1524 
1525 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1526 	offset = msix->msix_table_offset + index * 16 + 12;
1527 	val = bus_read_4(msix->msix_table_res, offset);
1528 	if (val & PCIM_MSIX_VCTRL_MASK) {
1529 		val &= ~PCIM_MSIX_VCTRL_MASK;
1530 		bus_write_4(msix->msix_table_res, offset, val);
1531 	}
1532 }
1533 
1534 int
1535 pci_pending_msix(device_t dev, u_int index)
1536 {
1537 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1538 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1539 	uint32_t offset, bit;
1540 
1541 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1542 	offset = msix->msix_pba_offset + (index / 32) * 4;
1543 	bit = 1 << index % 32;
1544 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1545 }
1546 
1547 /*
1548  * Restore MSI-X registers and table during resume.  If MSI-X is
1549  * enabled then walk the virtual table to restore the actual MSI-X
1550  * table.
1551  */
1552 static void
1553 pci_resume_msix(device_t dev)
1554 {
1555 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1556 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1557 	struct msix_table_entry *mte;
1558 	struct msix_vector *mv;
1559 	int i;
1560 
1561 	if (msix->msix_alloc > 0) {
1562 		/* First, mask all vectors. */
1563 		for (i = 0; i < msix->msix_msgnum; i++)
1564 			pci_mask_msix(dev, i);
1565 
1566 		/* Second, program any messages with at least one handler. */
1567 		for (i = 0; i < msix->msix_table_len; i++) {
1568 			mte = &msix->msix_table[i];
1569 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1570 				continue;
1571 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1572 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1573 			pci_unmask_msix(dev, i);
1574 		}
1575 	}
1576 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1577 	    msix->msix_ctrl, 2);
1578 }
1579 
1580 /*
1581  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1582  * returned in *count.  After this function returns, each message will be
1583  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1584  */
1585 int
1586 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1587 {
1588 	struct pci_devinfo *dinfo = device_get_ivars(child);
1589 	pcicfgregs *cfg = &dinfo->cfg;
1590 	struct resource_list_entry *rle;
1591 	int actual, error, i, irq, max;
1592 
1593 	/* Don't let count == 0 get us into trouble. */
1594 	if (*count == 0)
1595 		return (EINVAL);
1596 
1597 	/* If rid 0 is allocated, then fail. */
1598 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1599 	if (rle != NULL && rle->res != NULL)
1600 		return (ENXIO);
1601 
1602 	/* Already have allocated messages? */
1603 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1604 		return (ENXIO);
1605 
1606 	/* If MSI-X is blacklisted for this system, fail. */
1607 	if (pci_msix_blacklisted())
1608 		return (ENXIO);
1609 
1610 	/* MSI-X capability present? */
1611 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1612 		return (ENODEV);
1613 
1614 	/* Make sure the appropriate BARs are mapped. */
1615 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1616 	    cfg->msix.msix_table_bar);
1617 	if (rle == NULL || rle->res == NULL ||
1618 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1619 		return (ENXIO);
1620 	cfg->msix.msix_table_res = rle->res;
1621 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1622 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1623 		    cfg->msix.msix_pba_bar);
1624 		if (rle == NULL || rle->res == NULL ||
1625 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1626 			return (ENXIO);
1627 	}
1628 	cfg->msix.msix_pba_res = rle->res;
1629 
1630 	if (bootverbose)
1631 		device_printf(child,
1632 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1633 		    *count, cfg->msix.msix_msgnum);
1634 	max = min(*count, cfg->msix.msix_msgnum);
1635 	for (i = 0; i < max; i++) {
1636 		/* Allocate a message. */
1637 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1638 		if (error) {
1639 			if (i == 0)
1640 				return (error);
1641 			break;
1642 		}
1643 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1644 		    irq, 1);
1645 	}
1646 	actual = i;
1647 
1648 	if (bootverbose) {
1649 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1650 		if (actual == 1)
1651 			device_printf(child, "using IRQ %ju for MSI-X\n",
1652 			    rle->start);
1653 		else {
1654 			int run;
1655 
1656 			/*
1657 			 * Be fancy and try to print contiguous runs of
1658 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1659 			 * 'run' is true if we are in a range.
1660 			 */
1661 			device_printf(child, "using IRQs %ju", rle->start);
1662 			irq = rle->start;
1663 			run = 0;
1664 			for (i = 1; i < actual; i++) {
1665 				rle = resource_list_find(&dinfo->resources,
1666 				    SYS_RES_IRQ, i + 1);
1667 
1668 				/* Still in a run? */
1669 				if (rle->start == irq + 1) {
1670 					run = 1;
1671 					irq++;
1672 					continue;
1673 				}
1674 
1675 				/* Finish previous range. */
1676 				if (run) {
1677 					printf("-%d", irq);
1678 					run = 0;
1679 				}
1680 
1681 				/* Start new range. */
1682 				printf(",%ju", rle->start);
1683 				irq = rle->start;
1684 			}
1685 
1686 			/* Unfinished range? */
1687 			if (run)
1688 				printf("-%d", irq);
1689 			printf(" for MSI-X\n");
1690 		}
1691 	}
1692 
1693 	/* Mask all vectors. */
1694 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1695 		pci_mask_msix(child, i);
1696 
1697 	/* Allocate and initialize vector data and virtual table. */
1698 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1699 	    M_DEVBUF, M_WAITOK | M_ZERO);
1700 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1701 	    M_DEVBUF, M_WAITOK | M_ZERO);
1702 	for (i = 0; i < actual; i++) {
1703 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1704 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1705 		cfg->msix.msix_table[i].mte_vector = i + 1;
1706 	}
1707 
1708 	/* Update control register to enable MSI-X. */
1709 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1710 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1711 	    cfg->msix.msix_ctrl, 2);
1712 
1713 	/* Update counts of alloc'd messages. */
1714 	cfg->msix.msix_alloc = actual;
1715 	cfg->msix.msix_table_len = actual;
1716 	*count = actual;
1717 	return (0);
1718 }
1719 
1720 /*
1721  * By default, pci_alloc_msix() will assign the allocated IRQ
1722  * resources consecutively to the first N messages in the MSI-X table.
1723  * However, device drivers may want to use different layouts if they
1724  * either receive fewer messages than they asked for, or they wish to
1725  * populate the MSI-X table sparsely.  This method allows the driver
1726  * to specify what layout it wants.  It must be called after a
1727  * successful pci_alloc_msix() but before any of the associated
1728  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1729  *
1730  * The 'vectors' array contains 'count' message vectors.  The array
1731  * maps directly to the MSI-X table in that index 0 in the array
1732  * specifies the vector for the first message in the MSI-X table, etc.
1733  * The vector value in each array index can either be 0 to indicate
1734  * that no vector should be assigned to a message slot, or it can be a
1735  * number from 1 to N (where N is the count returned from a
1736  * succcessful call to pci_alloc_msix()) to indicate which message
1737  * vector (IRQ) to be used for the corresponding message.
1738  *
1739  * On successful return, each message with a non-zero vector will have
1740  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1741  * 1.  Additionally, if any of the IRQs allocated via the previous
1742  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1743  * will be freed back to the system automatically.
1744  *
1745  * For example, suppose a driver has a MSI-X table with 6 messages and
1746  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1747  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1748  * C.  After the call to pci_alloc_msix(), the device will be setup to
1749  * have an MSI-X table of ABC--- (where - means no vector assigned).
1750  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1751  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1752  * be freed back to the system.  This device will also have valid
1753  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1754  *
1755  * In any case, the SYS_RES_IRQ rid X will always map to the message
1756  * at MSI-X table index X - 1 and will only be valid if a vector is
1757  * assigned to that table entry.
1758  */
1759 int
1760 pci_remap_msix_method(device_t dev, device_t child, int count,
1761     const u_int *vectors)
1762 {
1763 	struct pci_devinfo *dinfo = device_get_ivars(child);
1764 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1765 	struct resource_list_entry *rle;
1766 	int i, irq, j, *used;
1767 
1768 	/*
1769 	 * Have to have at least one message in the table but the
1770 	 * table can't be bigger than the actual MSI-X table in the
1771 	 * device.
1772 	 */
1773 	if (count == 0 || count > msix->msix_msgnum)
1774 		return (EINVAL);
1775 
1776 	/* Sanity check the vectors. */
1777 	for (i = 0; i < count; i++)
1778 		if (vectors[i] > msix->msix_alloc)
1779 			return (EINVAL);
1780 
1781 	/*
1782 	 * Make sure there aren't any holes in the vectors to be used.
1783 	 * It's a big pain to support it, and it doesn't really make
1784 	 * sense anyway.  Also, at least one vector must be used.
1785 	 */
1786 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1787 	    M_ZERO);
1788 	for (i = 0; i < count; i++)
1789 		if (vectors[i] != 0)
1790 			used[vectors[i] - 1] = 1;
1791 	for (i = 0; i < msix->msix_alloc - 1; i++)
1792 		if (used[i] == 0 && used[i + 1] == 1) {
1793 			free(used, M_DEVBUF);
1794 			return (EINVAL);
1795 		}
1796 	if (used[0] != 1) {
1797 		free(used, M_DEVBUF);
1798 		return (EINVAL);
1799 	}
1800 
1801 	/* Make sure none of the resources are allocated. */
1802 	for (i = 0; i < msix->msix_table_len; i++) {
1803 		if (msix->msix_table[i].mte_vector == 0)
1804 			continue;
1805 		if (msix->msix_table[i].mte_handlers > 0) {
1806 			free(used, M_DEVBUF);
1807 			return (EBUSY);
1808 		}
1809 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1810 		KASSERT(rle != NULL, ("missing resource"));
1811 		if (rle->res != NULL) {
1812 			free(used, M_DEVBUF);
1813 			return (EBUSY);
1814 		}
1815 	}
1816 
1817 	/* Free the existing resource list entries. */
1818 	for (i = 0; i < msix->msix_table_len; i++) {
1819 		if (msix->msix_table[i].mte_vector == 0)
1820 			continue;
1821 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1822 	}
1823 
1824 	/*
1825 	 * Build the new virtual table keeping track of which vectors are
1826 	 * used.
1827 	 */
1828 	free(msix->msix_table, M_DEVBUF);
1829 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1830 	    M_DEVBUF, M_WAITOK | M_ZERO);
1831 	for (i = 0; i < count; i++)
1832 		msix->msix_table[i].mte_vector = vectors[i];
1833 	msix->msix_table_len = count;
1834 
1835 	/* Free any unused IRQs and resize the vectors array if necessary. */
1836 	j = msix->msix_alloc - 1;
1837 	if (used[j] == 0) {
1838 		struct msix_vector *vec;
1839 
1840 		while (used[j] == 0) {
1841 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1842 			    msix->msix_vectors[j].mv_irq);
1843 			j--;
1844 		}
1845 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1846 		    M_WAITOK);
1847 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1848 		    (j + 1));
1849 		free(msix->msix_vectors, M_DEVBUF);
1850 		msix->msix_vectors = vec;
1851 		msix->msix_alloc = j + 1;
1852 	}
1853 	free(used, M_DEVBUF);
1854 
1855 	/* Map the IRQs onto the rids. */
1856 	for (i = 0; i < count; i++) {
1857 		if (vectors[i] == 0)
1858 			continue;
1859 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1860 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1861 		    irq, 1);
1862 	}
1863 
1864 	if (bootverbose) {
1865 		device_printf(child, "Remapped MSI-X IRQs as: ");
1866 		for (i = 0; i < count; i++) {
1867 			if (i != 0)
1868 				printf(", ");
1869 			if (vectors[i] == 0)
1870 				printf("---");
1871 			else
1872 				printf("%d",
1873 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1874 		}
1875 		printf("\n");
1876 	}
1877 
1878 	return (0);
1879 }
1880 
1881 static int
1882 pci_release_msix(device_t dev, device_t child)
1883 {
1884 	struct pci_devinfo *dinfo = device_get_ivars(child);
1885 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1886 	struct resource_list_entry *rle;
1887 	int i;
1888 
1889 	/* Do we have any messages to release? */
1890 	if (msix->msix_alloc == 0)
1891 		return (ENODEV);
1892 
1893 	/* Make sure none of the resources are allocated. */
1894 	for (i = 0; i < msix->msix_table_len; i++) {
1895 		if (msix->msix_table[i].mte_vector == 0)
1896 			continue;
1897 		if (msix->msix_table[i].mte_handlers > 0)
1898 			return (EBUSY);
1899 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1900 		KASSERT(rle != NULL, ("missing resource"));
1901 		if (rle->res != NULL)
1902 			return (EBUSY);
1903 	}
1904 
1905 	/* Update control register to disable MSI-X. */
1906 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1907 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1908 	    msix->msix_ctrl, 2);
1909 
1910 	/* Free the resource list entries. */
1911 	for (i = 0; i < msix->msix_table_len; i++) {
1912 		if (msix->msix_table[i].mte_vector == 0)
1913 			continue;
1914 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1915 	}
1916 	free(msix->msix_table, M_DEVBUF);
1917 	msix->msix_table_len = 0;
1918 
1919 	/* Release the IRQs. */
1920 	for (i = 0; i < msix->msix_alloc; i++)
1921 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1922 		    msix->msix_vectors[i].mv_irq);
1923 	free(msix->msix_vectors, M_DEVBUF);
1924 	msix->msix_alloc = 0;
1925 	return (0);
1926 }
1927 
1928 /*
1929  * Return the max supported MSI-X messages this device supports.
1930  * Basically, assuming the MD code can alloc messages, this function
1931  * should return the maximum value that pci_alloc_msix() can return.
1932  * Thus, it is subject to the tunables, etc.
1933  */
1934 int
1935 pci_msix_count_method(device_t dev, device_t child)
1936 {
1937 	struct pci_devinfo *dinfo = device_get_ivars(child);
1938 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1939 
1940 	if (pci_do_msix && msix->msix_location != 0)
1941 		return (msix->msix_msgnum);
1942 	return (0);
1943 }
1944 
1945 int
1946 pci_msix_pba_bar_method(device_t dev, device_t child)
1947 {
1948 	struct pci_devinfo *dinfo = device_get_ivars(child);
1949 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1950 
1951 	if (pci_do_msix && msix->msix_location != 0)
1952 		return (msix->msix_pba_bar);
1953 	return (-1);
1954 }
1955 
1956 int
1957 pci_msix_table_bar_method(device_t dev, device_t child)
1958 {
1959 	struct pci_devinfo *dinfo = device_get_ivars(child);
1960 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1961 
1962 	if (pci_do_msix && msix->msix_location != 0)
1963 		return (msix->msix_table_bar);
1964 	return (-1);
1965 }
1966 
1967 /*
1968  * HyperTransport MSI mapping control
1969  */
1970 void
1971 pci_ht_map_msi(device_t dev, uint64_t addr)
1972 {
1973 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1974 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1975 
1976 	if (!ht->ht_msimap)
1977 		return;
1978 
1979 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1980 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1981 		/* Enable MSI -> HT mapping. */
1982 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1983 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1984 		    ht->ht_msictrl, 2);
1985 	}
1986 
1987 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1988 		/* Disable MSI -> HT mapping. */
1989 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1990 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1991 		    ht->ht_msictrl, 2);
1992 	}
1993 }
1994 
1995 int
1996 pci_get_max_payload(device_t dev)
1997 {
1998 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1999 	int cap;
2000 	uint16_t val;
2001 
2002 	cap = dinfo->cfg.pcie.pcie_location;
2003 	if (cap == 0)
2004 		return (0);
2005 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2006 	val &= PCIEM_CTL_MAX_PAYLOAD;
2007 	val >>= 5;
2008 	return (1 << (val + 7));
2009 }
2010 
2011 int
2012 pci_get_max_read_req(device_t dev)
2013 {
2014 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2015 	int cap;
2016 	uint16_t val;
2017 
2018 	cap = dinfo->cfg.pcie.pcie_location;
2019 	if (cap == 0)
2020 		return (0);
2021 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2022 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2023 	val >>= 12;
2024 	return (1 << (val + 7));
2025 }
2026 
2027 int
2028 pci_set_max_read_req(device_t dev, int size)
2029 {
2030 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2031 	int cap;
2032 	uint16_t val;
2033 
2034 	cap = dinfo->cfg.pcie.pcie_location;
2035 	if (cap == 0)
2036 		return (0);
2037 	if (size < 128)
2038 		size = 128;
2039 	if (size > 4096)
2040 		size = 4096;
2041 	size = (1 << (fls(size) - 1));
2042 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2043 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2044 	val |= (fls(size) - 8) << 12;
2045 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2046 	return (size);
2047 }
2048 
2049 uint32_t
2050 pcie_read_config(device_t dev, int reg, int width)
2051 {
2052 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2053 	int cap;
2054 
2055 	cap = dinfo->cfg.pcie.pcie_location;
2056 	if (cap == 0) {
2057 		if (width == 2)
2058 			return (0xffff);
2059 		return (0xffffffff);
2060 	}
2061 
2062 	return (pci_read_config(dev, cap + reg, width));
2063 }
2064 
2065 void
2066 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2067 {
2068 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2069 	int cap;
2070 
2071 	cap = dinfo->cfg.pcie.pcie_location;
2072 	if (cap == 0)
2073 		return;
2074 	pci_write_config(dev, cap + reg, value, width);
2075 }
2076 
2077 /*
2078  * Adjusts a PCI-e capability register by clearing the bits in mask
2079  * and setting the bits in (value & mask).  Bits not set in mask are
2080  * not adjusted.
2081  *
2082  * Returns the old value on success or all ones on failure.
2083  */
2084 uint32_t
2085 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2086     int width)
2087 {
2088 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2089 	uint32_t old, new;
2090 	int cap;
2091 
2092 	cap = dinfo->cfg.pcie.pcie_location;
2093 	if (cap == 0) {
2094 		if (width == 2)
2095 			return (0xffff);
2096 		return (0xffffffff);
2097 	}
2098 
2099 	old = pci_read_config(dev, cap + reg, width);
2100 	new = old & ~mask;
2101 	new |= (value & mask);
2102 	pci_write_config(dev, cap + reg, new, width);
2103 	return (old);
2104 }
2105 
2106 /*
2107  * Support for MSI message signalled interrupts.
2108  */
2109 void
2110 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2111     uint16_t data)
2112 {
2113 	struct pci_devinfo *dinfo = device_get_ivars(child);
2114 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2115 
2116 	/* Write data and address values. */
2117 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2118 	    address & 0xffffffff, 4);
2119 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2120 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2121 		    address >> 32, 4);
2122 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2123 		    data, 2);
2124 	} else
2125 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2126 		    2);
2127 
2128 	/* Enable MSI in the control register. */
2129 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2130 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2131 	    msi->msi_ctrl, 2);
2132 
2133 	/* Enable MSI -> HT mapping. */
2134 	pci_ht_map_msi(child, address);
2135 }
2136 
2137 void
2138 pci_disable_msi_method(device_t dev, device_t child)
2139 {
2140 	struct pci_devinfo *dinfo = device_get_ivars(child);
2141 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2142 
2143 	/* Disable MSI -> HT mapping. */
2144 	pci_ht_map_msi(child, 0);
2145 
2146 	/* Disable MSI in the control register. */
2147 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2148 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2149 	    msi->msi_ctrl, 2);
2150 }
2151 
2152 /*
2153  * Restore MSI registers during resume.  If MSI is enabled then
2154  * restore the data and address registers in addition to the control
2155  * register.
2156  */
2157 static void
2158 pci_resume_msi(device_t dev)
2159 {
2160 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2161 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2162 	uint64_t address;
2163 	uint16_t data;
2164 
2165 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2166 		address = msi->msi_addr;
2167 		data = msi->msi_data;
2168 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2169 		    address & 0xffffffff, 4);
2170 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2171 			pci_write_config(dev, msi->msi_location +
2172 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2173 			pci_write_config(dev, msi->msi_location +
2174 			    PCIR_MSI_DATA_64BIT, data, 2);
2175 		} else
2176 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2177 			    data, 2);
2178 	}
2179 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2180 	    2);
2181 }
2182 
2183 static int
2184 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2185 {
2186 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2187 	pcicfgregs *cfg = &dinfo->cfg;
2188 	struct resource_list_entry *rle;
2189 	struct msix_table_entry *mte;
2190 	struct msix_vector *mv;
2191 	uint64_t addr;
2192 	uint32_t data;
2193 	int error, i, j;
2194 
2195 	/*
2196 	 * Handle MSI first.  We try to find this IRQ among our list
2197 	 * of MSI IRQs.  If we find it, we request updated address and
2198 	 * data registers and apply the results.
2199 	 */
2200 	if (cfg->msi.msi_alloc > 0) {
2201 
2202 		/* If we don't have any active handlers, nothing to do. */
2203 		if (cfg->msi.msi_handlers == 0)
2204 			return (0);
2205 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2206 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2207 			    i + 1);
2208 			if (rle->start == irq) {
2209 				error = PCIB_MAP_MSI(device_get_parent(bus),
2210 				    dev, irq, &addr, &data);
2211 				if (error)
2212 					return (error);
2213 				pci_disable_msi(dev);
2214 				dinfo->cfg.msi.msi_addr = addr;
2215 				dinfo->cfg.msi.msi_data = data;
2216 				pci_enable_msi(dev, addr, data);
2217 				return (0);
2218 			}
2219 		}
2220 		return (ENOENT);
2221 	}
2222 
2223 	/*
2224 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2225 	 * we request the updated mapping info.  If that works, we go
2226 	 * through all the slots that use this IRQ and update them.
2227 	 */
2228 	if (cfg->msix.msix_alloc > 0) {
2229 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2230 			mv = &cfg->msix.msix_vectors[i];
2231 			if (mv->mv_irq == irq) {
2232 				error = PCIB_MAP_MSI(device_get_parent(bus),
2233 				    dev, irq, &addr, &data);
2234 				if (error)
2235 					return (error);
2236 				mv->mv_address = addr;
2237 				mv->mv_data = data;
2238 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2239 					mte = &cfg->msix.msix_table[j];
2240 					if (mte->mte_vector != i + 1)
2241 						continue;
2242 					if (mte->mte_handlers == 0)
2243 						continue;
2244 					pci_mask_msix(dev, j);
2245 					pci_enable_msix(dev, j, addr, data);
2246 					pci_unmask_msix(dev, j);
2247 				}
2248 			}
2249 		}
2250 		return (ENOENT);
2251 	}
2252 
2253 	return (ENOENT);
2254 }
2255 
2256 /*
2257  * Returns true if the specified device is blacklisted because MSI
2258  * doesn't work.
2259  */
2260 int
2261 pci_msi_device_blacklisted(device_t dev)
2262 {
2263 
2264 	if (!pci_honor_msi_blacklist)
2265 		return (0);
2266 
2267 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2268 }
2269 
2270 /*
2271  * Determine if MSI is blacklisted globally on this system.  Currently,
2272  * we just check for blacklisted chipsets as represented by the
2273  * host-PCI bridge at device 0:0:0.  In the future, it may become
2274  * necessary to check other system attributes, such as the kenv values
2275  * that give the motherboard manufacturer and model number.
2276  */
2277 static int
2278 pci_msi_blacklisted(void)
2279 {
2280 	device_t dev;
2281 
2282 	if (!pci_honor_msi_blacklist)
2283 		return (0);
2284 
2285 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2286 	if (!(pcie_chipset || pcix_chipset)) {
2287 		if (vm_guest != VM_GUEST_NO) {
2288 			/*
2289 			 * Whitelist older chipsets in virtual
2290 			 * machines known to support MSI.
2291 			 */
2292 			dev = pci_find_bsf(0, 0, 0);
2293 			if (dev != NULL)
2294 				return (!pci_has_quirk(pci_get_devid(dev),
2295 					PCI_QUIRK_ENABLE_MSI_VM));
2296 		}
2297 		return (1);
2298 	}
2299 
2300 	dev = pci_find_bsf(0, 0, 0);
2301 	if (dev != NULL)
2302 		return (pci_msi_device_blacklisted(dev));
2303 	return (0);
2304 }
2305 
2306 /*
2307  * Returns true if the specified device is blacklisted because MSI-X
2308  * doesn't work.  Note that this assumes that if MSI doesn't work,
2309  * MSI-X doesn't either.
2310  */
2311 int
2312 pci_msix_device_blacklisted(device_t dev)
2313 {
2314 
2315 	if (!pci_honor_msi_blacklist)
2316 		return (0);
2317 
2318 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2319 		return (1);
2320 
2321 	return (pci_msi_device_blacklisted(dev));
2322 }
2323 
2324 /*
2325  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2326  * is blacklisted, assume that MSI-X is as well.  Check for additional
2327  * chipsets where MSI works but MSI-X does not.
2328  */
2329 static int
2330 pci_msix_blacklisted(void)
2331 {
2332 	device_t dev;
2333 
2334 	if (!pci_honor_msi_blacklist)
2335 		return (0);
2336 
2337 	dev = pci_find_bsf(0, 0, 0);
2338 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2339 	    PCI_QUIRK_DISABLE_MSIX))
2340 		return (1);
2341 
2342 	return (pci_msi_blacklisted());
2343 }
2344 
2345 /*
2346  * Attempt to allocate *count MSI messages.  The actual number allocated is
2347  * returned in *count.  After this function returns, each message will be
2348  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2349  */
2350 int
2351 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2352 {
2353 	struct pci_devinfo *dinfo = device_get_ivars(child);
2354 	pcicfgregs *cfg = &dinfo->cfg;
2355 	struct resource_list_entry *rle;
2356 	int actual, error, i, irqs[32];
2357 	uint16_t ctrl;
2358 
2359 	/* Don't let count == 0 get us into trouble. */
2360 	if (*count == 0)
2361 		return (EINVAL);
2362 
2363 	/* If rid 0 is allocated, then fail. */
2364 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2365 	if (rle != NULL && rle->res != NULL)
2366 		return (ENXIO);
2367 
2368 	/* Already have allocated messages? */
2369 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2370 		return (ENXIO);
2371 
2372 	/* If MSI is blacklisted for this system, fail. */
2373 	if (pci_msi_blacklisted())
2374 		return (ENXIO);
2375 
2376 	/* MSI capability present? */
2377 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2378 		return (ENODEV);
2379 
2380 	if (bootverbose)
2381 		device_printf(child,
2382 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2383 		    *count, cfg->msi.msi_msgnum);
2384 
2385 	/* Don't ask for more than the device supports. */
2386 	actual = min(*count, cfg->msi.msi_msgnum);
2387 
2388 	/* Don't ask for more than 32 messages. */
2389 	actual = min(actual, 32);
2390 
2391 	/* MSI requires power of 2 number of messages. */
2392 	if (!powerof2(actual))
2393 		return (EINVAL);
2394 
2395 	for (;;) {
2396 		/* Try to allocate N messages. */
2397 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2398 		    actual, irqs);
2399 		if (error == 0)
2400 			break;
2401 		if (actual == 1)
2402 			return (error);
2403 
2404 		/* Try N / 2. */
2405 		actual >>= 1;
2406 	}
2407 
2408 	/*
2409 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2410 	 * resources in the irqs[] array, so add new resources
2411 	 * starting at rid 1.
2412 	 */
2413 	for (i = 0; i < actual; i++)
2414 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2415 		    irqs[i], irqs[i], 1);
2416 
2417 	if (bootverbose) {
2418 		if (actual == 1)
2419 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2420 		else {
2421 			int run;
2422 
2423 			/*
2424 			 * Be fancy and try to print contiguous runs
2425 			 * of IRQ values as ranges.  'run' is true if
2426 			 * we are in a range.
2427 			 */
2428 			device_printf(child, "using IRQs %d", irqs[0]);
2429 			run = 0;
2430 			for (i = 1; i < actual; i++) {
2431 
2432 				/* Still in a run? */
2433 				if (irqs[i] == irqs[i - 1] + 1) {
2434 					run = 1;
2435 					continue;
2436 				}
2437 
2438 				/* Finish previous range. */
2439 				if (run) {
2440 					printf("-%d", irqs[i - 1]);
2441 					run = 0;
2442 				}
2443 
2444 				/* Start new range. */
2445 				printf(",%d", irqs[i]);
2446 			}
2447 
2448 			/* Unfinished range? */
2449 			if (run)
2450 				printf("-%d", irqs[actual - 1]);
2451 			printf(" for MSI\n");
2452 		}
2453 	}
2454 
2455 	/* Update control register with actual count. */
2456 	ctrl = cfg->msi.msi_ctrl;
2457 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2458 	ctrl |= (ffs(actual) - 1) << 4;
2459 	cfg->msi.msi_ctrl = ctrl;
2460 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2461 
2462 	/* Update counts of alloc'd messages. */
2463 	cfg->msi.msi_alloc = actual;
2464 	cfg->msi.msi_handlers = 0;
2465 	*count = actual;
2466 	return (0);
2467 }
2468 
2469 /* Release the MSI messages associated with this device. */
2470 int
2471 pci_release_msi_method(device_t dev, device_t child)
2472 {
2473 	struct pci_devinfo *dinfo = device_get_ivars(child);
2474 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2475 	struct resource_list_entry *rle;
2476 	int error, i, irqs[32];
2477 
2478 	/* Try MSI-X first. */
2479 	error = pci_release_msix(dev, child);
2480 	if (error != ENODEV)
2481 		return (error);
2482 
2483 	/* Do we have any messages to release? */
2484 	if (msi->msi_alloc == 0)
2485 		return (ENODEV);
2486 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2487 
2488 	/* Make sure none of the resources are allocated. */
2489 	if (msi->msi_handlers > 0)
2490 		return (EBUSY);
2491 	for (i = 0; i < msi->msi_alloc; i++) {
2492 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2493 		KASSERT(rle != NULL, ("missing MSI resource"));
2494 		if (rle->res != NULL)
2495 			return (EBUSY);
2496 		irqs[i] = rle->start;
2497 	}
2498 
2499 	/* Update control register with 0 count. */
2500 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2501 	    ("%s: MSI still enabled", __func__));
2502 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2503 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2504 	    msi->msi_ctrl, 2);
2505 
2506 	/* Release the messages. */
2507 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2508 	for (i = 0; i < msi->msi_alloc; i++)
2509 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2510 
2511 	/* Update alloc count. */
2512 	msi->msi_alloc = 0;
2513 	msi->msi_addr = 0;
2514 	msi->msi_data = 0;
2515 	return (0);
2516 }
2517 
2518 /*
2519  * Return the max supported MSI messages this device supports.
2520  * Basically, assuming the MD code can alloc messages, this function
2521  * should return the maximum value that pci_alloc_msi() can return.
2522  * Thus, it is subject to the tunables, etc.
2523  */
2524 int
2525 pci_msi_count_method(device_t dev, device_t child)
2526 {
2527 	struct pci_devinfo *dinfo = device_get_ivars(child);
2528 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2529 
2530 	if (pci_do_msi && msi->msi_location != 0)
2531 		return (msi->msi_msgnum);
2532 	return (0);
2533 }
2534 
2535 /* free pcicfgregs structure and all depending data structures */
2536 
2537 int
2538 pci_freecfg(struct pci_devinfo *dinfo)
2539 {
2540 	struct devlist *devlist_head;
2541 	struct pci_map *pm, *next;
2542 	int i;
2543 
2544 	devlist_head = &pci_devq;
2545 
2546 	if (dinfo->cfg.vpd.vpd_reg) {
2547 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2548 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2549 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2550 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2551 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2552 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2553 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2554 	}
2555 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2556 		free(pm, M_DEVBUF);
2557 	}
2558 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2559 	free(dinfo, M_DEVBUF);
2560 
2561 	/* increment the generation count */
2562 	pci_generation++;
2563 
2564 	/* we're losing one device */
2565 	pci_numdevs--;
2566 	return (0);
2567 }
2568 
/*
 * PCI power management
 */
2572 int
2573 pci_set_powerstate_method(device_t dev, device_t child, int state)
2574 {
2575 	struct pci_devinfo *dinfo = device_get_ivars(child);
2576 	pcicfgregs *cfg = &dinfo->cfg;
2577 	uint16_t status;
2578 	int oldstate, highest, delay;
2579 
2580 	if (cfg->pp.pp_cap == 0)
2581 		return (EOPNOTSUPP);
2582 
2583 	/*
2584 	 * Optimize a no state change request away.  While it would be OK to
2585 	 * write to the hardware in theory, some devices have shown odd
2586 	 * behavior when going from D3 -> D3.
2587 	 */
2588 	oldstate = pci_get_powerstate(child);
2589 	if (oldstate == state)
2590 		return (0);
2591 
2592 	/*
2593 	 * The PCI power management specification states that after a state
2594 	 * transition between PCI power states, system software must
2595 	 * guarantee a minimal delay before the function accesses the device.
2596 	 * Compute the worst case delay that we need to guarantee before we
2597 	 * access the device.  Many devices will be responsive much more
2598 	 * quickly than this delay, but there are some that don't respond
2599 	 * instantly to state changes.  Transitions to/from D3 state require
2600 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2601 	 * is done below with DELAY rather than a sleeper function because
2602 	 * this function can be called from contexts where we cannot sleep.
2603 	 */
2604 	highest = (oldstate > state) ? oldstate : state;
2605 	if (highest == PCI_POWERSTATE_D3)
2606 	    delay = 10000;
2607 	else if (highest == PCI_POWERSTATE_D2)
2608 	    delay = 200;
2609 	else
2610 	    delay = 0;
2611 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2612 	    & ~PCIM_PSTAT_DMASK;
2613 	switch (state) {
2614 	case PCI_POWERSTATE_D0:
2615 		status |= PCIM_PSTAT_D0;
2616 		break;
2617 	case PCI_POWERSTATE_D1:
2618 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2619 			return (EOPNOTSUPP);
2620 		status |= PCIM_PSTAT_D1;
2621 		break;
2622 	case PCI_POWERSTATE_D2:
2623 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2624 			return (EOPNOTSUPP);
2625 		status |= PCIM_PSTAT_D2;
2626 		break;
2627 	case PCI_POWERSTATE_D3:
2628 		status |= PCIM_PSTAT_D3;
2629 		break;
2630 	default:
2631 		return (EINVAL);
2632 	}
2633 
2634 	if (bootverbose)
2635 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2636 		    state);
2637 
2638 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2639 	if (delay)
2640 		DELAY(delay);
2641 	return (0);
2642 }
2643 
2644 int
2645 pci_get_powerstate_method(device_t dev, device_t child)
2646 {
2647 	struct pci_devinfo *dinfo = device_get_ivars(child);
2648 	pcicfgregs *cfg = &dinfo->cfg;
2649 	uint16_t status;
2650 	int result;
2651 
2652 	if (cfg->pp.pp_cap != 0) {
2653 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2654 		switch (status & PCIM_PSTAT_DMASK) {
2655 		case PCIM_PSTAT_D0:
2656 			result = PCI_POWERSTATE_D0;
2657 			break;
2658 		case PCIM_PSTAT_D1:
2659 			result = PCI_POWERSTATE_D1;
2660 			break;
2661 		case PCIM_PSTAT_D2:
2662 			result = PCI_POWERSTATE_D2;
2663 			break;
2664 		case PCIM_PSTAT_D3:
2665 			result = PCI_POWERSTATE_D3;
2666 			break;
2667 		default:
2668 			result = PCI_POWERSTATE_UNKNOWN;
2669 			break;
2670 		}
2671 	} else {
2672 		/* No support, device is always at D0 */
2673 		result = PCI_POWERSTATE_D0;
2674 	}
2675 	return (result);
2676 }
2677 
2678 /*
2679  * Some convenience functions for PCI device drivers.
2680  */
2681 
2682 static __inline void
2683 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2684 {
2685 	uint16_t	command;
2686 
2687 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2688 	command |= bit;
2689 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2690 }
2691 
2692 static __inline void
2693 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2694 {
2695 	uint16_t	command;
2696 
2697 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2698 	command &= ~bit;
2699 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2700 }
2701 
2702 int
2703 pci_enable_busmaster_method(device_t dev, device_t child)
2704 {
2705 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2706 	return (0);
2707 }
2708 
2709 int
2710 pci_disable_busmaster_method(device_t dev, device_t child)
2711 {
2712 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2713 	return (0);
2714 }
2715 
2716 int
2717 pci_enable_io_method(device_t dev, device_t child, int space)
2718 {
2719 	uint16_t bit;
2720 
2721 	switch(space) {
2722 	case SYS_RES_IOPORT:
2723 		bit = PCIM_CMD_PORTEN;
2724 		break;
2725 	case SYS_RES_MEMORY:
2726 		bit = PCIM_CMD_MEMEN;
2727 		break;
2728 	default:
2729 		return (EINVAL);
2730 	}
2731 	pci_set_command_bit(dev, child, bit);
2732 	return (0);
2733 }
2734 
2735 int
2736 pci_disable_io_method(device_t dev, device_t child, int space)
2737 {
2738 	uint16_t bit;
2739 
2740 	switch(space) {
2741 	case SYS_RES_IOPORT:
2742 		bit = PCIM_CMD_PORTEN;
2743 		break;
2744 	case SYS_RES_MEMORY:
2745 		bit = PCIM_CMD_MEMEN;
2746 		break;
2747 	default:
2748 		return (EINVAL);
2749 	}
2750 	pci_clear_command_bit(dev, child, bit);
2751 	return (0);
2752 }
2753 
2754 /*
2755  * New style pci driver.  Parent device is either a pci-host-bridge or a
2756  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2757  */
2758 
2759 void
2760 pci_print_verbose(struct pci_devinfo *dinfo)
2761 {
2762 
2763 	if (bootverbose) {
2764 		pcicfgregs *cfg = &dinfo->cfg;
2765 
2766 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2767 		    cfg->vendor, cfg->device, cfg->revid);
2768 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2769 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2770 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2771 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2772 		    cfg->mfdev);
2773 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2774 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2775 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2776 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2777 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2778 		if (cfg->intpin > 0)
2779 			printf("\tintpin=%c, irq=%d\n",
2780 			    cfg->intpin +'a' -1, cfg->intline);
2781 		if (cfg->pp.pp_cap) {
2782 			uint16_t status;
2783 
2784 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2785 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2786 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2787 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2788 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2789 			    status & PCIM_PSTAT_DMASK);
2790 		}
2791 		if (cfg->msi.msi_location) {
2792 			int ctrl;
2793 
2794 			ctrl = cfg->msi.msi_ctrl;
2795 			printf("\tMSI supports %d message%s%s%s\n",
2796 			    cfg->msi.msi_msgnum,
2797 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2798 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2799 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2800 		}
2801 		if (cfg->msix.msix_location) {
2802 			printf("\tMSI-X supports %d message%s ",
2803 			    cfg->msix.msix_msgnum,
2804 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2805 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2806 				printf("in map 0x%x\n",
2807 				    cfg->msix.msix_table_bar);
2808 			else
2809 				printf("in maps 0x%x and 0x%x\n",
2810 				    cfg->msix.msix_table_bar,
2811 				    cfg->msix.msix_pba_bar);
2812 		}
2813 	}
2814 }
2815 
2816 static int
2817 pci_porten(device_t dev)
2818 {
2819 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2820 }
2821 
2822 static int
2823 pci_memen(device_t dev)
2824 {
2825 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2826 }
2827 
2828 void
2829 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2830     int *bar64)
2831 {
2832 	struct pci_devinfo *dinfo;
2833 	pci_addr_t map, testval;
2834 	int ln2range;
2835 	uint16_t cmd;
2836 
2837 	/*
2838 	 * The device ROM BAR is special.  It is always a 32-bit
2839 	 * memory BAR.  Bit 0 is special and should not be set when
2840 	 * sizing the BAR.
2841 	 */
2842 	dinfo = device_get_ivars(dev);
2843 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2844 		map = pci_read_config(dev, reg, 4);
2845 		pci_write_config(dev, reg, 0xfffffffe, 4);
2846 		testval = pci_read_config(dev, reg, 4);
2847 		pci_write_config(dev, reg, map, 4);
2848 		*mapp = map;
2849 		*testvalp = testval;
2850 		if (bar64 != NULL)
2851 			*bar64 = 0;
2852 		return;
2853 	}
2854 
2855 	map = pci_read_config(dev, reg, 4);
2856 	ln2range = pci_maprange(map);
2857 	if (ln2range == 64)
2858 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2859 
2860 	/*
2861 	 * Disable decoding via the command register before
2862 	 * determining the BAR's length since we will be placing it in
2863 	 * a weird state.
2864 	 */
2865 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2866 	pci_write_config(dev, PCIR_COMMAND,
2867 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2868 
2869 	/*
2870 	 * Determine the BAR's length by writing all 1's.  The bottom
2871 	 * log_2(size) bits of the BAR will stick as 0 when we read
2872 	 * the value back.
2873 	 */
2874 	pci_write_config(dev, reg, 0xffffffff, 4);
2875 	testval = pci_read_config(dev, reg, 4);
2876 	if (ln2range == 64) {
2877 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2878 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2879 	}
2880 
2881 	/*
2882 	 * Restore the original value of the BAR.  We may have reprogrammed
2883 	 * the BAR of the low-level console device and when booting verbose,
2884 	 * we need the console device addressable.
2885 	 */
2886 	pci_write_config(dev, reg, map, 4);
2887 	if (ln2range == 64)
2888 		pci_write_config(dev, reg + 4, map >> 32, 4);
2889 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2890 
2891 	*mapp = map;
2892 	*testvalp = testval;
2893 	if (bar64 != NULL)
2894 		*bar64 = (ln2range == 64);
2895 }
2896 
2897 static void
2898 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2899 {
2900 	struct pci_devinfo *dinfo;
2901 	int ln2range;
2902 
2903 	/* The device ROM BAR is always a 32-bit memory BAR. */
2904 	dinfo = device_get_ivars(dev);
2905 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2906 		ln2range = 32;
2907 	else
2908 		ln2range = pci_maprange(pm->pm_value);
2909 	pci_write_config(dev, pm->pm_reg, base, 4);
2910 	if (ln2range == 64)
2911 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2912 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2913 	if (ln2range == 64)
2914 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2915 		    pm->pm_reg + 4, 4) << 32;
2916 }
2917 
2918 struct pci_map *
2919 pci_find_bar(device_t dev, int reg)
2920 {
2921 	struct pci_devinfo *dinfo;
2922 	struct pci_map *pm;
2923 
2924 	dinfo = device_get_ivars(dev);
2925 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2926 		if (pm->pm_reg == reg)
2927 			return (pm);
2928 	}
2929 	return (NULL);
2930 }
2931 
2932 int
2933 pci_bar_enabled(device_t dev, struct pci_map *pm)
2934 {
2935 	struct pci_devinfo *dinfo;
2936 	uint16_t cmd;
2937 
2938 	dinfo = device_get_ivars(dev);
2939 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2940 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2941 		return (0);
2942 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2943 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2944 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2945 	else
2946 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2947 }
2948 
2949 struct pci_map *
2950 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2951 {
2952 	struct pci_devinfo *dinfo;
2953 	struct pci_map *pm, *prev;
2954 
2955 	dinfo = device_get_ivars(dev);
2956 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2957 	pm->pm_reg = reg;
2958 	pm->pm_value = value;
2959 	pm->pm_size = size;
2960 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2961 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2962 		    reg));
2963 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2964 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2965 			break;
2966 	}
2967 	if (prev != NULL)
2968 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2969 	else
2970 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2971 	return (pm);
2972 }
2973 
2974 static void
2975 pci_restore_bars(device_t dev)
2976 {
2977 	struct pci_devinfo *dinfo;
2978 	struct pci_map *pm;
2979 	int ln2range;
2980 
2981 	dinfo = device_get_ivars(dev);
2982 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2983 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2984 			ln2range = 32;
2985 		else
2986 			ln2range = pci_maprange(pm->pm_value);
2987 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2988 		if (ln2range == 64)
2989 			pci_write_config(dev, pm->pm_reg + 4,
2990 			    pm->pm_value >> 32, 4);
2991 	}
2992 }
2993 
2994 /*
2995  * Add a resource based on a pci map register. Return 1 if the map
2996  * register is a 32bit map register or 2 if it is a 64bit register.
2997  */
2998 static int
2999 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3000     int force, int prefetch)
3001 {
3002 	struct pci_map *pm;
3003 	pci_addr_t base, map, testval;
3004 	pci_addr_t start, end, count;
3005 	int barlen, basezero, flags, maprange, mapsize, type;
3006 	uint16_t cmd;
3007 	struct resource *res;
3008 
3009 	/*
3010 	 * The BAR may already exist if the device is a CardBus card
3011 	 * whose CIS is stored in this BAR.
3012 	 */
3013 	pm = pci_find_bar(dev, reg);
3014 	if (pm != NULL) {
3015 		maprange = pci_maprange(pm->pm_value);
3016 		barlen = maprange == 64 ? 2 : 1;
3017 		return (barlen);
3018 	}
3019 
3020 	pci_read_bar(dev, reg, &map, &testval, NULL);
3021 	if (PCI_BAR_MEM(map)) {
3022 		type = SYS_RES_MEMORY;
3023 		if (map & PCIM_BAR_MEM_PREFETCH)
3024 			prefetch = 1;
3025 	} else
3026 		type = SYS_RES_IOPORT;
3027 	mapsize = pci_mapsize(testval);
3028 	base = pci_mapbase(map);
3029 #ifdef __PCI_BAR_ZERO_VALID
3030 	basezero = 0;
3031 #else
3032 	basezero = base == 0;
3033 #endif
3034 	maprange = pci_maprange(map);
3035 	barlen = maprange == 64 ? 2 : 1;
3036 
3037 	/*
3038 	 * For I/O registers, if bottom bit is set, and the next bit up
3039 	 * isn't clear, we know we have a BAR that doesn't conform to the
3040 	 * spec, so ignore it.  Also, sanity check the size of the data
3041 	 * areas to the type of memory involved.  Memory must be at least
3042 	 * 16 bytes in size, while I/O ranges must be at least 4.
3043 	 */
3044 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3045 		return (barlen);
3046 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3047 	    (type == SYS_RES_IOPORT && mapsize < 2))
3048 		return (barlen);
3049 
3050 	/* Save a record of this BAR. */
3051 	pm = pci_add_bar(dev, reg, map, mapsize);
3052 	if (bootverbose) {
3053 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3054 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3055 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3056 			printf(", port disabled\n");
3057 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3058 			printf(", memory disabled\n");
3059 		else
3060 			printf(", enabled\n");
3061 	}
3062 
3063 	/*
3064 	 * If base is 0, then we have problems if this architecture does
3065 	 * not allow that.  It is best to ignore such entries for the
3066 	 * moment.  These will be allocated later if the driver specifically
3067 	 * requests them.  However, some removable busses look better when
3068 	 * all resources are allocated, so allow '0' to be overriden.
3069 	 *
3070 	 * Similarly treat maps whose values is the same as the test value
3071 	 * read back.  These maps have had all f's written to them by the
3072 	 * BIOS in an attempt to disable the resources.
3073 	 */
3074 	if (!force && (basezero || map == testval))
3075 		return (barlen);
3076 	if ((u_long)base != base) {
3077 		device_printf(bus,
3078 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3079 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3080 		    pci_get_function(dev), reg);
3081 		return (barlen);
3082 	}
3083 
3084 	/*
3085 	 * This code theoretically does the right thing, but has
3086 	 * undesirable side effects in some cases where peripherals
3087 	 * respond oddly to having these bits enabled.  Let the user
3088 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3089 	 * default).
3090 	 */
3091 	if (pci_enable_io_modes) {
3092 		/* Turn on resources that have been left off by a lazy BIOS */
3093 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3094 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3095 			cmd |= PCIM_CMD_PORTEN;
3096 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3097 		}
3098 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3099 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3100 			cmd |= PCIM_CMD_MEMEN;
3101 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3102 		}
3103 	} else {
3104 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3105 			return (barlen);
3106 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3107 			return (barlen);
3108 	}
3109 
3110 	count = (pci_addr_t)1 << mapsize;
3111 	flags = RF_ALIGNMENT_LOG2(mapsize);
3112 	if (prefetch)
3113 		flags |= RF_PREFETCHABLE;
3114 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3115 		start = 0;	/* Let the parent decide. */
3116 		end = ~0;
3117 	} else {
3118 		start = base;
3119 		end = base + count - 1;
3120 	}
3121 	resource_list_add(rl, type, reg, start, end, count);
3122 
3123 	/*
3124 	 * Try to allocate the resource for this BAR from our parent
3125 	 * so that this resource range is already reserved.  The
3126 	 * driver for this device will later inherit this resource in
3127 	 * pci_alloc_resource().
3128 	 */
3129 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3130 	    flags);
3131 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3132 		/*
3133 		 * If the allocation fails, try to allocate a resource for
3134 		 * this BAR using any available range.  The firmware felt
3135 		 * it was important enough to assign a resource, so don't
3136 		 * disable decoding if we can help it.
3137 		 */
3138 		resource_list_delete(rl, type, reg);
3139 		resource_list_add(rl, type, reg, 0, ~0, count);
3140 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3141 		    count, flags);
3142 	}
3143 	if (res == NULL) {
3144 		/*
3145 		 * If the allocation fails, delete the resource list entry
3146 		 * and disable decoding for this device.
3147 		 *
3148 		 * If the driver requests this resource in the future,
3149 		 * pci_reserve_map() will try to allocate a fresh
3150 		 * resource range.
3151 		 */
3152 		resource_list_delete(rl, type, reg);
3153 		pci_disable_io(dev, type);
3154 		if (bootverbose)
3155 			device_printf(bus,
3156 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3157 			    pci_get_domain(dev), pci_get_bus(dev),
3158 			    pci_get_slot(dev), pci_get_function(dev), reg);
3159 	} else {
3160 		start = rman_get_start(res);
3161 		pci_write_bar(dev, pm, start);
3162 	}
3163 	return (barlen);
3164 }
3165 
3166 /*
3167  * For ATA devices we need to decide early what addressing mode to use.
3168  * Legacy demands that the primary and secondary ATA ports sits on the
3169  * same addresses that old ISA hardware did. This dictates that we use
3170  * those addresses and ignore the BAR's if we cannot set PCI native
3171  * addressing mode.
3172  */
3173 static void
3174 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3175     uint32_t prefetchmask)
3176 {
3177 	int rid, type, progif;
3178 #if 0
3179 	/* if this device supports PCI native addressing use it */
3180 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3181 	if ((progif & 0x8a) == 0x8a) {
3182 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3183 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3184 			printf("Trying ATA native PCI addressing mode\n");
3185 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3186 		}
3187 	}
3188 #endif
3189 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3190 	type = SYS_RES_IOPORT;
3191 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3192 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3193 		    prefetchmask & (1 << 0));
3194 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3195 		    prefetchmask & (1 << 1));
3196 	} else {
3197 		rid = PCIR_BAR(0);
3198 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3199 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3200 		    0x1f7, 8, 0);
3201 		rid = PCIR_BAR(1);
3202 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3203 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3204 		    0x3f6, 1, 0);
3205 	}
3206 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3207 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3208 		    prefetchmask & (1 << 2));
3209 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3210 		    prefetchmask & (1 << 3));
3211 	} else {
3212 		rid = PCIR_BAR(2);
3213 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3214 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3215 		    0x177, 8, 0);
3216 		rid = PCIR_BAR(3);
3217 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3218 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3219 		    0x376, 1, 0);
3220 	}
3221 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3222 	    prefetchmask & (1 << 4));
3223 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3224 	    prefetchmask & (1 << 5));
3225 }
3226 
3227 static void
3228 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3229 {
3230 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3231 	pcicfgregs *cfg = &dinfo->cfg;
3232 	char tunable_name[64];
3233 	int irq;
3234 
3235 	/* Has to have an intpin to have an interrupt. */
3236 	if (cfg->intpin == 0)
3237 		return;
3238 
3239 	/* Let the user override the IRQ with a tunable. */
3240 	irq = PCI_INVALID_IRQ;
3241 	snprintf(tunable_name, sizeof(tunable_name),
3242 	    "hw.pci%d.%d.%d.INT%c.irq",
3243 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3244 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3245 		irq = PCI_INVALID_IRQ;
3246 
3247 	/*
3248 	 * If we didn't get an IRQ via the tunable, then we either use the
3249 	 * IRQ value in the intline register or we ask the bus to route an
3250 	 * interrupt for us.  If force_route is true, then we only use the
3251 	 * value in the intline register if the bus was unable to assign an
3252 	 * IRQ.
3253 	 */
3254 	if (!PCI_INTERRUPT_VALID(irq)) {
3255 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3256 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3257 		if (!PCI_INTERRUPT_VALID(irq))
3258 			irq = cfg->intline;
3259 	}
3260 
3261 	/* If after all that we don't have an IRQ, just bail. */
3262 	if (!PCI_INTERRUPT_VALID(irq))
3263 		return;
3264 
3265 	/* Update the config register if it changed. */
3266 	if (irq != cfg->intline) {
3267 		cfg->intline = irq;
3268 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3269 	}
3270 
3271 	/* Add this IRQ as rid 0 interrupt resource. */
3272 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3273 }
3274 
3275 /* Perform early OHCI takeover from SMM. */
3276 static void
3277 ohci_early_takeover(device_t self)
3278 {
3279 	struct resource *res;
3280 	uint32_t ctl;
3281 	int rid;
3282 	int i;
3283 
3284 	rid = PCIR_BAR(0);
3285 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3286 	if (res == NULL)
3287 		return;
3288 
3289 	ctl = bus_read_4(res, OHCI_CONTROL);
3290 	if (ctl & OHCI_IR) {
3291 		if (bootverbose)
3292 			printf("ohci early: "
3293 			    "SMM active, request owner change\n");
3294 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3295 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3296 			DELAY(1000);
3297 			ctl = bus_read_4(res, OHCI_CONTROL);
3298 		}
3299 		if (ctl & OHCI_IR) {
3300 			if (bootverbose)
3301 				printf("ohci early: "
3302 				    "SMM does not respond, resetting\n");
3303 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3304 		}
3305 		/* Disable interrupts */
3306 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3307 	}
3308 
3309 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3310 }
3311 
3312 /* Perform early UHCI takeover from SMM. */
3313 static void
3314 uhci_early_takeover(device_t self)
3315 {
3316 	struct resource *res;
3317 	int rid;
3318 
3319 	/*
3320 	 * Set the PIRQD enable bit and switch off all the others. We don't
3321 	 * want legacy support to interfere with us XXX Does this also mean
3322 	 * that the BIOS won't touch the keyboard anymore if it is connected
3323 	 * to the ports of the root hub?
3324 	 */
3325 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3326 
3327 	/* Disable interrupts */
3328 	rid = PCI_UHCI_BASE_REG;
3329 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3330 	if (res != NULL) {
3331 		bus_write_2(res, UHCI_INTR, 0);
3332 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3333 	}
3334 }
3335 
3336 /* Perform early EHCI takeover from SMM. */
3337 static void
3338 ehci_early_takeover(device_t self)
3339 {
3340 	struct resource *res;
3341 	uint32_t cparams;
3342 	uint32_t eec;
3343 	uint8_t eecp;
3344 	uint8_t bios_sem;
3345 	uint8_t offs;
3346 	int rid;
3347 	int i;
3348 
3349 	rid = PCIR_BAR(0);
3350 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3351 	if (res == NULL)
3352 		return;
3353 
3354 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3355 
3356 	/* Synchronise with the BIOS if it owns the controller. */
3357 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3358 	    eecp = EHCI_EECP_NEXT(eec)) {
3359 		eec = pci_read_config(self, eecp, 4);
3360 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3361 			continue;
3362 		}
3363 		bios_sem = pci_read_config(self, eecp +
3364 		    EHCI_LEGSUP_BIOS_SEM, 1);
3365 		if (bios_sem == 0) {
3366 			continue;
3367 		}
3368 		if (bootverbose)
3369 			printf("ehci early: "
3370 			    "SMM active, request owner change\n");
3371 
3372 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3373 
3374 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3375 			DELAY(1000);
3376 			bios_sem = pci_read_config(self, eecp +
3377 			    EHCI_LEGSUP_BIOS_SEM, 1);
3378 		}
3379 
3380 		if (bios_sem != 0) {
3381 			if (bootverbose)
3382 				printf("ehci early: "
3383 				    "SMM does not respond\n");
3384 		}
3385 		/* Disable interrupts */
3386 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3387 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3388 	}
3389 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3390 }
3391 
3392 /* Perform early XHCI takeover from SMM. */
3393 static void
3394 xhci_early_takeover(device_t self)
3395 {
3396 	struct resource *res;
3397 	uint32_t cparams;
3398 	uint32_t eec;
3399 	uint8_t eecp;
3400 	uint8_t bios_sem;
3401 	uint8_t offs;
3402 	int rid;
3403 	int i;
3404 
3405 	rid = PCIR_BAR(0);
3406 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3407 	if (res == NULL)
3408 		return;
3409 
3410 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3411 
3412 	eec = -1;
3413 
3414 	/* Synchronise with the BIOS if it owns the controller. */
3415 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3416 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3417 		eec = bus_read_4(res, eecp);
3418 
3419 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3420 			continue;
3421 
3422 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3423 		if (bios_sem == 0)
3424 			continue;
3425 
3426 		if (bootverbose)
3427 			printf("xhci early: "
3428 			    "SMM active, request owner change\n");
3429 
3430 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3431 
3432 		/* wait a maximum of 5 second */
3433 
3434 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3435 			DELAY(1000);
3436 			bios_sem = bus_read_1(res, eecp +
3437 			    XHCI_XECP_BIOS_SEM);
3438 		}
3439 
3440 		if (bios_sem != 0) {
3441 			if (bootverbose)
3442 				printf("xhci early: "
3443 				    "SMM does not respond\n");
3444 		}
3445 
3446 		/* Disable interrupts */
3447 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3448 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3449 		bus_read_4(res, offs + XHCI_USBSTS);
3450 	}
3451 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3452 }
3453 
3454 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3455 static void
3456 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3457     struct resource_list *rl)
3458 {
3459 	struct resource *res;
3460 	char *cp;
3461 	rman_res_t start, end, count;
3462 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3463 
3464 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3465 	case PCIM_HDRTYPE_BRIDGE:
3466 		sec_reg = PCIR_SECBUS_1;
3467 		sub_reg = PCIR_SUBBUS_1;
3468 		break;
3469 	case PCIM_HDRTYPE_CARDBUS:
3470 		sec_reg = PCIR_SECBUS_2;
3471 		sub_reg = PCIR_SUBBUS_2;
3472 		break;
3473 	default:
3474 		return;
3475 	}
3476 
3477 	/*
3478 	 * If the existing bus range is valid, attempt to reserve it
3479 	 * from our parent.  If this fails for any reason, clear the
3480 	 * secbus and subbus registers.
3481 	 *
3482 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3483 	 * This would at least preserve the existing sec_bus if it is
3484 	 * valid.
3485 	 */
3486 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3487 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3488 
3489 	/* Quirk handling. */
3490 	switch (pci_get_devid(dev)) {
3491 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3492 		sup_bus = pci_read_config(dev, 0x41, 1);
3493 		if (sup_bus != 0xff) {
3494 			sec_bus = sup_bus + 1;
3495 			sub_bus = sup_bus + 1;
3496 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3497 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3498 		}
3499 		break;
3500 
3501 	case 0x00dd10de:
3502 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3503 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3504 			break;
3505 		if (strncmp(cp, "Compal", 6) != 0) {
3506 			freeenv(cp);
3507 			break;
3508 		}
3509 		freeenv(cp);
3510 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3511 			break;
3512 		if (strncmp(cp, "08A0", 4) != 0) {
3513 			freeenv(cp);
3514 			break;
3515 		}
3516 		freeenv(cp);
3517 		if (sub_bus < 0xa) {
3518 			sub_bus = 0xa;
3519 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3520 		}
3521 		break;
3522 	}
3523 
3524 	if (bootverbose)
3525 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3526 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3527 		start = sec_bus;
3528 		end = sub_bus;
3529 		count = end - start + 1;
3530 
3531 		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
3532 
3533 		/*
3534 		 * If requested, clear secondary bus registers in
3535 		 * bridge devices to force a complete renumbering
3536 		 * rather than reserving the existing range.  However,
3537 		 * preserve the existing size.
3538 		 */
3539 		if (pci_clear_buses)
3540 			goto clear;
3541 
3542 		rid = 0;
3543 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3544 		    start, end, count, 0);
3545 		if (res != NULL)
3546 			return;
3547 
3548 		if (bootverbose)
3549 			device_printf(bus,
3550 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3551 			    pci_get_domain(dev), pci_get_bus(dev),
3552 			    pci_get_slot(dev), pci_get_function(dev));
3553 	}
3554 
3555 clear:
3556 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3557 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3558 }
3559 
3560 static struct resource *
3561 pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
3562     rman_res_t end, rman_res_t count, u_int flags)
3563 {
3564 	struct pci_devinfo *dinfo;
3565 	pcicfgregs *cfg;
3566 	struct resource_list *rl;
3567 	struct resource *res;
3568 	int sec_reg, sub_reg;
3569 
3570 	dinfo = device_get_ivars(child);
3571 	cfg = &dinfo->cfg;
3572 	rl = &dinfo->resources;
3573 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3574 	case PCIM_HDRTYPE_BRIDGE:
3575 		sec_reg = PCIR_SECBUS_1;
3576 		sub_reg = PCIR_SUBBUS_1;
3577 		break;
3578 	case PCIM_HDRTYPE_CARDBUS:
3579 		sec_reg = PCIR_SECBUS_2;
3580 		sub_reg = PCIR_SUBBUS_2;
3581 		break;
3582 	default:
3583 		return (NULL);
3584 	}
3585 
3586 	if (*rid != 0)
3587 		return (NULL);
3588 
3589 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3590 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3591 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3592 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3593 		    start, end, count, flags & ~RF_ACTIVE);
3594 		if (res == NULL) {
3595 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3596 			device_printf(child, "allocating %ju bus%s failed\n",
3597 			    count, count == 1 ? "" : "es");
3598 			return (NULL);
3599 		}
3600 		if (bootverbose)
3601 			device_printf(child,
3602 			    "Lazy allocation of %ju bus%s at %ju\n", count,
3603 			    count == 1 ? "" : "es", rman_get_start(res));
3604 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3605 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3606 	}
3607 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3608 	    end, count, flags));
3609 }
3610 #endif
3611 
3612 static int
3613 pci_ea_bei_to_rid(device_t dev, int bei)
3614 {
3615 #ifdef PCI_IOV
3616 	struct pci_devinfo *dinfo;
3617 	int iov_pos;
3618 	struct pcicfg_iov *iov;
3619 
3620 	dinfo = device_get_ivars(dev);
3621 	iov = dinfo->cfg.iov;
3622 	if (iov != NULL)
3623 		iov_pos = iov->iov_pos;
3624 	else
3625 		iov_pos = 0;
3626 #endif
3627 
3628 	/* Check if matches BAR */
3629 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3630 	    (bei <= PCIM_EA_BEI_BAR_5))
3631 		return (PCIR_BAR(bei));
3632 
3633 	/* Check ROM */
3634 	if (bei == PCIM_EA_BEI_ROM)
3635 		return (PCIR_BIOS);
3636 
3637 #ifdef PCI_IOV
3638 	/* Check if matches VF_BAR */
3639 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3640 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3641 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3642 		    iov_pos);
3643 #endif
3644 
3645 	return (-1);
3646 }
3647 
3648 int
3649 pci_ea_is_enabled(device_t dev, int rid)
3650 {
3651 	struct pci_ea_entry *ea;
3652 	struct pci_devinfo *dinfo;
3653 
3654 	dinfo = device_get_ivars(dev);
3655 
3656 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3657 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3658 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3659 	}
3660 
3661 	return (0);
3662 }
3663 
3664 void
3665 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3666 {
3667 	struct pci_ea_entry *ea;
3668 	struct pci_devinfo *dinfo;
3669 	pci_addr_t start, end, count;
3670 	struct resource_list *rl;
3671 	int type, flags, rid;
3672 	struct resource *res;
3673 	uint32_t tmp;
3674 #ifdef PCI_IOV
3675 	struct pcicfg_iov *iov;
3676 #endif
3677 
3678 	dinfo = device_get_ivars(dev);
3679 	rl = &dinfo->resources;
3680 	flags = 0;
3681 
3682 #ifdef PCI_IOV
3683 	iov = dinfo->cfg.iov;
3684 #endif
3685 
3686 	if (dinfo->cfg.ea.ea_location == 0)
3687 		return;
3688 
3689 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3690 
3691 		/*
3692 		 * TODO: Ignore EA-BAR if is not enabled.
3693 		 *   Currently the EA implementation supports
3694 		 *   only situation, where EA structure contains
3695 		 *   predefined entries. In case they are not enabled
3696 		 *   leave them unallocated and proceed with
3697 		 *   a legacy-BAR mechanism.
3698 		 */
3699 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3700 			continue;
3701 
3702 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3703 		case PCIM_EA_P_MEM_PREFETCH:
3704 		case PCIM_EA_P_VF_MEM_PREFETCH:
3705 			flags = RF_PREFETCHABLE;
3706 			/* FALLTHROUGH */
3707 		case PCIM_EA_P_VF_MEM:
3708 		case PCIM_EA_P_MEM:
3709 			type = SYS_RES_MEMORY;
3710 			break;
3711 		case PCIM_EA_P_IO:
3712 			type = SYS_RES_IOPORT;
3713 			break;
3714 		default:
3715 			continue;
3716 		}
3717 
3718 		if (alloc_iov != 0) {
3719 #ifdef PCI_IOV
3720 			/* Allocating IOV, confirm BEI matches */
3721 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3722 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3723 				continue;
3724 #else
3725 			continue;
3726 #endif
3727 		} else {
3728 			/* Allocating BAR, confirm BEI matches */
3729 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3730 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3731 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3732 				continue;
3733 		}
3734 
3735 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3736 		if (rid < 0)
3737 			continue;
3738 
3739 		/* Skip resources already allocated by EA */
3740 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3741 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3742 			continue;
3743 
3744 		start = ea->eae_base;
3745 		count = ea->eae_max_offset + 1;
3746 #ifdef PCI_IOV
3747 		if (iov != NULL)
3748 			count = count * iov->iov_num_vfs;
3749 #endif
3750 		end = start + count - 1;
3751 		if (count == 0)
3752 			continue;
3753 
3754 		resource_list_add(rl, type, rid, start, end, count);
3755 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3756 		    flags);
3757 		if (res == NULL) {
3758 			resource_list_delete(rl, type, rid);
3759 
3760 			/*
3761 			 * Failed to allocate using EA, disable entry.
3762 			 * Another attempt to allocation will be performed
3763 			 * further, but this time using legacy BAR registers
3764 			 */
3765 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3766 			tmp &= ~PCIM_EA_ENABLE;
3767 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3768 
3769 			/*
3770 			 * Disabling entry might fail in case it is hardwired.
3771 			 * Read flags again to match current status.
3772 			 */
3773 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3774 
3775 			continue;
3776 		}
3777 
3778 		/* As per specification, fill BAR with zeros */
3779 		pci_write_config(dev, rid, 0, 4);
3780 	}
3781 }
3782 
3783 void
3784 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3785 {
3786 	struct pci_devinfo *dinfo;
3787 	pcicfgregs *cfg;
3788 	struct resource_list *rl;
3789 	const struct pci_quirk *q;
3790 	uint32_t devid;
3791 	int i;
3792 
3793 	dinfo = device_get_ivars(dev);
3794 	cfg = &dinfo->cfg;
3795 	rl = &dinfo->resources;
3796 	devid = (cfg->device << 16) | cfg->vendor;
3797 
3798 	/* Allocate resources using Enhanced Allocation */
3799 	pci_add_resources_ea(bus, dev, 0);
3800 
3801 	/* ATA devices needs special map treatment */
3802 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3803 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3804 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3805 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3806 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3807 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3808 	else
3809 		for (i = 0; i < cfg->nummaps;) {
3810 			/* Skip resources already managed by EA */
3811 			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
3812 			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
3813 			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
3814 				i++;
3815 				continue;
3816 			}
3817 
3818 			/*
3819 			 * Skip quirked resources.
3820 			 */
3821 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3822 				if (q->devid == devid &&
3823 				    q->type == PCI_QUIRK_UNMAP_REG &&
3824 				    q->arg1 == PCIR_BAR(i))
3825 					break;
3826 			if (q->devid != 0) {
3827 				i++;
3828 				continue;
3829 			}
3830 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3831 			    prefetchmask & (1 << i));
3832 		}
3833 
3834 	/*
3835 	 * Add additional, quirked resources.
3836 	 */
3837 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3838 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3839 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3840 
3841 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3842 #ifdef __PCI_REROUTE_INTERRUPT
3843 		/*
3844 		 * Try to re-route interrupts. Sometimes the BIOS or
3845 		 * firmware may leave bogus values in these registers.
3846 		 * If the re-route fails, then just stick with what we
3847 		 * have.
3848 		 */
3849 		pci_assign_interrupt(bus, dev, 1);
3850 #else
3851 		pci_assign_interrupt(bus, dev, 0);
3852 #endif
3853 	}
3854 
3855 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3856 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3857 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3858 			xhci_early_takeover(dev);
3859 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3860 			ehci_early_takeover(dev);
3861 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3862 			ohci_early_takeover(dev);
3863 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3864 			uhci_early_takeover(dev);
3865 	}
3866 
3867 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3868 	/*
3869 	 * Reserve resources for secondary bus ranges behind bridge
3870 	 * devices.
3871 	 */
3872 	pci_reserve_secbus(bus, dev, cfg, rl);
3873 #endif
3874 }
3875 
3876 static struct pci_devinfo *
3877 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3878     int slot, int func)
3879 {
3880 	struct pci_devinfo *dinfo;
3881 
3882 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3883 	if (dinfo != NULL)
3884 		pci_add_child(dev, dinfo);
3885 
3886 	return (dinfo);
3887 }
3888 
3889 void
3890 pci_add_children(device_t dev, int domain, int busno)
3891 {
3892 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3893 	device_t pcib = device_get_parent(dev);
3894 	struct pci_devinfo *dinfo;
3895 	int maxslots;
3896 	int s, f, pcifunchigh;
3897 	uint8_t hdrtype;
3898 	int first_func;
3899 
3900 	/*
3901 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3902 	 * enable ARI.  We must enable ARI before detecting the rest of the
3903 	 * functions on this bus as ARI changes the set of slots and functions
3904 	 * that are legal on this bus.
3905 	 */
3906 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
3907 	if (dinfo != NULL && pci_enable_ari)
3908 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3909 
3910 	/*
3911 	 * Start looking for new devices on slot 0 at function 1 because we
3912 	 * just identified the device at slot 0, function 0.
3913 	 */
3914 	first_func = 1;
3915 
3916 	maxslots = PCIB_MAXSLOTS(pcib);
3917 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3918 		pcifunchigh = 0;
3919 		f = 0;
3920 		DELAY(1);
3921 		hdrtype = REG(PCIR_HDRTYPE, 1);
3922 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3923 			continue;
3924 		if (hdrtype & PCIM_MFDEV)
3925 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3926 		for (f = first_func; f <= pcifunchigh; f++)
3927 			pci_identify_function(pcib, dev, domain, busno, s, f);
3928 	}
3929 #undef REG
3930 }
3931 
3932 int
3933 pci_rescan_method(device_t dev)
3934 {
3935 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3936 	device_t pcib = device_get_parent(dev);
3937 	struct pci_softc *sc;
3938 	device_t child, *devlist, *unchanged;
3939 	int devcount, error, i, j, maxslots, oldcount;
3940 	int busno, domain, s, f, pcifunchigh;
3941 	uint8_t hdrtype;
3942 
3943 	/* No need to check for ARI on a rescan. */
3944 	error = device_get_children(dev, &devlist, &devcount);
3945 	if (error)
3946 		return (error);
3947 	if (devcount != 0) {
3948 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3949 		    M_NOWAIT | M_ZERO);
3950 		if (unchanged == NULL) {
3951 			free(devlist, M_TEMP);
3952 			return (ENOMEM);
3953 		}
3954 	} else
3955 		unchanged = NULL;
3956 
3957 	sc = device_get_softc(dev);
3958 	domain = pcib_get_domain(dev);
3959 	busno = pcib_get_bus(dev);
3960 	maxslots = PCIB_MAXSLOTS(pcib);
3961 	for (s = 0; s <= maxslots; s++) {
3962 		/* If function 0 is not present, skip to the next slot. */
3963 		f = 0;
3964 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3965 			continue;
3966 		pcifunchigh = 0;
3967 		hdrtype = REG(PCIR_HDRTYPE, 1);
3968 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3969 			continue;
3970 		if (hdrtype & PCIM_MFDEV)
3971 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3972 		for (f = 0; f <= pcifunchigh; f++) {
3973 			if (REG(PCIR_VENDOR, 2) == 0xfff)
3974 				continue;
3975 
3976 			/*
3977 			 * Found a valid function.  Check if a
3978 			 * device_t for this device already exists.
3979 			 */
3980 			for (i = 0; i < devcount; i++) {
3981 				child = devlist[i];
3982 				if (child == NULL)
3983 					continue;
3984 				if (pci_get_slot(child) == s &&
3985 				    pci_get_function(child) == f) {
3986 					unchanged[i] = child;
3987 					goto next_func;
3988 				}
3989 			}
3990 
3991 			pci_identify_function(pcib, dev, domain, busno, s, f);
3992 		next_func:;
3993 		}
3994 	}
3995 
3996 	/* Remove devices that are no longer present. */
3997 	for (i = 0; i < devcount; i++) {
3998 		if (unchanged[i] != NULL)
3999 			continue;
4000 		device_delete_child(dev, devlist[i]);
4001 	}
4002 
4003 	free(devlist, M_TEMP);
4004 	oldcount = devcount;
4005 
4006 	/* Try to attach the devices just added. */
4007 	error = device_get_children(dev, &devlist, &devcount);
4008 	if (error) {
4009 		free(unchanged, M_TEMP);
4010 		return (error);
4011 	}
4012 
4013 	for (i = 0; i < devcount; i++) {
4014 		for (j = 0; j < oldcount; j++) {
4015 			if (devlist[i] == unchanged[j])
4016 				goto next_device;
4017 		}
4018 
4019 		device_probe_and_attach(devlist[i]);
4020 	next_device:;
4021 	}
4022 
4023 	free(unchanged, M_TEMP);
4024 	free(devlist, M_TEMP);
4025 	return (0);
4026 #undef REG
4027 }
4028 
4029 #ifdef PCI_IOV
4030 device_t
4031 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4032     uint16_t did)
4033 {
4034 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4035 	device_t pcib;
4036 	int busno, slot, func;
4037 
4038 	pf_dinfo = device_get_ivars(pf);
4039 
4040 	pcib = device_get_parent(bus);
4041 
4042 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4043 
4044 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4045 	    slot, func, vid, did);
4046 
4047 	vf_dinfo->cfg.flags |= PCICFG_VF;
4048 	pci_add_child(bus, vf_dinfo);
4049 
4050 	return (vf_dinfo->cfg.dev);
4051 }
4052 
4053 device_t
4054 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
4055     uint16_t vid, uint16_t did)
4056 {
4057 
4058 	return (pci_add_iov_child(bus, pf, rid, vid, did));
4059 }
4060 #endif
4061 
4062 void
4063 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4064 {
4065 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4066 	device_set_ivars(dinfo->cfg.dev, dinfo);
4067 	resource_list_init(&dinfo->resources);
4068 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4069 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4070 	pci_print_verbose(dinfo);
4071 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4072 	pci_child_added(dinfo->cfg.dev);
4073 }
4074 
4075 void
4076 pci_child_added_method(device_t dev, device_t child)
4077 {
4078 
4079 }
4080 
4081 static int
4082 pci_probe(device_t dev)
4083 {
4084 
4085 	device_set_desc(dev, "PCI bus");
4086 
4087 	/* Allow other subclasses to override this driver. */
4088 	return (BUS_PROBE_GENERIC);
4089 }
4090 
4091 int
4092 pci_attach_common(device_t dev)
4093 {
4094 	struct pci_softc *sc;
4095 	int busno, domain;
4096 #ifdef PCI_DMA_BOUNDARY
4097 	int error, tag_valid;
4098 #endif
4099 #ifdef PCI_RES_BUS
4100 	int rid;
4101 #endif
4102 
4103 	sc = device_get_softc(dev);
4104 	domain = pcib_get_domain(dev);
4105 	busno = pcib_get_bus(dev);
4106 #ifdef PCI_RES_BUS
4107 	rid = 0;
4108 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
4109 	    1, 0);
4110 	if (sc->sc_bus == NULL) {
4111 		device_printf(dev, "failed to allocate bus number\n");
4112 		return (ENXIO);
4113 	}
4114 #endif
4115 	if (bootverbose)
4116 		device_printf(dev, "domain=%d, physical bus=%d\n",
4117 		    domain, busno);
4118 #ifdef PCI_DMA_BOUNDARY
4119 	tag_valid = 0;
4120 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
4121 	    devclass_find("pci")) {
4122 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
4123 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4124 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
4125 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
4126 		if (error)
4127 			device_printf(dev, "Failed to create DMA tag: %d\n",
4128 			    error);
4129 		else
4130 			tag_valid = 1;
4131 	}
4132 	if (!tag_valid)
4133 #endif
4134 		sc->sc_dma_tag = bus_get_dma_tag(dev);
4135 	return (0);
4136 }
4137 
4138 static int
4139 pci_attach(device_t dev)
4140 {
4141 	int busno, domain, error;
4142 
4143 	error = pci_attach_common(dev);
4144 	if (error)
4145 		return (error);
4146 
4147 	/*
4148 	 * Since there can be multiple independently numbered PCI
4149 	 * busses on systems with multiple PCI domains, we can't use
4150 	 * the unit number to decide which bus we are probing. We ask
4151 	 * the parent pcib what our domain and bus numbers are.
4152 	 */
4153 	domain = pcib_get_domain(dev);
4154 	busno = pcib_get_bus(dev);
4155 	pci_add_children(dev, domain, busno);
4156 	return (bus_generic_attach(dev));
4157 }
4158 
4159 static int
4160 pci_detach(device_t dev)
4161 {
4162 #ifdef PCI_RES_BUS
4163 	struct pci_softc *sc;
4164 #endif
4165 	int error;
4166 
4167 	error = bus_generic_detach(dev);
4168 	if (error)
4169 		return (error);
4170 #ifdef PCI_RES_BUS
4171 	sc = device_get_softc(dev);
4172 	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
4173 	if (error)
4174 		return (error);
4175 #endif
4176 	return (device_delete_children(dev));
4177 }
4178 
4179 static void
4180 pci_set_power_child(device_t dev, device_t child, int state)
4181 {
4182 	device_t pcib;
4183 	int dstate;
4184 
4185 	/*
4186 	 * Set the device to the given state.  If the firmware suggests
4187 	 * a different power state, use it instead.  If power management
4188 	 * is not present, the firmware is responsible for managing
4189 	 * device power.  Skip children who aren't attached since they
4190 	 * are handled separately.
4191 	 */
4192 	pcib = device_get_parent(dev);
4193 	dstate = state;
4194 	if (device_is_attached(child) &&
4195 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4196 		pci_set_powerstate(child, dstate);
4197 }
4198 
4199 int
4200 pci_suspend_child(device_t dev, device_t child)
4201 {
4202 	struct pci_devinfo *dinfo;
4203 	int error;
4204 
4205 	dinfo = device_get_ivars(child);
4206 
4207 	/*
4208 	 * Save the PCI configuration space for the child and set the
4209 	 * device in the appropriate power state for this sleep state.
4210 	 */
4211 	pci_cfg_save(child, dinfo, 0);
4212 
4213 	/* Suspend devices before potentially powering them down. */
4214 	error = bus_generic_suspend_child(dev, child);
4215 
4216 	if (error)
4217 		return (error);
4218 
4219 	if (pci_do_power_suspend)
4220 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4221 
4222 	return (0);
4223 }
4224 
4225 int
4226 pci_resume_child(device_t dev, device_t child)
4227 {
4228 	struct pci_devinfo *dinfo;
4229 
4230 	if (pci_do_power_resume)
4231 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4232 
4233 	dinfo = device_get_ivars(child);
4234 	pci_cfg_restore(child, dinfo);
4235 	if (!device_is_attached(child))
4236 		pci_cfg_save(child, dinfo, 1);
4237 
4238 	bus_generic_resume_child(dev, child);
4239 
4240 	return (0);
4241 }
4242 
4243 int
4244 pci_resume(device_t dev)
4245 {
4246 	device_t child, *devlist;
4247 	int error, i, numdevs;
4248 
4249 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4250 		return (error);
4251 
4252 	/*
4253 	 * Resume critical devices first, then everything else later.
4254 	 */
4255 	for (i = 0; i < numdevs; i++) {
4256 		child = devlist[i];
4257 		switch (pci_get_class(child)) {
4258 		case PCIC_DISPLAY:
4259 		case PCIC_MEMORY:
4260 		case PCIC_BRIDGE:
4261 		case PCIC_BASEPERIPH:
4262 			BUS_RESUME_CHILD(dev, child);
4263 			break;
4264 		}
4265 	}
4266 	for (i = 0; i < numdevs; i++) {
4267 		child = devlist[i];
4268 		switch (pci_get_class(child)) {
4269 		case PCIC_DISPLAY:
4270 		case PCIC_MEMORY:
4271 		case PCIC_BRIDGE:
4272 		case PCIC_BASEPERIPH:
4273 			break;
4274 		default:
4275 			BUS_RESUME_CHILD(dev, child);
4276 		}
4277 	}
4278 	free(devlist, M_TEMP);
4279 	return (0);
4280 }
4281 
4282 static void
4283 pci_load_vendor_data(void)
4284 {
4285 	caddr_t data;
4286 	void *ptr;
4287 	size_t sz;
4288 
4289 	data = preload_search_by_type("pci_vendor_data");
4290 	if (data != NULL) {
4291 		ptr = preload_fetch_addr(data);
4292 		sz = preload_fetch_size(data);
4293 		if (ptr != NULL && sz != 0) {
4294 			pci_vendordata = ptr;
4295 			pci_vendordata_size = sz;
4296 			/* terminate the database */
4297 			pci_vendordata[pci_vendordata_size] = '\n';
4298 		}
4299 	}
4300 }
4301 
4302 void
4303 pci_driver_added(device_t dev, driver_t *driver)
4304 {
4305 	int numdevs;
4306 	device_t *devlist;
4307 	device_t child;
4308 	struct pci_devinfo *dinfo;
4309 	int i;
4310 
4311 	if (bootverbose)
4312 		device_printf(dev, "driver added\n");
4313 	DEVICE_IDENTIFY(driver, dev);
4314 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4315 		return;
4316 	for (i = 0; i < numdevs; i++) {
4317 		child = devlist[i];
4318 		if (device_get_state(child) != DS_NOTPRESENT)
4319 			continue;
4320 		dinfo = device_get_ivars(child);
4321 		pci_print_verbose(dinfo);
4322 		if (bootverbose)
4323 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4324 		pci_cfg_restore(child, dinfo);
4325 		if (device_probe_and_attach(child) != 0)
4326 			pci_child_detached(dev, child);
4327 	}
4328 	free(devlist, M_TEMP);
4329 }
4330 
4331 int
4332 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
4333     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
4334 {
4335 	struct pci_devinfo *dinfo;
4336 	struct msix_table_entry *mte;
4337 	struct msix_vector *mv;
4338 	uint64_t addr;
4339 	uint32_t data;
4340 	void *cookie;
4341 	int error, rid;
4342 
4343 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
4344 	    arg, &cookie);
4345 	if (error)
4346 		return (error);
4347 
4348 	/* If this is not a direct child, just bail out. */
4349 	if (device_get_parent(child) != dev) {
4350 		*cookiep = cookie;
4351 		return(0);
4352 	}
4353 
4354 	rid = rman_get_rid(irq);
4355 	if (rid == 0) {
4356 		/* Make sure that INTx is enabled */
4357 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4358 	} else {
4359 		/*
4360 		 * Check to see if the interrupt is MSI or MSI-X.
4361 		 * Ask our parent to map the MSI and give
4362 		 * us the address and data register values.
4363 		 * If we fail for some reason, teardown the
4364 		 * interrupt handler.
4365 		 */
4366 		dinfo = device_get_ivars(child);
4367 		if (dinfo->cfg.msi.msi_alloc > 0) {
4368 			if (dinfo->cfg.msi.msi_addr == 0) {
4369 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
4370 			    ("MSI has handlers, but vectors not mapped"));
4371 				error = PCIB_MAP_MSI(device_get_parent(dev),
4372 				    child, rman_get_start(irq), &addr, &data);
4373 				if (error)
4374 					goto bad;
4375 				dinfo->cfg.msi.msi_addr = addr;
4376 				dinfo->cfg.msi.msi_data = data;
4377 			}
4378 			if (dinfo->cfg.msi.msi_handlers == 0)
4379 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
4380 				    dinfo->cfg.msi.msi_data);
4381 			dinfo->cfg.msi.msi_handlers++;
4382 		} else {
4383 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4384 			    ("No MSI or MSI-X interrupts allocated"));
4385 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4386 			    ("MSI-X index too high"));
4387 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4388 			KASSERT(mte->mte_vector != 0, ("no message vector"));
4389 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
4390 			KASSERT(mv->mv_irq == rman_get_start(irq),
4391 			    ("IRQ mismatch"));
4392 			if (mv->mv_address == 0) {
4393 				KASSERT(mte->mte_handlers == 0,
4394 		    ("MSI-X table entry has handlers, but vector not mapped"));
4395 				error = PCIB_MAP_MSI(device_get_parent(dev),
4396 				    child, rman_get_start(irq), &addr, &data);
4397 				if (error)
4398 					goto bad;
4399 				mv->mv_address = addr;
4400 				mv->mv_data = data;
4401 			}
4402 			if (mte->mte_handlers == 0) {
4403 				pci_enable_msix(child, rid - 1, mv->mv_address,
4404 				    mv->mv_data);
4405 				pci_unmask_msix(child, rid - 1);
4406 			}
4407 			mte->mte_handlers++;
4408 		}
4409 
4410 		/*
4411 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
4412 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
4413 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
4414 		 */
4415 		if (!pci_has_quirk(pci_get_devid(child),
4416 		    PCI_QUIRK_MSI_INTX_BUG))
4417 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4418 		else
4419 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4420 	bad:
4421 		if (error) {
4422 			(void)bus_generic_teardown_intr(dev, child, irq,
4423 			    cookie);
4424 			return (error);
4425 		}
4426 	}
4427 	*cookiep = cookie;
4428 	return (0);
4429 }
4430 
4431 int
4432 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4433     void *cookie)
4434 {
4435 	struct msix_table_entry *mte;
4436 	struct resource_list_entry *rle;
4437 	struct pci_devinfo *dinfo;
4438 	int error, rid;
4439 
4440 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4441 		return (EINVAL);
4442 
4443 	/* If this isn't a direct child, just bail out */
4444 	if (device_get_parent(child) != dev)
4445 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4446 
4447 	rid = rman_get_rid(irq);
4448 	if (rid == 0) {
4449 		/* Mask INTx */
4450 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4451 	} else {
4452 		/*
4453 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4454 		 * decrement the appropriate handlers count and mask the
4455 		 * MSI-X message, or disable MSI messages if the count
4456 		 * drops to 0.
4457 		 */
4458 		dinfo = device_get_ivars(child);
4459 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4460 		if (rle->res != irq)
4461 			return (EINVAL);
4462 		if (dinfo->cfg.msi.msi_alloc > 0) {
4463 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4464 			    ("MSI-X index too high"));
4465 			if (dinfo->cfg.msi.msi_handlers == 0)
4466 				return (EINVAL);
4467 			dinfo->cfg.msi.msi_handlers--;
4468 			if (dinfo->cfg.msi.msi_handlers == 0)
4469 				pci_disable_msi(child);
4470 		} else {
4471 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4472 			    ("No MSI or MSI-X interrupts allocated"));
4473 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4474 			    ("MSI-X index too high"));
4475 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4476 			if (mte->mte_handlers == 0)
4477 				return (EINVAL);
4478 			mte->mte_handlers--;
4479 			if (mte->mte_handlers == 0)
4480 				pci_mask_msix(child, rid - 1);
4481 		}
4482 	}
4483 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4484 	if (rid > 0)
4485 		KASSERT(error == 0,
4486 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4487 	return (error);
4488 }
4489 
4490 int
4491 pci_print_child(device_t dev, device_t child)
4492 {
4493 	struct pci_devinfo *dinfo;
4494 	struct resource_list *rl;
4495 	int retval = 0;
4496 
4497 	dinfo = device_get_ivars(child);
4498 	rl = &dinfo->resources;
4499 
4500 	retval += bus_print_child_header(dev, child);
4501 
4502 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4503 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4504 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4505 	if (device_get_flags(dev))
4506 		retval += printf(" flags %#x", device_get_flags(dev));
4507 
4508 	retval += printf(" at device %d.%d", pci_get_slot(child),
4509 	    pci_get_function(child));
4510 
4511 	retval += bus_print_child_domain(dev, child);
4512 	retval += bus_print_child_footer(dev, child);
4513 
4514 	return (retval);
4515 }
4516 
/*
 * Generic descriptions, keyed by PCI class and subclass, that
 * pci_probe_nomatch() prints for devices without a driver.
 *
 * An entry with subclass -1 supplies the class-wide description; the
 * remaining entries describe one specific subclass.  Entries of a class
 * are scanned in table order and the last matching entry decides the
 * 'report' value.  The table ends at the entry whose desc is NULL.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	/* End-of-table marker: desc == NULL stops the scan. */
	{0, 0, 0,		NULL}
};
4611 
4612 void
4613 pci_probe_nomatch(device_t dev, device_t child)
4614 {
4615 	int i, report;
4616 	const char *cp, *scp;
4617 	char *device;
4618 
4619 	/*
4620 	 * Look for a listing for this device in a loaded device database.
4621 	 */
4622 	report = 1;
4623 	if ((device = pci_describe_device(child)) != NULL) {
4624 		device_printf(dev, "<%s>", device);
4625 		free(device, M_DEVBUF);
4626 	} else {
4627 		/*
4628 		 * Scan the class/subclass descriptions for a general
4629 		 * description.
4630 		 */
4631 		cp = "unknown";
4632 		scp = NULL;
4633 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4634 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4635 				if (pci_nomatch_tab[i].subclass == -1) {
4636 					cp = pci_nomatch_tab[i].desc;
4637 					report = pci_nomatch_tab[i].report;
4638 				} else if (pci_nomatch_tab[i].subclass ==
4639 				    pci_get_subclass(child)) {
4640 					scp = pci_nomatch_tab[i].desc;
4641 					report = pci_nomatch_tab[i].report;
4642 				}
4643 			}
4644 		}
4645 		if (report || bootverbose) {
4646 			device_printf(dev, "<%s%s%s>",
4647 			    cp ? cp : "",
4648 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4649 			    scp ? scp : "");
4650 		}
4651 	}
4652 	if (report || bootverbose) {
4653 		printf(" at device %d.%d (no driver attached)\n",
4654 		    pci_get_slot(child), pci_get_function(child));
4655 	}
4656 	pci_cfg_save(child, device_get_ivars(child), 1);
4657 }
4658 
4659 void
4660 pci_child_detached(device_t dev, device_t child)
4661 {
4662 	struct pci_devinfo *dinfo;
4663 	struct resource_list *rl;
4664 
4665 	dinfo = device_get_ivars(child);
4666 	rl = &dinfo->resources;
4667 
4668 	/*
4669 	 * Have to deallocate IRQs before releasing any MSI messages and
4670 	 * have to release MSI messages before deallocating any memory
4671 	 * BARs.
4672 	 */
4673 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4674 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4675 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4676 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4677 		(void)pci_release_msi(child);
4678 	}
4679 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4680 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4681 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4682 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4683 #ifdef PCI_RES_BUS
4684 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4685 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4686 #endif
4687 
4688 	pci_cfg_save(child, dinfo, 1);
4689 }
4690 
4691 /*
4692  * Parse the PCI device database, if loaded, and return a pointer to a
4693  * description of the device.
4694  *
4695  * The database is flat text formatted as follows:
4696  *
4697  * Any line not in a valid format is ignored.
4698  * Lines are terminated with newline '\n' characters.
4699  *
4700  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4701  * the vendor name.
4702  *
4703  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4704  * - devices cannot be listed without a corresponding VENDOR line.
4705  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4706  * another TAB, then the device name.
4707  */
4708 
4709 /*
4710  * Assuming (ptr) points to the beginning of a line in the database,
4711  * return the vendor or device and description of the next entry.
4712  * The value of (vendor) or (device) inappropriate for the entry type
4713  * is set to -1.  Returns nonzero at the end of the database.
4714  *
4715  * Note that this is slightly unrobust in the face of corrupt data;
4716  * we attempt to safeguard against this by spamming the end of the
4717  * database with a newline when we initialise.
4718  */
4719 static int
4720 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4721 {
4722 	char	*cp = *ptr;
4723 	int	left;
4724 
4725 	*device = -1;
4726 	*vendor = -1;
4727 	**desc = '\0';
4728 	for (;;) {
4729 		left = pci_vendordata_size - (cp - pci_vendordata);
4730 		if (left <= 0) {
4731 			*ptr = cp;
4732 			return(1);
4733 		}
4734 
4735 		/* vendor entry? */
4736 		if (*cp != '\t' &&
4737 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4738 			break;
4739 		/* device entry? */
4740 		if (*cp == '\t' &&
4741 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4742 			break;
4743 
4744 		/* skip to next line */
4745 		while (*cp != '\n' && left > 0) {
4746 			cp++;
4747 			left--;
4748 		}
4749 		if (*cp == '\n') {
4750 			cp++;
4751 			left--;
4752 		}
4753 	}
4754 	/* skip to next line */
4755 	while (*cp != '\n' && left > 0) {
4756 		cp++;
4757 		left--;
4758 	}
4759 	if (*cp == '\n' && left > 0)
4760 		cp++;
4761 	*ptr = cp;
4762 	return(0);
4763 }
4764 
4765 static char *
4766 pci_describe_device(device_t dev)
4767 {
4768 	int	vendor, device;
4769 	char	*desc, *vp, *dp, *line;
4770 
4771 	desc = vp = dp = NULL;
4772 
4773 	/*
4774 	 * If we have no vendor data, we can't do anything.
4775 	 */
4776 	if (pci_vendordata == NULL)
4777 		goto out;
4778 
4779 	/*
4780 	 * Scan the vendor data looking for this device
4781 	 */
4782 	line = pci_vendordata;
4783 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4784 		goto out;
4785 	for (;;) {
4786 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4787 			goto out;
4788 		if (vendor == pci_get_vendor(dev))
4789 			break;
4790 	}
4791 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4792 		goto out;
4793 	for (;;) {
4794 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4795 			*dp = 0;
4796 			break;
4797 		}
4798 		if (vendor != -1) {
4799 			*dp = 0;
4800 			break;
4801 		}
4802 		if (device == pci_get_device(dev))
4803 			break;
4804 	}
4805 	if (dp[0] == '\0')
4806 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4807 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4808 	    NULL)
4809 		sprintf(desc, "%s, %s", vp, dp);
4810 out:
4811 	if (vp != NULL)
4812 		free(vp, M_DEVBUF);
4813 	if (dp != NULL)
4814 		free(dp, M_DEVBUF);
4815 	return(desc);
4816 }
4817 
4818 int
4819 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4820 {
4821 	struct pci_devinfo *dinfo;
4822 	pcicfgregs *cfg;
4823 
4824 	dinfo = device_get_ivars(child);
4825 	cfg = &dinfo->cfg;
4826 
4827 	switch (which) {
4828 	case PCI_IVAR_ETHADDR:
4829 		/*
4830 		 * The generic accessor doesn't deal with failure, so
4831 		 * we set the return value, then return an error.
4832 		 */
4833 		*((uint8_t **) result) = NULL;
4834 		return (EINVAL);
4835 	case PCI_IVAR_SUBVENDOR:
4836 		*result = cfg->subvendor;
4837 		break;
4838 	case PCI_IVAR_SUBDEVICE:
4839 		*result = cfg->subdevice;
4840 		break;
4841 	case PCI_IVAR_VENDOR:
4842 		*result = cfg->vendor;
4843 		break;
4844 	case PCI_IVAR_DEVICE:
4845 		*result = cfg->device;
4846 		break;
4847 	case PCI_IVAR_DEVID:
4848 		*result = (cfg->device << 16) | cfg->vendor;
4849 		break;
4850 	case PCI_IVAR_CLASS:
4851 		*result = cfg->baseclass;
4852 		break;
4853 	case PCI_IVAR_SUBCLASS:
4854 		*result = cfg->subclass;
4855 		break;
4856 	case PCI_IVAR_PROGIF:
4857 		*result = cfg->progif;
4858 		break;
4859 	case PCI_IVAR_REVID:
4860 		*result = cfg->revid;
4861 		break;
4862 	case PCI_IVAR_INTPIN:
4863 		*result = cfg->intpin;
4864 		break;
4865 	case PCI_IVAR_IRQ:
4866 		*result = cfg->intline;
4867 		break;
4868 	case PCI_IVAR_DOMAIN:
4869 		*result = cfg->domain;
4870 		break;
4871 	case PCI_IVAR_BUS:
4872 		*result = cfg->bus;
4873 		break;
4874 	case PCI_IVAR_SLOT:
4875 		*result = cfg->slot;
4876 		break;
4877 	case PCI_IVAR_FUNCTION:
4878 		*result = cfg->func;
4879 		break;
4880 	case PCI_IVAR_CMDREG:
4881 		*result = cfg->cmdreg;
4882 		break;
4883 	case PCI_IVAR_CACHELNSZ:
4884 		*result = cfg->cachelnsz;
4885 		break;
4886 	case PCI_IVAR_MINGNT:
4887 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4888 			*result = -1;
4889 			return (EINVAL);
4890 		}
4891 		*result = cfg->mingnt;
4892 		break;
4893 	case PCI_IVAR_MAXLAT:
4894 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4895 			*result = -1;
4896 			return (EINVAL);
4897 		}
4898 		*result = cfg->maxlat;
4899 		break;
4900 	case PCI_IVAR_LATTIMER:
4901 		*result = cfg->lattimer;
4902 		break;
4903 	default:
4904 		return (ENOENT);
4905 	}
4906 	return (0);
4907 }
4908 
4909 int
4910 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4911 {
4912 	struct pci_devinfo *dinfo;
4913 
4914 	dinfo = device_get_ivars(child);
4915 
4916 	switch (which) {
4917 	case PCI_IVAR_INTPIN:
4918 		dinfo->cfg.intpin = value;
4919 		return (0);
4920 	case PCI_IVAR_ETHADDR:
4921 	case PCI_IVAR_SUBVENDOR:
4922 	case PCI_IVAR_SUBDEVICE:
4923 	case PCI_IVAR_VENDOR:
4924 	case PCI_IVAR_DEVICE:
4925 	case PCI_IVAR_DEVID:
4926 	case PCI_IVAR_CLASS:
4927 	case PCI_IVAR_SUBCLASS:
4928 	case PCI_IVAR_PROGIF:
4929 	case PCI_IVAR_REVID:
4930 	case PCI_IVAR_IRQ:
4931 	case PCI_IVAR_DOMAIN:
4932 	case PCI_IVAR_BUS:
4933 	case PCI_IVAR_SLOT:
4934 	case PCI_IVAR_FUNCTION:
4935 		return (EINVAL);	/* disallow for now */
4936 
4937 	default:
4938 		return (ENOENT);
4939 	}
4940 }
4941 
4942 #include "opt_ddb.h"
4943 #ifdef DDB
4944 #include <ddb/ddb.h>
4945 #include <sys/cons.h>
4946 
4947 /*
4948  * List resources based on pci map registers, used for within ddb
4949  */
4950 
4951 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4952 {
4953 	struct pci_devinfo *dinfo;
4954 	struct devlist *devlist_head;
4955 	struct pci_conf *p;
4956 	const char *name;
4957 	int i, error, none_count;
4958 
4959 	none_count = 0;
4960 	/* get the head of the device queue */
4961 	devlist_head = &pci_devq;
4962 
4963 	/*
4964 	 * Go through the list of devices and print out devices
4965 	 */
4966 	for (error = 0, i = 0,
4967 	     dinfo = STAILQ_FIRST(devlist_head);
4968 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4969 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4970 
4971 		/* Populate pd_name and pd_unit */
4972 		name = NULL;
4973 		if (dinfo->cfg.dev)
4974 			name = device_get_name(dinfo->cfg.dev);
4975 
4976 		p = &dinfo->conf;
4977 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4978 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4979 			(name && *name) ? name : "none",
4980 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4981 			none_count++,
4982 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4983 			p->pc_sel.pc_func, (p->pc_class << 16) |
4984 			(p->pc_subclass << 8) | p->pc_progif,
4985 			(p->pc_subdevice << 16) | p->pc_subvendor,
4986 			(p->pc_device << 16) | p->pc_vendor,
4987 			p->pc_revid, p->pc_hdr);
4988 	}
4989 }
4990 #endif /* DDB */
4991 
/*
 * Lazily reserve the resource backing the BAR identified by *rid on
 * behalf of 'child'.
 *
 * The BAR size is taken from the recorded pci_map entry when one
 * exists; otherwise the BAR is probed with pci_read_bar() and a new
 * entry is recorded with pci_add_bar().  The caller's 'count' is
 * replaced by the decoded BAR size multiplied by 'num', and the
 * alignment in 'flags' is raised to at least the BAR size.  On success
 * the start address of the reserved range is written back to the BAR.
 *
 * Returns the reserved resource (RF_ACTIVE is masked off for the
 * reservation), or NULL if the rid is managed by EA, the BAR decodes
 * to a size of 0, 'type' does not match the kind of BAR, or the
 * reservation itself fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so a later call can reuse the probe. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/*
	 * The requested resource type has to agree with the BAR: memory
	 * BARs and device ROMs are SYS_RES_MEMORY, everything else is
	 * SYS_RES_IOPORT.
	 */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	/* mapsize is a log2 value; 'num' scales the single-BAR size. */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Undo the list entry added above; the pci_map entry stays. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
5095 
5096 struct resource *
5097 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
5098     rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
5099     u_int flags)
5100 {
5101 	struct pci_devinfo *dinfo;
5102 	struct resource_list *rl;
5103 	struct resource_list_entry *rle;
5104 	struct resource *res;
5105 	pcicfgregs *cfg;
5106 
5107 	/*
5108 	 * Perform lazy resource allocation
5109 	 */
5110 	dinfo = device_get_ivars(child);
5111 	rl = &dinfo->resources;
5112 	cfg = &dinfo->cfg;
5113 	switch (type) {
5114 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
5115 	case PCI_RES_BUS:
5116 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
5117 		    flags));
5118 #endif
5119 	case SYS_RES_IRQ:
5120 		/*
5121 		 * Can't alloc legacy interrupt once MSI messages have
5122 		 * been allocated.
5123 		 */
5124 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
5125 		    cfg->msix.msix_alloc > 0))
5126 			return (NULL);
5127 
5128 		/*
5129 		 * If the child device doesn't have an interrupt
5130 		 * routed and is deserving of an interrupt, try to
5131 		 * assign it one.
5132 		 */
5133 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
5134 		    (cfg->intpin != 0))
5135 			pci_assign_interrupt(dev, child, 0);
5136 		break;
5137 	case SYS_RES_IOPORT:
5138 	case SYS_RES_MEMORY:
5139 #ifdef NEW_PCIB
5140 		/*
5141 		 * PCI-PCI bridge I/O window resources are not BARs.
5142 		 * For those allocations just pass the request up the
5143 		 * tree.
5144 		 */
5145 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
5146 			switch (*rid) {
5147 			case PCIR_IOBASEL_1:
5148 			case PCIR_MEMBASE_1:
5149 			case PCIR_PMBASEL_1:
5150 				/*
5151 				 * XXX: Should we bother creating a resource
5152 				 * list entry?
5153 				 */
5154 				return (bus_generic_alloc_resource(dev, child,
5155 				    type, rid, start, end, count, flags));
5156 			}
5157 		}
5158 #endif
5159 		/* Reserve resources for this BAR if needed. */
5160 		rle = resource_list_find(rl, type, *rid);
5161 		if (rle == NULL) {
5162 			res = pci_reserve_map(dev, child, type, rid, start, end,
5163 			    count, num, flags);
5164 			if (res == NULL)
5165 				return (NULL);
5166 		}
5167 	}
5168 	return (resource_list_alloc(rl, dev, child, type, rid,
5169 	    start, end, count, flags));
5170 }
5171 
5172 struct resource *
5173 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
5174     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
5175 {
5176 #ifdef PCI_IOV
5177 	struct pci_devinfo *dinfo;
5178 #endif
5179 
5180 	if (device_get_parent(child) != dev)
5181 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
5182 		    type, rid, start, end, count, flags));
5183 
5184 #ifdef PCI_IOV
5185 	dinfo = device_get_ivars(child);
5186 	if (dinfo->cfg.flags & PCICFG_VF) {
5187 		switch (type) {
5188 		/* VFs can't have I/O BARs. */
5189 		case SYS_RES_IOPORT:
5190 			return (NULL);
5191 		case SYS_RES_MEMORY:
5192 			return (pci_vf_alloc_mem_resource(dev, child, rid,
5193 			    start, end, count, flags));
5194 		}
5195 
5196 		/* Fall through for other types of resource allocations. */
5197 	}
5198 #endif
5199 
5200 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
5201 	    count, 1, flags));
5202 }
5203 
5204 int
5205 pci_release_resource(device_t dev, device_t child, int type, int rid,
5206     struct resource *r)
5207 {
5208 	struct pci_devinfo *dinfo;
5209 	struct resource_list *rl;
5210 	pcicfgregs *cfg;
5211 
5212 	if (device_get_parent(child) != dev)
5213 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
5214 		    type, rid, r));
5215 
5216 	dinfo = device_get_ivars(child);
5217 	cfg = &dinfo->cfg;
5218 
5219 #ifdef PCI_IOV
5220 	if (dinfo->cfg.flags & PCICFG_VF) {
5221 		switch (type) {
5222 		/* VFs can't have I/O BARs. */
5223 		case SYS_RES_IOPORT:
5224 			return (EDOOFUS);
5225 		case SYS_RES_MEMORY:
5226 			return (pci_vf_release_mem_resource(dev, child, rid,
5227 			    r));
5228 		}
5229 
5230 		/* Fall through for other types of resource allocations. */
5231 	}
5232 #endif
5233 
5234 #ifdef NEW_PCIB
5235 	/*
5236 	 * PCI-PCI bridge I/O window resources are not BARs.  For
5237 	 * those allocations just pass the request up the tree.
5238 	 */
5239 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
5240 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
5241 		switch (rid) {
5242 		case PCIR_IOBASEL_1:
5243 		case PCIR_MEMBASE_1:
5244 		case PCIR_PMBASEL_1:
5245 			return (bus_generic_release_resource(dev, child, type,
5246 			    rid, r));
5247 		}
5248 	}
5249 #endif
5250 
5251 	rl = &dinfo->resources;
5252 	return (resource_list_release(rl, dev, child, type, rid, r));
5253 }
5254 
5255 int
5256 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5257     struct resource *r)
5258 {
5259 	struct pci_devinfo *dinfo;
5260 	int error;
5261 
5262 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5263 	if (error)
5264 		return (error);
5265 
5266 	/* Enable decoding in the command register when activating BARs. */
5267 	if (device_get_parent(child) == dev) {
5268 		/* Device ROMs need their decoding explicitly enabled. */
5269 		dinfo = device_get_ivars(child);
5270 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5271 			pci_write_bar(child, pci_find_bar(child, rid),
5272 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5273 		switch (type) {
5274 		case SYS_RES_IOPORT:
5275 		case SYS_RES_MEMORY:
5276 			error = PCI_ENABLE_IO(dev, child, type);
5277 			break;
5278 		}
5279 	}
5280 	return (error);
5281 }
5282 
5283 int
5284 pci_deactivate_resource(device_t dev, device_t child, int type,
5285     int rid, struct resource *r)
5286 {
5287 	struct pci_devinfo *dinfo;
5288 	int error;
5289 
5290 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5291 	if (error)
5292 		return (error);
5293 
5294 	/* Disable decoding for device ROMs. */
5295 	if (device_get_parent(child) == dev) {
5296 		dinfo = device_get_ivars(child);
5297 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5298 			pci_write_bar(child, pci_find_bar(child, rid),
5299 			    rman_get_start(r));
5300 	}
5301 	return (0);
5302 }
5303 
5304 void
5305 pci_child_deleted(device_t dev, device_t child)
5306 {
5307 	struct resource_list_entry *rle;
5308 	struct resource_list *rl;
5309 	struct pci_devinfo *dinfo;
5310 
5311 	dinfo = device_get_ivars(child);
5312 	rl = &dinfo->resources;
5313 
5314 	/* Turn off access to resources we're about to free */
5315 	if (bus_child_present(child) != 0) {
5316 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5317 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5318 
5319 		pci_disable_busmaster(child);
5320 	}
5321 
5322 	/* Free all allocated resources */
5323 	STAILQ_FOREACH(rle, rl, link) {
5324 		if (rle->res) {
5325 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5326 			    resource_list_busy(rl, rle->type, rle->rid)) {
5327 				pci_printf(&dinfo->cfg,
5328 				    "Resource still owned, oops. "
5329 				    "(type=%d, rid=%d, addr=%lx)\n",
5330 				    rle->type, rle->rid,
5331 				    rman_get_start(rle->res));
5332 				bus_release_resource(child, rle->type, rle->rid,
5333 				    rle->res);
5334 			}
5335 			resource_list_unreserve(rl, dev, child, rle->type,
5336 			    rle->rid);
5337 		}
5338 	}
5339 	resource_list_free(rl);
5340 
5341 	pci_freecfg(dinfo);
5342 }
5343 
5344 void
5345 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5346 {
5347 	struct pci_devinfo *dinfo;
5348 	struct resource_list *rl;
5349 	struct resource_list_entry *rle;
5350 
5351 	if (device_get_parent(child) != dev)
5352 		return;
5353 
5354 	dinfo = device_get_ivars(child);
5355 	rl = &dinfo->resources;
5356 	rle = resource_list_find(rl, type, rid);
5357 	if (rle == NULL)
5358 		return;
5359 
5360 	if (rle->res) {
5361 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5362 		    resource_list_busy(rl, type, rid)) {
5363 			device_printf(dev, "delete_resource: "
5364 			    "Resource still owned by child, oops. "
5365 			    "(type=%d, rid=%d, addr=%jx)\n",
5366 			    type, rid, rman_get_start(rle->res));
5367 			return;
5368 		}
5369 		resource_list_unreserve(rl, dev, child, type, rid);
5370 	}
5371 	resource_list_delete(rl, type, rid);
5372 }
5373 
5374 struct resource_list *
5375 pci_get_resource_list (device_t dev, device_t child)
5376 {
5377 	struct pci_devinfo *dinfo = device_get_ivars(child);
5378 
5379 	return (&dinfo->resources);
5380 }
5381 
5382 bus_dma_tag_t
5383 pci_get_dma_tag(device_t bus, device_t dev)
5384 {
5385 	struct pci_softc *sc = device_get_softc(bus);
5386 
5387 	return (sc->sc_dma_tag);
5388 }
5389 
5390 uint32_t
5391 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5392 {
5393 	struct pci_devinfo *dinfo = device_get_ivars(child);
5394 	pcicfgregs *cfg = &dinfo->cfg;
5395 
5396 #ifdef PCI_IOV
5397 	/*
5398 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5399 	 * emulate them here.
5400 	 */
5401 	if (cfg->flags & PCICFG_VF) {
5402 		if (reg == PCIR_VENDOR) {
5403 			switch (width) {
5404 			case 4:
5405 				return (cfg->device << 16 | cfg->vendor);
5406 			case 2:
5407 				return (cfg->vendor);
5408 			case 1:
5409 				return (cfg->vendor & 0xff);
5410 			default:
5411 				return (0xffffffff);
5412 			}
5413 		} else if (reg == PCIR_DEVICE) {
5414 			switch (width) {
5415 			/* Note that an unaligned 4-byte read is an error. */
5416 			case 2:
5417 				return (cfg->device);
5418 			case 1:
5419 				return (cfg->device & 0xff);
5420 			default:
5421 				return (0xffffffff);
5422 			}
5423 		}
5424 	}
5425 #endif
5426 
5427 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5428 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5429 }
5430 
5431 void
5432 pci_write_config_method(device_t dev, device_t child, int reg,
5433     uint32_t val, int width)
5434 {
5435 	struct pci_devinfo *dinfo = device_get_ivars(child);
5436 	pcicfgregs *cfg = &dinfo->cfg;
5437 
5438 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5439 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5440 }
5441 
5442 int
5443 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5444     size_t buflen)
5445 {
5446 
5447 	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5448 	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5449 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5450 	return (0);
5451 }
5452 
5453 int
5454 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5455     size_t buflen)
5456 {
5457 	struct pci_devinfo *dinfo;
5458 	pcicfgregs *cfg;
5459 
5460 	dinfo = device_get_ivars(child);
5461 	cfg = &dinfo->cfg;
5462 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5463 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5464 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5465 	    cfg->progif);
5466 	return (0);
5467 }
5468 
5469 int
5470 pci_assign_interrupt_method(device_t dev, device_t child)
5471 {
5472 	struct pci_devinfo *dinfo = device_get_ivars(child);
5473 	pcicfgregs *cfg = &dinfo->cfg;
5474 
5475 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5476 	    cfg->intpin));
5477 }
5478 
5479 static void
5480 pci_lookup(void *arg, const char *name, device_t *dev)
5481 {
5482 	long val;
5483 	char *end;
5484 	int domain, bus, slot, func;
5485 
5486 	if (*dev != NULL)
5487 		return;
5488 
5489 	/*
5490 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5491 	 * pciB:S:F.  In the latter case, the domain is assumed to
5492 	 * be zero.
5493 	 */
5494 	if (strncmp(name, "pci", 3) != 0)
5495 		return;
5496 	val = strtol(name + 3, &end, 10);
5497 	if (val < 0 || val > INT_MAX || *end != ':')
5498 		return;
5499 	domain = val;
5500 	val = strtol(end + 1, &end, 10);
5501 	if (val < 0 || val > INT_MAX || *end != ':')
5502 		return;
5503 	bus = val;
5504 	val = strtol(end + 1, &end, 10);
5505 	if (val < 0 || val > INT_MAX)
5506 		return;
5507 	slot = val;
5508 	if (*end == ':') {
5509 		val = strtol(end + 1, &end, 10);
5510 		if (val < 0 || val > INT_MAX || *end != '\0')
5511 			return;
5512 		func = val;
5513 	} else if (*end == '\0') {
5514 		func = slot;
5515 		slot = bus;
5516 		bus = domain;
5517 		domain = 0;
5518 	} else
5519 		return;
5520 
5521 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5522 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5523 		return;
5524 
5525 	*dev = pci_find_dbsf(domain, bus, slot, func);
5526 }
5527 
5528 static int
5529 pci_modevent(module_t mod, int what, void *arg)
5530 {
5531 	static struct cdev *pci_cdev;
5532 	static eventhandler_tag tag;
5533 
5534 	switch (what) {
5535 	case MOD_LOAD:
5536 		STAILQ_INIT(&pci_devq);
5537 		pci_generation = 0;
5538 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5539 		    "pci");
5540 		pci_load_vendor_data();
5541 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5542 		    1000);
5543 		break;
5544 
5545 	case MOD_UNLOAD:
5546 		if (tag != NULL)
5547 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5548 		destroy_dev(pci_cdev);
5549 		break;
5550 	}
5551 
5552 	return (0);
5553 }
5554 
5555 static void
5556 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5557 {
5558 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5559 	struct pcicfg_pcie *cfg;
5560 	int version, pos;
5561 
5562 	cfg = &dinfo->cfg.pcie;
5563 	pos = cfg->pcie_location;
5564 
5565 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5566 
5567 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5568 
5569 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5570 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5571 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5572 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5573 
5574 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5575 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5576 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5577 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5578 
5579 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5580 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5581 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5582 
5583 	if (version > 1) {
5584 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5585 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5586 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5587 	}
5588 #undef WREG
5589 }
5590 
5591 static void
5592 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5593 {
5594 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5595 	    dinfo->cfg.pcix.pcix_command,  2);
5596 }
5597 
/*
 * Write the cached configuration state in 'dinfo' back to the device.
 *
 * The device is first moved to D0 if it is not already there.  Then
 * the common header registers, the header-type specific registers and
 * the BARs (via pci_restore_bars()) are written, followed by the saved
 * PCI-Express and PCI-X control registers, the MSI and MSI-X
 * configuration, and, with PCI_IOV, the IOV state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Registers common to all header types. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Registers whose location depends on the header type. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	/* A capability location of 0 means the capability was not found. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_restore(dev, dinfo);
#endif
}
5670 
5671 static void
5672 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5673 {
5674 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5675 	struct pcicfg_pcie *cfg;
5676 	int version, pos;
5677 
5678 	cfg = &dinfo->cfg.pcie;
5679 	pos = cfg->pcie_location;
5680 
5681 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5682 
5683 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5684 
5685 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5686 
5687 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5688 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5689 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5690 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5691 
5692 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5693 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5694 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5695 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5696 
5697 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5698 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5699 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5700 
5701 	if (version > 1) {
5702 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5703 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5704 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5705 	}
5706 #undef RREG
5707 }
5708 
5709 static void
5710 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5711 {
5712 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5713 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5714 }
5715 
/*
 * Refresh the cached configuration state in 'dinfo' from the device.
 *
 * The common header registers and the header-type specific registers
 * are re-read, followed by the PCI-Express and PCI-X control registers
 * when those capabilities were located and, with PCI_IOV, the IOV
 * state.
 *
 * If 'setstate' is non-zero the device may afterwards be placed in D3,
 * depending on pci_do_power_nodriver and the device class; with
 * 'setstate' zero the power state is left alone.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Registers whose location depends on the header type. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	/* A capability location of 0 means the capability was not found. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_save(dev, dinfo);
#endif

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	/* Callers that only want the state cached stop here. */
	if (!setstate)
		return;
	/*
	 * The policy cases deliberately fall through: each level applies
	 * its own exclusions and then those of the more aggressive levels
	 * below it.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5823 
5824 /* Wrapper APIs suitable for device driver use. */
5825 void
5826 pci_save_state(device_t dev)
5827 {
5828 	struct pci_devinfo *dinfo;
5829 
5830 	dinfo = device_get_ivars(dev);
5831 	pci_cfg_save(dev, dinfo, 0);
5832 }
5833 
5834 void
5835 pci_restore_state(device_t dev)
5836 {
5837 	struct pci_devinfo *dinfo;
5838 
5839 	dinfo = device_get_ivars(dev);
5840 	pci_cfg_restore(dev, dinfo);
5841 }
5842 
5843 static int
5844 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5845     uintptr_t *id)
5846 {
5847 
5848 	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5849 }
5850 
5851 /* Find the upstream port of a given PCI device in a root complex. */
5852 device_t
5853 pci_find_pcie_root_port(device_t dev)
5854 {
5855 	struct pci_devinfo *dinfo;
5856 	devclass_t pci_class;
5857 	device_t pcib, bus;
5858 
5859 	pci_class = devclass_find("pci");
5860 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5861 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5862 
5863 	/*
5864 	 * Walk the bridge hierarchy until we find a PCI-e root
5865 	 * port or a non-PCI device.
5866 	 */
5867 	for (;;) {
5868 		bus = device_get_parent(dev);
5869 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5870 		    device_get_nameunit(dev)));
5871 
5872 		pcib = device_get_parent(bus);
5873 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5874 		    device_get_nameunit(bus)));
5875 
5876 		/*
5877 		 * pcib's parent must be a PCI bus for this to be a
5878 		 * PCI-PCI bridge.
5879 		 */
5880 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5881 			return (NULL);
5882 
5883 		dinfo = device_get_ivars(pcib);
5884 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5885 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5886 			return (pcib);
5887 
5888 		dev = pcib;
5889 	}
5890 }
5891