xref: /freebsd/sys/dev/pci/pci.c (revision a4dc509f723944821bcfcc52005ff87c9a5dee5b)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #include <dev/usb/controller/xhcireg.h>
67 #include <dev/usb/controller/ehcireg.h>
68 #include <dev/usb/controller/ohcireg.h>
69 #include <dev/usb/controller/uhcireg.h>
70 
71 #include "pcib_if.h"
72 #include "pci_if.h"
73 
/*
 * Evaluates to non-zero when config register 'reg' is the ROM base register
 * for the header type stored in 'cfg': PCIR_BIOS when cfg->hdrtype is
 * PCIM_HDRTYPE_NORMAL, PCIR_BIOS_1 when it is PCIM_HDRTYPE_BRIDGE.
 *
 * Both arguments are parenthesized in the expansion so that an expression
 * argument (e.g. "base | off") is compared as a whole rather than being
 * split by the higher precedence of '=='.  'cfg' may be evaluated twice.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
77 
78 static int		pci_has_quirk(uint32_t devid, int quirk);
79 static pci_addr_t	pci_mapbase(uint64_t mapreg);
80 static const char	*pci_maptype(uint64_t mapreg);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static int		pci_msix_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
122 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
123 
124 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
125     int f, uint16_t vid, uint16_t did, size_t size);
126 
127 static device_method_t pci_methods[] = {
128 	/* Device interface */
129 	DEVMETHOD(device_probe,		pci_probe),
130 	DEVMETHOD(device_attach,	pci_attach),
131 #ifdef PCI_RES_BUS
132 	DEVMETHOD(device_detach,	pci_detach),
133 #else
134 	DEVMETHOD(device_detach,	bus_generic_detach),
135 #endif
136 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
137 	DEVMETHOD(device_suspend,	bus_generic_suspend),
138 	DEVMETHOD(device_resume,	pci_resume),
139 
140 	/* Bus interface */
141 	DEVMETHOD(bus_print_child,	pci_print_child),
142 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
143 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
144 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
145 	DEVMETHOD(bus_driver_added,	pci_driver_added),
146 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
147 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
148 
149 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
150 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
151 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
152 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
153 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
154 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
155 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
156 	DEVMETHOD(bus_release_resource,	pci_release_resource),
157 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
158 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
159 	DEVMETHOD(bus_child_detached,	pci_child_detached),
160 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
161 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
162 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
163 	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
164 	DEVMETHOD(bus_resume_child,	pci_resume_child),
165 
166 	/* PCI interface */
167 	DEVMETHOD(pci_read_config,	pci_read_config_method),
168 	DEVMETHOD(pci_write_config,	pci_write_config_method),
169 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
170 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
171 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
172 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
173 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
174 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
175 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
176 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
177 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
178 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
179 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
180 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
181 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
182 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
183 	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
184 	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
185 	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
186 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
187 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
188 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
189 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
190 	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
191 	DEVMETHOD(pci_child_added,	pci_child_added_method),
192 #ifdef PCI_IOV
193 	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
194 	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
195 	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
196 #endif
197 
198 	DEVMETHOD_END
199 };
200 
201 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
202 
203 static devclass_t pci_devclass;
204 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
205 MODULE_VERSION(pci, 1);
206 
207 static char	*pci_vendordata;
208 static size_t	pci_vendordata_size;
209 
/* Values for pci_quirk.type. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */

/*
 * One entry of the quirk table: a device identified by its combined
 * vendor/device ID, the kind of quirk (PCI_QUIRK_*), and two
 * quirk-specific arguments.  For the map register quirks in the table
 * below, arg1 carries the config-space offset of the register.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values above */
	int	arg1;
	int	arg2;
};
222 
223 static const struct pci_quirk pci_quirks[] = {
224 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
225 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
226 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
227 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
228 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
229 
230 	/*
231 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
232 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
233 	 */
234 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
236 
237 	/*
238 	 * MSI doesn't work on earlier Intel chipsets including
239 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
240 	 */
241 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
248 
249 	/*
250 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
251 	 * bridge.
252 	 */
253 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
254 
255 	/*
256 	 * MSI-X allocation doesn't work properly for devices passed through
257 	 * by VMware up to at least ESXi 5.1.
258 	 */
259 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
260 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
261 
262 	/*
263 	 * Some virtualization environments emulate an older chipset
264 	 * but support MSI just fine.  QEMU uses the Intel 82440.
265 	 */
266 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
267 
268 	/*
269 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
270 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
271 	 * It prevents us from attaching hpet(4) when the bit is unset.
272 	 * Note this quirk only affects SB600 revision A13 and earlier.
273 	 * For SB600 A21 and later, firmware must set the bit to hide it.
274 	 * For SB700 and later, it is unused and hardcoded to zero.
275 	 */
276 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
277 
278 	/*
279 	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
280 	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
281 	 * command register is set.
282 	 */
283 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
284 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
285 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
286 
287 	/*
288 	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
289 	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
290 	 */
291 	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
292 	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
293 	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
294 	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
295 	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
296 	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
297 
298 	{ 0 }
299 };
300 
301 /* map register information */
302 #define	PCI_MAPMEM	0x01	/* memory map */
303 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
304 #define	PCI_MAPPORT	0x04	/* port map */
305 
306 struct devlist pci_devq;
307 uint32_t pci_generation;
308 uint32_t pci_numdevs = 0;
309 static int pcie_chipset, pcix_chipset;
310 
311 /* sysctl vars */
312 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
313 
314 static int pci_enable_io_modes = 1;
315 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
316     &pci_enable_io_modes, 1,
317     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
318 enable these bits correctly.  We'd like to do this all the time, but there\n\
319 are some peripherals that this causes problems with.");
320 
321 static int pci_do_realloc_bars = 0;
322 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
323     &pci_do_realloc_bars, 0,
324     "Attempt to allocate a new range for any BARs whose original "
325     "firmware-assigned ranges fail to allocate during the initial device scan.");
326 
327 static int pci_do_power_nodriver = 0;
328 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
329     &pci_do_power_nodriver, 0,
330   "Place a function into D3 state when no driver attaches to it.  0 means\n\
331 disable.  1 means conservatively place devices into D3 state.  2 means\n\
332 agressively place devices into D3 state.  3 means put absolutely everything\n\
333 in D3 state.");
334 
335 int pci_do_power_resume = 1;
336 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
337     &pci_do_power_resume, 1,
338   "Transition from D3 -> D0 on resume.");
339 
340 int pci_do_power_suspend = 1;
341 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
342     &pci_do_power_suspend, 1,
343   "Transition from D0 -> D3 on suspend.");
344 
345 static int pci_do_msi = 1;
346 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
347     "Enable support for MSI interrupts");
348 
349 static int pci_do_msix = 1;
350 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
351     "Enable support for MSI-X interrupts");
352 
353 static int pci_honor_msi_blacklist = 1;
354 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
355     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
356 
357 #if defined(__i386__) || defined(__amd64__)
358 static int pci_usb_takeover = 1;
359 #else
360 static int pci_usb_takeover = 0;
361 #endif
362 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
363     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
364 Disable this if you depend on BIOS emulation of USB devices, that is\n\
365 you use USB devices (like keyboard or mouse) but do not load USB drivers");
366 
367 static int pci_clear_bars;
368 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
369     "Ignore firmware-assigned resources for BARs.");
370 
371 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
372 static int pci_clear_buses;
373 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
374     "Ignore firmware-assigned bus numbers.");
375 #endif
376 
377 static int pci_enable_ari = 1;
378 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
379     0, "Enable support for PCIe Alternative RID Interpretation");
380 
381 static int
382 pci_has_quirk(uint32_t devid, int quirk)
383 {
384 	const struct pci_quirk *q;
385 
386 	for (q = &pci_quirks[0]; q->devid; q++) {
387 		if (q->devid == devid && q->type == quirk)
388 			return (1);
389 	}
390 	return (0);
391 }
392 
393 /* Find a device_t by bus/slot/function in domain 0 */
394 
395 device_t
396 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
397 {
398 
399 	return (pci_find_dbsf(0, bus, slot, func));
400 }
401 
402 /* Find a device_t by domain/bus/slot/function */
403 
404 device_t
405 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
406 {
407 	struct pci_devinfo *dinfo;
408 
409 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
410 		if ((dinfo->cfg.domain == domain) &&
411 		    (dinfo->cfg.bus == bus) &&
412 		    (dinfo->cfg.slot == slot) &&
413 		    (dinfo->cfg.func == func)) {
414 			return (dinfo->cfg.dev);
415 		}
416 	}
417 
418 	return (NULL);
419 }
420 
421 /* Find a device_t by vendor/device ID */
422 
423 device_t
424 pci_find_device(uint16_t vendor, uint16_t device)
425 {
426 	struct pci_devinfo *dinfo;
427 
428 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
429 		if ((dinfo->cfg.vendor == vendor) &&
430 		    (dinfo->cfg.device == device)) {
431 			return (dinfo->cfg.dev);
432 		}
433 	}
434 
435 	return (NULL);
436 }
437 
438 device_t
439 pci_find_class(uint8_t class, uint8_t subclass)
440 {
441 	struct pci_devinfo *dinfo;
442 
443 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
444 		if (dinfo->cfg.baseclass == class &&
445 		    dinfo->cfg.subclass == subclass) {
446 			return (dinfo->cfg.dev);
447 		}
448 	}
449 
450 	return (NULL);
451 }
452 
453 static int
454 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
455 {
456 	va_list ap;
457 	int retval;
458 
459 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
460 	    cfg->func);
461 	va_start(ap, fmt);
462 	retval += vprintf(fmt, ap);
463 	va_end(ap);
464 	return (retval);
465 }
466 
467 /* return base address of memory or port map */
468 
469 static pci_addr_t
470 pci_mapbase(uint64_t mapreg)
471 {
472 
473 	if (PCI_BAR_MEM(mapreg))
474 		return (mapreg & PCIM_BAR_MEM_BASE);
475 	else
476 		return (mapreg & PCIM_BAR_IO_BASE);
477 }
478 
479 /* return map type of memory or port map */
480 
481 static const char *
482 pci_maptype(uint64_t mapreg)
483 {
484 
485 	if (PCI_BAR_IO(mapreg))
486 		return ("I/O Port");
487 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
488 		return ("Prefetchable Memory");
489 	return ("Memory");
490 }
491 
/*
 * Return log2 of the map size decoded for a memory or port map.
 *
 * 'testval' is reduced to its base-address bits with pci_mapbase(); the
 * result is the number of trailing zero bits of that value, or 0 when no
 * base-address bit is set.
 */
int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count how many low-order bits are clear. */
	for (ln2size = 0; (base & 1) == 0; ln2size++)
		base >>= 1;
	return (ln2size);
}
510 
511 /* return base address of device ROM */
512 
513 static pci_addr_t
514 pci_rombase(uint64_t mapreg)
515 {
516 
517 	return (mapreg & PCIM_BIOS_ADDR_MASK);
518 }
519 
/*
 * Return log2 of the map size decoded for the device ROM.
 *
 * 'testval' is reduced to its ROM address bits with pci_rombase(); the
 * result is the number of trailing zero bits of that value, or 0 when no
 * address bit is set.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);

	/* Count how many low-order bits are clear. */
	for (ln2size = 0; (base & 1) == 0; ln2size++)
		base >>= 1;
	return (ln2size);
}
538 
539 /* return log2 of address range supported by map register */
540 
541 static int
542 pci_maprange(uint64_t mapreg)
543 {
544 	int ln2range = 0;
545 
546 	if (PCI_BAR_IO(mapreg))
547 		ln2range = 32;
548 	else
549 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
550 		case PCIM_BAR_MEM_32:
551 			ln2range = 32;
552 			break;
553 		case PCIM_BAR_MEM_1MB:
554 			ln2range = 20;
555 			break;
556 		case PCIM_BAR_MEM_64:
557 			ln2range = 64;
558 			break;
559 		}
560 	return (ln2range);
561 }
562 
563 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
564 
565 static void
566 pci_fixancient(pcicfgregs *cfg)
567 {
568 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
569 		return;
570 
571 	/* PCI to PCI bridges use header type 1 */
572 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
573 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
574 }
575 
576 /* extract header type specific config data */
577 
578 static void
579 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
580 {
581 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
582 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
583 	case PCIM_HDRTYPE_NORMAL:
584 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
585 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
586 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
587 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
588 		cfg->nummaps	    = PCI_MAXMAPS_0;
589 		break;
590 	case PCIM_HDRTYPE_BRIDGE:
591 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
592 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
593 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
594 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
595 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
596 		cfg->nummaps	    = PCI_MAXMAPS_1;
597 		break;
598 	case PCIM_HDRTYPE_CARDBUS:
599 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
600 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
601 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
602 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
603 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
604 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
605 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
606 		cfg->nummaps	    = PCI_MAXMAPS_2;
607 		break;
608 	}
609 #undef REG
610 }
611 
612 /* read configuration header into pcicfgregs structure */
613 struct pci_devinfo *
614 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
615 {
616 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
617 	uint16_t vid, did;
618 
619 	vid = REG(PCIR_VENDOR, 2);
620 	did = REG(PCIR_DEVICE, 2);
621 	if (vid != 0xffff)
622 		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
623 
624 	return (NULL);
625 }
626 
627 static struct pci_devinfo *
628 pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
629     uint16_t did, size_t size)
630 {
631 	struct pci_devinfo *devlist_entry;
632 	pcicfgregs *cfg;
633 
634 	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
635 
636 	cfg = &devlist_entry->cfg;
637 
638 	cfg->domain		= d;
639 	cfg->bus		= b;
640 	cfg->slot		= s;
641 	cfg->func		= f;
642 	cfg->vendor		= vid;
643 	cfg->device		= did;
644 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
645 	cfg->statreg		= REG(PCIR_STATUS, 2);
646 	cfg->baseclass		= REG(PCIR_CLASS, 1);
647 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
648 	cfg->progif		= REG(PCIR_PROGIF, 1);
649 	cfg->revid		= REG(PCIR_REVID, 1);
650 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
651 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
652 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
653 	cfg->intpin		= REG(PCIR_INTPIN, 1);
654 	cfg->intline		= REG(PCIR_INTLINE, 1);
655 
656 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
657 	cfg->hdrtype		&= ~PCIM_MFDEV;
658 	STAILQ_INIT(&cfg->maps);
659 
660 	cfg->devinfo_size	= size;
661 	cfg->iov		= NULL;
662 
663 	pci_fixancient(cfg);
664 	pci_hdrtypedata(pcib, b, s, f, cfg);
665 
666 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
667 		pci_read_cap(pcib, cfg);
668 
669 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
670 
671 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
672 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
673 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
674 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
675 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
676 
677 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
678 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
679 	devlist_entry->conf.pc_vendor = cfg->vendor;
680 	devlist_entry->conf.pc_device = cfg->device;
681 
682 	devlist_entry->conf.pc_class = cfg->baseclass;
683 	devlist_entry->conf.pc_subclass = cfg->subclass;
684 	devlist_entry->conf.pc_progif = cfg->progif;
685 	devlist_entry->conf.pc_revid = cfg->revid;
686 
687 	pci_numdevs++;
688 	pci_generation++;
689 
690 	return (devlist_entry);
691 }
692 #undef REG
693 
694 static void
695 pci_read_cap(device_t pcib, pcicfgregs *cfg)
696 {
697 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
698 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
699 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
700 	uint64_t addr;
701 #endif
702 	uint32_t val;
703 	int	ptr, nextptr, ptrptr;
704 
705 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
706 	case PCIM_HDRTYPE_NORMAL:
707 	case PCIM_HDRTYPE_BRIDGE:
708 		ptrptr = PCIR_CAP_PTR;
709 		break;
710 	case PCIM_HDRTYPE_CARDBUS:
711 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
712 		break;
713 	default:
714 		return;		/* no extended capabilities support */
715 	}
716 	nextptr = REG(ptrptr, 1);	/* sanity check? */
717 
718 	/*
719 	 * Read capability entries.
720 	 */
721 	while (nextptr != 0) {
722 		/* Sanity check */
723 		if (nextptr > 255) {
724 			printf("illegal PCI extended capability offset %d\n",
725 			    nextptr);
726 			return;
727 		}
728 		/* Find the next entry */
729 		ptr = nextptr;
730 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
731 
732 		/* Process this entry */
733 		switch (REG(ptr + PCICAP_ID, 1)) {
734 		case PCIY_PMG:		/* PCI power management */
735 			if (cfg->pp.pp_cap == 0) {
736 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
737 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
738 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
739 				if ((nextptr - ptr) > PCIR_POWER_DATA)
740 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
741 			}
742 			break;
743 		case PCIY_HT:		/* HyperTransport */
744 			/* Determine HT-specific capability type. */
745 			val = REG(ptr + PCIR_HT_COMMAND, 2);
746 
747 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
748 				cfg->ht.ht_slave = ptr;
749 
750 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
751 			switch (val & PCIM_HTCMD_CAP_MASK) {
752 			case PCIM_HTCAP_MSI_MAPPING:
753 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
754 					/* Sanity check the mapping window. */
755 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
756 					    4);
757 					addr <<= 32;
758 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
759 					    4);
760 					if (addr != MSI_INTEL_ADDR_BASE)
761 						device_printf(pcib,
762 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
763 						    cfg->domain, cfg->bus,
764 						    cfg->slot, cfg->func,
765 						    (long long)addr);
766 				} else
767 					addr = MSI_INTEL_ADDR_BASE;
768 
769 				cfg->ht.ht_msimap = ptr;
770 				cfg->ht.ht_msictrl = val;
771 				cfg->ht.ht_msiaddr = addr;
772 				break;
773 			}
774 #endif
775 			break;
776 		case PCIY_MSI:		/* PCI MSI */
777 			cfg->msi.msi_location = ptr;
778 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
779 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
780 						     PCIM_MSICTRL_MMC_MASK)>>1);
781 			break;
782 		case PCIY_MSIX:		/* PCI MSI-X */
783 			cfg->msix.msix_location = ptr;
784 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
785 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
786 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
787 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
788 			cfg->msix.msix_table_bar = PCIR_BAR(val &
789 			    PCIM_MSIX_BIR_MASK);
790 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
791 			val = REG(ptr + PCIR_MSIX_PBA, 4);
792 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
793 			    PCIM_MSIX_BIR_MASK);
794 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
795 			break;
796 		case PCIY_VPD:		/* PCI Vital Product Data */
797 			cfg->vpd.vpd_reg = ptr;
798 			break;
799 		case PCIY_SUBVENDOR:
800 			/* Should always be true. */
801 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
802 			    PCIM_HDRTYPE_BRIDGE) {
803 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
804 				cfg->subvendor = val & 0xffff;
805 				cfg->subdevice = val >> 16;
806 			}
807 			break;
808 		case PCIY_PCIX:		/* PCI-X */
809 			/*
810 			 * Assume we have a PCI-X chipset if we have
811 			 * at least one PCI-PCI bridge with a PCI-X
812 			 * capability.  Note that some systems with
813 			 * PCI-express or HT chipsets might match on
814 			 * this check as well.
815 			 */
816 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
817 			    PCIM_HDRTYPE_BRIDGE)
818 				pcix_chipset = 1;
819 			cfg->pcix.pcix_location = ptr;
820 			break;
821 		case PCIY_EXPRESS:	/* PCI-express */
822 			/*
823 			 * Assume we have a PCI-express chipset if we have
824 			 * at least one PCI-express device.
825 			 */
826 			pcie_chipset = 1;
827 			cfg->pcie.pcie_location = ptr;
828 			val = REG(ptr + PCIER_FLAGS, 2);
829 			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
830 			break;
831 		default:
832 			break;
833 		}
834 	}
835 
836 #if defined(__powerpc__)
837 	/*
838 	 * Enable the MSI mapping window for all HyperTransport
839 	 * slaves.  PCI-PCI bridges have their windows enabled via
840 	 * PCIB_MAP_MSI().
841 	 */
842 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
843 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
844 		device_printf(pcib,
845 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
846 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
847 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
848 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
849 		     2);
850 	}
851 #endif
852 /* REG and WREG use carry through to next functions */
853 }
854 
855 /*
856  * PCI Vital Product Data
857  */
858 
859 #define	PCI_VPD_TIMEOUT		1000000
860 
861 static int
862 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
863 {
864 	int count = PCI_VPD_TIMEOUT;
865 
866 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
867 
868 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
869 
870 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
871 		if (--count < 0)
872 			return (ENXIO);
873 		DELAY(1);	/* limit looping */
874 	}
875 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
876 
877 	return (0);
878 }
879 
#if 0
/*
 * (Compiled out.)  Write the 32-bit word 'data' to the VPD at offset 'reg'
 * (must be 4-byte aligned).
 *
 * The data register is loaded first, then the offset is written to the
 * address register with bit 0x8000 set, and the address register is polled
 * until that bit reads back clear.
 *
 * Returns 0 on success, or ENXIO if the bit is still set after
 * PCI_VPD_TIMEOUT polls.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	const int addr_reg = cfg->vpd.vpd_reg + PCIR_VPD_ADDR;
	int tries = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(addr_reg, reg | 0x8000, 2);
	for (;;) {
		if ((REG(addr_reg, 2) & 0x8000) != 0x8000)
			break;
		if (--tries < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

/* The poll limit is private to the two VPD register helpers above. */
#undef PCI_VPD_TIMEOUT
901 
902 struct vpd_readstate {
903 	device_t	pcib;
904 	pcicfgregs	*cfg;
905 	uint32_t	val;
906 	int		bytesinval;
907 	int		off;
908 	uint8_t		cksum;
909 };
910 
911 static int
912 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
913 {
914 	uint32_t reg;
915 	uint8_t byte;
916 
917 	if (vrs->bytesinval == 0) {
918 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
919 			return (ENXIO);
920 		vrs->val = le32toh(reg);
921 		vrs->off += 4;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval = 3;
924 	} else {
925 		vrs->val = vrs->val >> 8;
926 		byte = vrs->val & 0xff;
927 		vrs->bytesinval--;
928 	}
929 
930 	vrs->cksum += byte;
931 	*data = byte;
932 	return (0);
933 }
934 
935 static void
936 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
937 {
938 	struct vpd_readstate vrs;
939 	int state;
940 	int name;
941 	int remain;
942 	int i;
943 	int alloc, off;		/* alloc/off for RO/W arrays */
944 	int cksumvalid;
945 	int dflen;
946 	uint8_t byte;
947 	uint8_t byte2;
948 
949 	/* init vpd reader */
950 	vrs.bytesinval = 0;
951 	vrs.off = 0;
952 	vrs.pcib = pcib;
953 	vrs.cfg = cfg;
954 	vrs.cksum = 0;
955 
956 	state = 0;
957 	name = remain = i = 0;	/* shut up stupid gcc */
958 	alloc = off = 0;	/* shut up stupid gcc */
959 	dflen = 0;		/* shut up stupid gcc */
960 	cksumvalid = -1;
961 	while (state >= 0) {
962 		if (vpd_nextbyte(&vrs, &byte)) {
963 			state = -2;
964 			break;
965 		}
966 #if 0
967 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
968 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
969 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
970 #endif
971 		switch (state) {
972 		case 0:		/* item name */
973 			if (byte & 0x80) {
974 				if (vpd_nextbyte(&vrs, &byte2)) {
975 					state = -2;
976 					break;
977 				}
978 				remain = byte2;
979 				if (vpd_nextbyte(&vrs, &byte2)) {
980 					state = -2;
981 					break;
982 				}
983 				remain |= byte2 << 8;
984 				if (remain > (0x7f*4 - vrs.off)) {
985 					state = -1;
986 					pci_printf(cfg,
987 					    "invalid VPD data, remain %#x\n",
988 					    remain);
989 				}
990 				name = byte & 0x7f;
991 			} else {
992 				remain = byte & 0x7;
993 				name = (byte >> 3) & 0xf;
994 			}
995 			switch (name) {
996 			case 0x2:	/* String */
997 				cfg->vpd.vpd_ident = malloc(remain + 1,
998 				    M_DEVBUF, M_WAITOK);
999 				i = 0;
1000 				state = 1;
1001 				break;
1002 			case 0xf:	/* End */
1003 				state = -1;
1004 				break;
1005 			case 0x10:	/* VPD-R */
1006 				alloc = 8;
1007 				off = 0;
1008 				cfg->vpd.vpd_ros = malloc(alloc *
1009 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1010 				    M_WAITOK | M_ZERO);
1011 				state = 2;
1012 				break;
1013 			case 0x11:	/* VPD-W */
1014 				alloc = 8;
1015 				off = 0;
1016 				cfg->vpd.vpd_w = malloc(alloc *
1017 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1018 				    M_WAITOK | M_ZERO);
1019 				state = 5;
1020 				break;
1021 			default:	/* Invalid data, abort */
1022 				state = -1;
1023 				break;
1024 			}
1025 			break;
1026 
1027 		case 1:	/* Identifier String */
1028 			cfg->vpd.vpd_ident[i++] = byte;
1029 			remain--;
1030 			if (remain == 0)  {
1031 				cfg->vpd.vpd_ident[i] = '\0';
1032 				state = 0;
1033 			}
1034 			break;
1035 
1036 		case 2:	/* VPD-R Keyword Header */
1037 			if (off == alloc) {
1038 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1039 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1040 				    M_DEVBUF, M_WAITOK | M_ZERO);
1041 			}
1042 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1043 			if (vpd_nextbyte(&vrs, &byte2)) {
1044 				state = -2;
1045 				break;
1046 			}
1047 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1048 			if (vpd_nextbyte(&vrs, &byte2)) {
1049 				state = -2;
1050 				break;
1051 			}
1052 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1053 			if (dflen == 0 &&
1054 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1055 			    2) == 0) {
1056 				/*
1057 				 * if this happens, we can't trust the rest
1058 				 * of the VPD.
1059 				 */
1060 				pci_printf(cfg, "bad keyword length: %d\n",
1061 				    dflen);
1062 				cksumvalid = 0;
1063 				state = -1;
1064 				break;
1065 			} else if (dflen == 0) {
1066 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1067 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1068 				    M_DEVBUF, M_WAITOK);
1069 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1070 			} else
1071 				cfg->vpd.vpd_ros[off].value = malloc(
1072 				    (dflen + 1) *
1073 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1074 				    M_DEVBUF, M_WAITOK);
1075 			remain -= 3;
1076 			i = 0;
1077 			/* keep in sync w/ state 3's transistions */
1078 			if (dflen == 0 && remain == 0)
1079 				state = 0;
1080 			else if (dflen == 0)
1081 				state = 2;
1082 			else
1083 				state = 3;
1084 			break;
1085 
1086 		case 3:	/* VPD-R Keyword Value */
1087 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1088 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1089 			    "RV", 2) == 0 && cksumvalid == -1) {
1090 				if (vrs.cksum == 0)
1091 					cksumvalid = 1;
1092 				else {
1093 					if (bootverbose)
1094 						pci_printf(cfg,
1095 					    "bad VPD cksum, remain %hhu\n",
1096 						    vrs.cksum);
1097 					cksumvalid = 0;
1098 					state = -1;
1099 					break;
1100 				}
1101 			}
1102 			dflen--;
1103 			remain--;
1104 			/* keep in sync w/ state 2's transistions */
1105 			if (dflen == 0)
1106 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1107 			if (dflen == 0 && remain == 0) {
1108 				cfg->vpd.vpd_rocnt = off;
1109 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1110 				    off * sizeof(*cfg->vpd.vpd_ros),
1111 				    M_DEVBUF, M_WAITOK | M_ZERO);
1112 				state = 0;
1113 			} else if (dflen == 0)
1114 				state = 2;
1115 			break;
1116 
1117 		case 4:
1118 			remain--;
1119 			if (remain == 0)
1120 				state = 0;
1121 			break;
1122 
1123 		case 5:	/* VPD-W Keyword Header */
1124 			if (off == alloc) {
1125 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1126 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1127 				    M_DEVBUF, M_WAITOK | M_ZERO);
1128 			}
1129 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1130 			if (vpd_nextbyte(&vrs, &byte2)) {
1131 				state = -2;
1132 				break;
1133 			}
1134 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1135 			if (vpd_nextbyte(&vrs, &byte2)) {
1136 				state = -2;
1137 				break;
1138 			}
1139 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1140 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1141 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1142 			    sizeof(*cfg->vpd.vpd_w[off].value),
1143 			    M_DEVBUF, M_WAITOK);
1144 			remain -= 3;
1145 			i = 0;
1146 			/* keep in sync w/ state 6's transistions */
1147 			if (dflen == 0 && remain == 0)
1148 				state = 0;
1149 			else if (dflen == 0)
1150 				state = 5;
1151 			else
1152 				state = 6;
1153 			break;
1154 
1155 		case 6:	/* VPD-W Keyword Value */
1156 			cfg->vpd.vpd_w[off].value[i++] = byte;
1157 			dflen--;
1158 			remain--;
1159 			/* keep in sync w/ state 5's transistions */
1160 			if (dflen == 0)
1161 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1162 			if (dflen == 0 && remain == 0) {
1163 				cfg->vpd.vpd_wcnt = off;
1164 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1165 				    off * sizeof(*cfg->vpd.vpd_w),
1166 				    M_DEVBUF, M_WAITOK | M_ZERO);
1167 				state = 0;
1168 			} else if (dflen == 0)
1169 				state = 5;
1170 			break;
1171 
1172 		default:
1173 			pci_printf(cfg, "invalid state: %d\n", state);
1174 			state = -1;
1175 			break;
1176 		}
1177 	}
1178 
1179 	if (cksumvalid == 0 || state < -1) {
1180 		/* read-only data bad, clean up */
1181 		if (cfg->vpd.vpd_ros != NULL) {
1182 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1183 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1184 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1185 			cfg->vpd.vpd_ros = NULL;
1186 		}
1187 	}
1188 	if (state < -1) {
1189 		/* I/O error, clean up */
1190 		pci_printf(cfg, "failed to read VPD data.\n");
1191 		if (cfg->vpd.vpd_ident != NULL) {
1192 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1193 			cfg->vpd.vpd_ident = NULL;
1194 		}
1195 		if (cfg->vpd.vpd_w != NULL) {
1196 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1197 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1198 			free(cfg->vpd.vpd_w, M_DEVBUF);
1199 			cfg->vpd.vpd_w = NULL;
1200 		}
1201 	}
1202 	cfg->vpd.vpd_cached = 1;
1203 #undef REG
1204 #undef WREG
1205 }
1206 
1207 int
1208 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1209 {
1210 	struct pci_devinfo *dinfo = device_get_ivars(child);
1211 	pcicfgregs *cfg = &dinfo->cfg;
1212 
1213 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1214 		pci_read_vpd(device_get_parent(dev), cfg);
1215 
1216 	*identptr = cfg->vpd.vpd_ident;
1217 
1218 	if (*identptr == NULL)
1219 		return (ENXIO);
1220 
1221 	return (0);
1222 }
1223 
1224 int
1225 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1226 	const char **vptr)
1227 {
1228 	struct pci_devinfo *dinfo = device_get_ivars(child);
1229 	pcicfgregs *cfg = &dinfo->cfg;
1230 	int i;
1231 
1232 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1233 		pci_read_vpd(device_get_parent(dev), cfg);
1234 
1235 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1236 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1237 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1238 			*vptr = cfg->vpd.vpd_ros[i].value;
1239 			return (0);
1240 		}
1241 
1242 	*vptr = NULL;
1243 	return (ENXIO);
1244 }
1245 
1246 struct pcicfg_vpd *
1247 pci_fetch_vpd_list(device_t dev)
1248 {
1249 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1250 	pcicfgregs *cfg = &dinfo->cfg;
1251 
1252 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1253 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1254 	return (&cfg->vpd);
1255 }
1256 
1257 /*
1258  * Find the requested HyperTransport capability and return the offset
1259  * in configuration space via the pointer provided.  The function
1260  * returns 0 on success and an error code otherwise.
1261  */
1262 int
1263 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1264 {
1265 	int ptr, error;
1266 	uint16_t val;
1267 
1268 	error = pci_find_cap(child, PCIY_HT, &ptr);
1269 	if (error)
1270 		return (error);
1271 
1272 	/*
1273 	 * Traverse the capabilities list checking each HT capability
1274 	 * to see if it matches the requested HT capability.
1275 	 */
1276 	while (ptr != 0) {
1277 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1278 		if (capability == PCIM_HTCAP_SLAVE ||
1279 		    capability == PCIM_HTCAP_HOST)
1280 			val &= 0xe000;
1281 		else
1282 			val &= PCIM_HTCMD_CAP_MASK;
1283 		if (val == capability) {
1284 			if (capreg != NULL)
1285 				*capreg = ptr;
1286 			return (0);
1287 		}
1288 
1289 		/* Skip to the next HT capability. */
1290 		while (ptr != 0) {
1291 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1292 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1293 			    PCIY_HT)
1294 				break;
1295 		}
1296 	}
1297 	return (ENOENT);
1298 }
1299 
1300 /*
1301  * Find the requested capability and return the offset in
1302  * configuration space via the pointer provided.  The function returns
1303  * 0 on success and an error code otherwise.
1304  */
1305 int
1306 pci_find_cap_method(device_t dev, device_t child, int capability,
1307     int *capreg)
1308 {
1309 	struct pci_devinfo *dinfo = device_get_ivars(child);
1310 	pcicfgregs *cfg = &dinfo->cfg;
1311 	u_int32_t status;
1312 	u_int8_t ptr;
1313 
1314 	/*
1315 	 * Check the CAP_LIST bit of the PCI status register first.
1316 	 */
1317 	status = pci_read_config(child, PCIR_STATUS, 2);
1318 	if (!(status & PCIM_STATUS_CAPPRESENT))
1319 		return (ENXIO);
1320 
1321 	/*
1322 	 * Determine the start pointer of the capabilities list.
1323 	 */
1324 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1325 	case PCIM_HDRTYPE_NORMAL:
1326 	case PCIM_HDRTYPE_BRIDGE:
1327 		ptr = PCIR_CAP_PTR;
1328 		break;
1329 	case PCIM_HDRTYPE_CARDBUS:
1330 		ptr = PCIR_CAP_PTR_2;
1331 		break;
1332 	default:
1333 		/* XXX: panic? */
1334 		return (ENXIO);		/* no extended capabilities support */
1335 	}
1336 	ptr = pci_read_config(child, ptr, 1);
1337 
1338 	/*
1339 	 * Traverse the capabilities list.
1340 	 */
1341 	while (ptr != 0) {
1342 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1343 			if (capreg != NULL)
1344 				*capreg = ptr;
1345 			return (0);
1346 		}
1347 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1348 	}
1349 
1350 	return (ENOENT);
1351 }
1352 
1353 /*
1354  * Find the requested extended capability and return the offset in
1355  * configuration space via the pointer provided.  The function returns
1356  * 0 on success and an error code otherwise.
1357  */
1358 int
1359 pci_find_extcap_method(device_t dev, device_t child, int capability,
1360     int *capreg)
1361 {
1362 	struct pci_devinfo *dinfo = device_get_ivars(child);
1363 	pcicfgregs *cfg = &dinfo->cfg;
1364 	uint32_t ecap;
1365 	uint16_t ptr;
1366 
1367 	/* Only supported for PCI-express devices. */
1368 	if (cfg->pcie.pcie_location == 0)
1369 		return (ENXIO);
1370 
1371 	ptr = PCIR_EXTCAP;
1372 	ecap = pci_read_config(child, ptr, 4);
1373 	if (ecap == 0xffffffff || ecap == 0)
1374 		return (ENOENT);
1375 	for (;;) {
1376 		if (PCI_EXTCAP_ID(ecap) == capability) {
1377 			if (capreg != NULL)
1378 				*capreg = ptr;
1379 			return (0);
1380 		}
1381 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1382 		if (ptr == 0)
1383 			break;
1384 		ecap = pci_read_config(child, ptr, 4);
1385 	}
1386 
1387 	return (ENOENT);
1388 }
1389 
1390 /*
1391  * Support for MSI-X message interrupts.
1392  */
1393 void
1394 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1395     uint64_t address, uint32_t data)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(child);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	uint32_t offset;
1400 
1401 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1402 	offset = msix->msix_table_offset + index * 16;
1403 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1404 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1405 	bus_write_4(msix->msix_table_res, offset + 8, data);
1406 
1407 	/* Enable MSI -> HT mapping. */
1408 	pci_ht_map_msi(child, address);
1409 }
1410 
1411 void
1412 pci_mask_msix(device_t dev, u_int index)
1413 {
1414 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1415 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1416 	uint32_t offset, val;
1417 
1418 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1419 	offset = msix->msix_table_offset + index * 16 + 12;
1420 	val = bus_read_4(msix->msix_table_res, offset);
1421 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1422 		val |= PCIM_MSIX_VCTRL_MASK;
1423 		bus_write_4(msix->msix_table_res, offset, val);
1424 	}
1425 }
1426 
1427 void
1428 pci_unmask_msix(device_t dev, u_int index)
1429 {
1430 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1431 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1432 	uint32_t offset, val;
1433 
1434 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1435 	offset = msix->msix_table_offset + index * 16 + 12;
1436 	val = bus_read_4(msix->msix_table_res, offset);
1437 	if (val & PCIM_MSIX_VCTRL_MASK) {
1438 		val &= ~PCIM_MSIX_VCTRL_MASK;
1439 		bus_write_4(msix->msix_table_res, offset, val);
1440 	}
1441 }
1442 
1443 int
1444 pci_pending_msix(device_t dev, u_int index)
1445 {
1446 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1447 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1448 	uint32_t offset, bit;
1449 
1450 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1451 	offset = msix->msix_pba_offset + (index / 32) * 4;
1452 	bit = 1 << index % 32;
1453 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1454 }
1455 
1456 /*
1457  * Restore MSI-X registers and table during resume.  If MSI-X is
1458  * enabled then walk the virtual table to restore the actual MSI-X
1459  * table.
1460  */
1461 static void
1462 pci_resume_msix(device_t dev)
1463 {
1464 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1465 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1466 	struct msix_table_entry *mte;
1467 	struct msix_vector *mv;
1468 	int i;
1469 
1470 	if (msix->msix_alloc > 0) {
1471 		/* First, mask all vectors. */
1472 		for (i = 0; i < msix->msix_msgnum; i++)
1473 			pci_mask_msix(dev, i);
1474 
1475 		/* Second, program any messages with at least one handler. */
1476 		for (i = 0; i < msix->msix_table_len; i++) {
1477 			mte = &msix->msix_table[i];
1478 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1479 				continue;
1480 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1481 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1482 			pci_unmask_msix(dev, i);
1483 		}
1484 	}
1485 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1486 	    msix->msix_ctrl, 2);
1487 }
1488 
1489 /*
1490  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1491  * returned in *count.  After this function returns, each message will be
1492  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1493  */
1494 int
1495 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1496 {
1497 	struct pci_devinfo *dinfo = device_get_ivars(child);
1498 	pcicfgregs *cfg = &dinfo->cfg;
1499 	struct resource_list_entry *rle;
1500 	int actual, error, i, irq, max;
1501 
1502 	/* Don't let count == 0 get us into trouble. */
1503 	if (*count == 0)
1504 		return (EINVAL);
1505 
1506 	/* If rid 0 is allocated, then fail. */
1507 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1508 	if (rle != NULL && rle->res != NULL)
1509 		return (ENXIO);
1510 
1511 	/* Already have allocated messages? */
1512 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1513 		return (ENXIO);
1514 
1515 	/* If MSI-X is blacklisted for this system, fail. */
1516 	if (pci_msix_blacklisted())
1517 		return (ENXIO);
1518 
1519 	/* MSI-X capability present? */
1520 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1521 		return (ENODEV);
1522 
1523 	/* Make sure the appropriate BARs are mapped. */
1524 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1525 	    cfg->msix.msix_table_bar);
1526 	if (rle == NULL || rle->res == NULL ||
1527 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1528 		return (ENXIO);
1529 	cfg->msix.msix_table_res = rle->res;
1530 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1531 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1532 		    cfg->msix.msix_pba_bar);
1533 		if (rle == NULL || rle->res == NULL ||
1534 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1535 			return (ENXIO);
1536 	}
1537 	cfg->msix.msix_pba_res = rle->res;
1538 
1539 	if (bootverbose)
1540 		device_printf(child,
1541 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1542 		    *count, cfg->msix.msix_msgnum);
1543 	max = min(*count, cfg->msix.msix_msgnum);
1544 	for (i = 0; i < max; i++) {
1545 		/* Allocate a message. */
1546 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1547 		if (error) {
1548 			if (i == 0)
1549 				return (error);
1550 			break;
1551 		}
1552 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1553 		    irq, 1);
1554 	}
1555 	actual = i;
1556 
1557 	if (bootverbose) {
1558 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1559 		if (actual == 1)
1560 			device_printf(child, "using IRQ %lu for MSI-X\n",
1561 			    rle->start);
1562 		else {
1563 			int run;
1564 
1565 			/*
1566 			 * Be fancy and try to print contiguous runs of
1567 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1568 			 * 'run' is true if we are in a range.
1569 			 */
1570 			device_printf(child, "using IRQs %lu", rle->start);
1571 			irq = rle->start;
1572 			run = 0;
1573 			for (i = 1; i < actual; i++) {
1574 				rle = resource_list_find(&dinfo->resources,
1575 				    SYS_RES_IRQ, i + 1);
1576 
1577 				/* Still in a run? */
1578 				if (rle->start == irq + 1) {
1579 					run = 1;
1580 					irq++;
1581 					continue;
1582 				}
1583 
1584 				/* Finish previous range. */
1585 				if (run) {
1586 					printf("-%d", irq);
1587 					run = 0;
1588 				}
1589 
1590 				/* Start new range. */
1591 				printf(",%lu", rle->start);
1592 				irq = rle->start;
1593 			}
1594 
1595 			/* Unfinished range? */
1596 			if (run)
1597 				printf("-%d", irq);
1598 			printf(" for MSI-X\n");
1599 		}
1600 	}
1601 
1602 	/* Mask all vectors. */
1603 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1604 		pci_mask_msix(child, i);
1605 
1606 	/* Allocate and initialize vector data and virtual table. */
1607 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1608 	    M_DEVBUF, M_WAITOK | M_ZERO);
1609 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1610 	    M_DEVBUF, M_WAITOK | M_ZERO);
1611 	for (i = 0; i < actual; i++) {
1612 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1613 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1614 		cfg->msix.msix_table[i].mte_vector = i + 1;
1615 	}
1616 
1617 	/* Update control register to enable MSI-X. */
1618 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1619 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1620 	    cfg->msix.msix_ctrl, 2);
1621 
1622 	/* Update counts of alloc'd messages. */
1623 	cfg->msix.msix_alloc = actual;
1624 	cfg->msix.msix_table_len = actual;
1625 	*count = actual;
1626 	return (0);
1627 }
1628 
1629 /*
1630  * By default, pci_alloc_msix() will assign the allocated IRQ
1631  * resources consecutively to the first N messages in the MSI-X table.
1632  * However, device drivers may want to use different layouts if they
1633  * either receive fewer messages than they asked for, or they wish to
1634  * populate the MSI-X table sparsely.  This method allows the driver
1635  * to specify what layout it wants.  It must be called after a
1636  * successful pci_alloc_msix() but before any of the associated
1637  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1638  *
1639  * The 'vectors' array contains 'count' message vectors.  The array
1640  * maps directly to the MSI-X table in that index 0 in the array
1641  * specifies the vector for the first message in the MSI-X table, etc.
1642  * The vector value in each array index can either be 0 to indicate
1643  * that no vector should be assigned to a message slot, or it can be a
1644  * number from 1 to N (where N is the count returned from a
1645  * succcessful call to pci_alloc_msix()) to indicate which message
1646  * vector (IRQ) to be used for the corresponding message.
1647  *
1648  * On successful return, each message with a non-zero vector will have
1649  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1650  * 1.  Additionally, if any of the IRQs allocated via the previous
1651  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1652  * will be freed back to the system automatically.
1653  *
1654  * For example, suppose a driver has a MSI-X table with 6 messages and
1655  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1656  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1657  * C.  After the call to pci_alloc_msix(), the device will be setup to
1658  * have an MSI-X table of ABC--- (where - means no vector assigned).
1659  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1660  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1661  * be freed back to the system.  This device will also have valid
1662  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1663  *
1664  * In any case, the SYS_RES_IRQ rid X will always map to the message
1665  * at MSI-X table index X - 1 and will only be valid if a vector is
1666  * assigned to that table entry.
1667  */
1668 int
1669 pci_remap_msix_method(device_t dev, device_t child, int count,
1670     const u_int *vectors)
1671 {
1672 	struct pci_devinfo *dinfo = device_get_ivars(child);
1673 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1674 	struct resource_list_entry *rle;
1675 	int i, irq, j, *used;
1676 
1677 	/*
1678 	 * Have to have at least one message in the table but the
1679 	 * table can't be bigger than the actual MSI-X table in the
1680 	 * device.
1681 	 */
1682 	if (count == 0 || count > msix->msix_msgnum)
1683 		return (EINVAL);
1684 
1685 	/* Sanity check the vectors. */
1686 	for (i = 0; i < count; i++)
1687 		if (vectors[i] > msix->msix_alloc)
1688 			return (EINVAL);
1689 
1690 	/*
1691 	 * Make sure there aren't any holes in the vectors to be used.
1692 	 * It's a big pain to support it, and it doesn't really make
1693 	 * sense anyway.  Also, at least one vector must be used.
1694 	 */
1695 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1696 	    M_ZERO);
1697 	for (i = 0; i < count; i++)
1698 		if (vectors[i] != 0)
1699 			used[vectors[i] - 1] = 1;
1700 	for (i = 0; i < msix->msix_alloc - 1; i++)
1701 		if (used[i] == 0 && used[i + 1] == 1) {
1702 			free(used, M_DEVBUF);
1703 			return (EINVAL);
1704 		}
1705 	if (used[0] != 1) {
1706 		free(used, M_DEVBUF);
1707 		return (EINVAL);
1708 	}
1709 
1710 	/* Make sure none of the resources are allocated. */
1711 	for (i = 0; i < msix->msix_table_len; i++) {
1712 		if (msix->msix_table[i].mte_vector == 0)
1713 			continue;
1714 		if (msix->msix_table[i].mte_handlers > 0) {
1715 			free(used, M_DEVBUF);
1716 			return (EBUSY);
1717 		}
1718 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1719 		KASSERT(rle != NULL, ("missing resource"));
1720 		if (rle->res != NULL) {
1721 			free(used, M_DEVBUF);
1722 			return (EBUSY);
1723 		}
1724 	}
1725 
1726 	/* Free the existing resource list entries. */
1727 	for (i = 0; i < msix->msix_table_len; i++) {
1728 		if (msix->msix_table[i].mte_vector == 0)
1729 			continue;
1730 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1731 	}
1732 
1733 	/*
1734 	 * Build the new virtual table keeping track of which vectors are
1735 	 * used.
1736 	 */
1737 	free(msix->msix_table, M_DEVBUF);
1738 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1739 	    M_DEVBUF, M_WAITOK | M_ZERO);
1740 	for (i = 0; i < count; i++)
1741 		msix->msix_table[i].mte_vector = vectors[i];
1742 	msix->msix_table_len = count;
1743 
1744 	/* Free any unused IRQs and resize the vectors array if necessary. */
1745 	j = msix->msix_alloc - 1;
1746 	if (used[j] == 0) {
1747 		struct msix_vector *vec;
1748 
1749 		while (used[j] == 0) {
1750 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1751 			    msix->msix_vectors[j].mv_irq);
1752 			j--;
1753 		}
1754 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1755 		    M_WAITOK);
1756 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1757 		    (j + 1));
1758 		free(msix->msix_vectors, M_DEVBUF);
1759 		msix->msix_vectors = vec;
1760 		msix->msix_alloc = j + 1;
1761 	}
1762 	free(used, M_DEVBUF);
1763 
1764 	/* Map the IRQs onto the rids. */
1765 	for (i = 0; i < count; i++) {
1766 		if (vectors[i] == 0)
1767 			continue;
1768 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1769 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1770 		    irq, 1);
1771 	}
1772 
1773 	if (bootverbose) {
1774 		device_printf(child, "Remapped MSI-X IRQs as: ");
1775 		for (i = 0; i < count; i++) {
1776 			if (i != 0)
1777 				printf(", ");
1778 			if (vectors[i] == 0)
1779 				printf("---");
1780 			else
1781 				printf("%d",
1782 				    msix->msix_vectors[vectors[i]].mv_irq);
1783 		}
1784 		printf("\n");
1785 	}
1786 
1787 	return (0);
1788 }
1789 
1790 static int
1791 pci_release_msix(device_t dev, device_t child)
1792 {
1793 	struct pci_devinfo *dinfo = device_get_ivars(child);
1794 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1795 	struct resource_list_entry *rle;
1796 	int i;
1797 
1798 	/* Do we have any messages to release? */
1799 	if (msix->msix_alloc == 0)
1800 		return (ENODEV);
1801 
1802 	/* Make sure none of the resources are allocated. */
1803 	for (i = 0; i < msix->msix_table_len; i++) {
1804 		if (msix->msix_table[i].mte_vector == 0)
1805 			continue;
1806 		if (msix->msix_table[i].mte_handlers > 0)
1807 			return (EBUSY);
1808 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1809 		KASSERT(rle != NULL, ("missing resource"));
1810 		if (rle->res != NULL)
1811 			return (EBUSY);
1812 	}
1813 
1814 	/* Update control register to disable MSI-X. */
1815 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1816 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1817 	    msix->msix_ctrl, 2);
1818 
1819 	/* Free the resource list entries. */
1820 	for (i = 0; i < msix->msix_table_len; i++) {
1821 		if (msix->msix_table[i].mte_vector == 0)
1822 			continue;
1823 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1824 	}
1825 	free(msix->msix_table, M_DEVBUF);
1826 	msix->msix_table_len = 0;
1827 
1828 	/* Release the IRQs. */
1829 	for (i = 0; i < msix->msix_alloc; i++)
1830 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1831 		    msix->msix_vectors[i].mv_irq);
1832 	free(msix->msix_vectors, M_DEVBUF);
1833 	msix->msix_alloc = 0;
1834 	return (0);
1835 }
1836 
1837 /*
1838  * Return the max supported MSI-X messages this device supports.
1839  * Basically, assuming the MD code can alloc messages, this function
1840  * should return the maximum value that pci_alloc_msix() can return.
1841  * Thus, it is subject to the tunables, etc.
1842  */
1843 int
1844 pci_msix_count_method(device_t dev, device_t child)
1845 {
1846 	struct pci_devinfo *dinfo = device_get_ivars(child);
1847 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1848 
1849 	if (pci_do_msix && msix->msix_location != 0)
1850 		return (msix->msix_msgnum);
1851 	return (0);
1852 }
1853 
1854 /*
1855  * HyperTransport MSI mapping control
1856  */
1857 void
1858 pci_ht_map_msi(device_t dev, uint64_t addr)
1859 {
1860 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1861 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1862 
1863 	if (!ht->ht_msimap)
1864 		return;
1865 
1866 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1867 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1868 		/* Enable MSI -> HT mapping. */
1869 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1870 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1871 		    ht->ht_msictrl, 2);
1872 	}
1873 
1874 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1875 		/* Disable MSI -> HT mapping. */
1876 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1877 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1878 		    ht->ht_msictrl, 2);
1879 	}
1880 }
1881 
1882 int
1883 pci_get_max_read_req(device_t dev)
1884 {
1885 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1886 	int cap;
1887 	uint16_t val;
1888 
1889 	cap = dinfo->cfg.pcie.pcie_location;
1890 	if (cap == 0)
1891 		return (0);
1892 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1893 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1894 	val >>= 12;
1895 	return (1 << (val + 7));
1896 }
1897 
1898 int
1899 pci_set_max_read_req(device_t dev, int size)
1900 {
1901 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1902 	int cap;
1903 	uint16_t val;
1904 
1905 	cap = dinfo->cfg.pcie.pcie_location;
1906 	if (cap == 0)
1907 		return (0);
1908 	if (size < 128)
1909 		size = 128;
1910 	if (size > 4096)
1911 		size = 4096;
1912 	size = (1 << (fls(size) - 1));
1913 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1914 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1915 	val |= (fls(size) - 8) << 12;
1916 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1917 	return (size);
1918 }
1919 
1920 /*
1921  * Support for MSI message signalled interrupts.
1922  */
1923 void
1924 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1925     uint16_t data)
1926 {
1927 	struct pci_devinfo *dinfo = device_get_ivars(child);
1928 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1929 
1930 	/* Write data and address values. */
1931 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
1932 	    address & 0xffffffff, 4);
1933 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1934 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1935 		    address >> 32, 4);
1936 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
1937 		    data, 2);
1938 	} else
1939 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
1940 		    2);
1941 
1942 	/* Enable MSI in the control register. */
1943 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1944 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1945 	    msi->msi_ctrl, 2);
1946 
1947 	/* Enable MSI -> HT mapping. */
1948 	pci_ht_map_msi(child, address);
1949 }
1950 
1951 void
1952 pci_disable_msi_method(device_t dev, device_t child)
1953 {
1954 	struct pci_devinfo *dinfo = device_get_ivars(child);
1955 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1956 
1957 	/* Disable MSI -> HT mapping. */
1958 	pci_ht_map_msi(child, 0);
1959 
1960 	/* Disable MSI in the control register. */
1961 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1962 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1963 	    msi->msi_ctrl, 2);
1964 }
1965 
1966 /*
1967  * Restore MSI registers during resume.  If MSI is enabled then
1968  * restore the data and address registers in addition to the control
1969  * register.
1970  */
1971 static void
1972 pci_resume_msi(device_t dev)
1973 {
1974 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1975 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1976 	uint64_t address;
1977 	uint16_t data;
1978 
1979 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1980 		address = msi->msi_addr;
1981 		data = msi->msi_data;
1982 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1983 		    address & 0xffffffff, 4);
1984 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1985 			pci_write_config(dev, msi->msi_location +
1986 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1987 			pci_write_config(dev, msi->msi_location +
1988 			    PCIR_MSI_DATA_64BIT, data, 2);
1989 		} else
1990 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1991 			    data, 2);
1992 	}
1993 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1994 	    2);
1995 }
1996 
1997 static int
1998 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1999 {
2000 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2001 	pcicfgregs *cfg = &dinfo->cfg;
2002 	struct resource_list_entry *rle;
2003 	struct msix_table_entry *mte;
2004 	struct msix_vector *mv;
2005 	uint64_t addr;
2006 	uint32_t data;
2007 	int error, i, j;
2008 
2009 	/*
2010 	 * Handle MSI first.  We try to find this IRQ among our list
2011 	 * of MSI IRQs.  If we find it, we request updated address and
2012 	 * data registers and apply the results.
2013 	 */
2014 	if (cfg->msi.msi_alloc > 0) {
2015 
2016 		/* If we don't have any active handlers, nothing to do. */
2017 		if (cfg->msi.msi_handlers == 0)
2018 			return (0);
2019 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2020 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2021 			    i + 1);
2022 			if (rle->start == irq) {
2023 				error = PCIB_MAP_MSI(device_get_parent(bus),
2024 				    dev, irq, &addr, &data);
2025 				if (error)
2026 					return (error);
2027 				pci_disable_msi(dev);
2028 				dinfo->cfg.msi.msi_addr = addr;
2029 				dinfo->cfg.msi.msi_data = data;
2030 				pci_enable_msi(dev, addr, data);
2031 				return (0);
2032 			}
2033 		}
2034 		return (ENOENT);
2035 	}
2036 
2037 	/*
2038 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2039 	 * we request the updated mapping info.  If that works, we go
2040 	 * through all the slots that use this IRQ and update them.
2041 	 */
2042 	if (cfg->msix.msix_alloc > 0) {
2043 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2044 			mv = &cfg->msix.msix_vectors[i];
2045 			if (mv->mv_irq == irq) {
2046 				error = PCIB_MAP_MSI(device_get_parent(bus),
2047 				    dev, irq, &addr, &data);
2048 				if (error)
2049 					return (error);
2050 				mv->mv_address = addr;
2051 				mv->mv_data = data;
2052 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2053 					mte = &cfg->msix.msix_table[j];
2054 					if (mte->mte_vector != i + 1)
2055 						continue;
2056 					if (mte->mte_handlers == 0)
2057 						continue;
2058 					pci_mask_msix(dev, j);
2059 					pci_enable_msix(dev, j, addr, data);
2060 					pci_unmask_msix(dev, j);
2061 				}
2062 			}
2063 		}
2064 		return (ENOENT);
2065 	}
2066 
2067 	return (ENOENT);
2068 }
2069 
2070 /*
2071  * Returns true if the specified device is blacklisted because MSI
2072  * doesn't work.
2073  */
2074 int
2075 pci_msi_device_blacklisted(device_t dev)
2076 {
2077 
2078 	if (!pci_honor_msi_blacklist)
2079 		return (0);
2080 
2081 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2082 }
2083 
2084 /*
2085  * Determine if MSI is blacklisted globally on this system.  Currently,
2086  * we just check for blacklisted chipsets as represented by the
2087  * host-PCI bridge at device 0:0:0.  In the future, it may become
2088  * necessary to check other system attributes, such as the kenv values
2089  * that give the motherboard manufacturer and model number.
2090  */
2091 static int
2092 pci_msi_blacklisted(void)
2093 {
2094 	device_t dev;
2095 
2096 	if (!pci_honor_msi_blacklist)
2097 		return (0);
2098 
2099 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2100 	if (!(pcie_chipset || pcix_chipset)) {
2101 		if (vm_guest != VM_GUEST_NO) {
2102 			/*
2103 			 * Whitelist older chipsets in virtual
2104 			 * machines known to support MSI.
2105 			 */
2106 			dev = pci_find_bsf(0, 0, 0);
2107 			if (dev != NULL)
2108 				return (!pci_has_quirk(pci_get_devid(dev),
2109 					PCI_QUIRK_ENABLE_MSI_VM));
2110 		}
2111 		return (1);
2112 	}
2113 
2114 	dev = pci_find_bsf(0, 0, 0);
2115 	if (dev != NULL)
2116 		return (pci_msi_device_blacklisted(dev));
2117 	return (0);
2118 }
2119 
2120 /*
2121  * Returns true if the specified device is blacklisted because MSI-X
2122  * doesn't work.  Note that this assumes that if MSI doesn't work,
2123  * MSI-X doesn't either.
2124  */
2125 int
2126 pci_msix_device_blacklisted(device_t dev)
2127 {
2128 
2129 	if (!pci_honor_msi_blacklist)
2130 		return (0);
2131 
2132 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2133 		return (1);
2134 
2135 	return (pci_msi_device_blacklisted(dev));
2136 }
2137 
2138 /*
2139  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2140  * is blacklisted, assume that MSI-X is as well.  Check for additional
2141  * chipsets where MSI works but MSI-X does not.
2142  */
2143 static int
2144 pci_msix_blacklisted(void)
2145 {
2146 	device_t dev;
2147 
2148 	if (!pci_honor_msi_blacklist)
2149 		return (0);
2150 
2151 	dev = pci_find_bsf(0, 0, 0);
2152 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2153 	    PCI_QUIRK_DISABLE_MSIX))
2154 		return (1);
2155 
2156 	return (pci_msi_blacklisted());
2157 }
2158 
2159 /*
2160  * Attempt to allocate *count MSI messages.  The actual number allocated is
2161  * returned in *count.  After this function returns, each message will be
2162  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2163  */
2164 int
2165 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2166 {
2167 	struct pci_devinfo *dinfo = device_get_ivars(child);
2168 	pcicfgregs *cfg = &dinfo->cfg;
2169 	struct resource_list_entry *rle;
2170 	int actual, error, i, irqs[32];
2171 	uint16_t ctrl;
2172 
2173 	/* Don't let count == 0 get us into trouble. */
2174 	if (*count == 0)
2175 		return (EINVAL);
2176 
2177 	/* If rid 0 is allocated, then fail. */
2178 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2179 	if (rle != NULL && rle->res != NULL)
2180 		return (ENXIO);
2181 
2182 	/* Already have allocated messages? */
2183 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2184 		return (ENXIO);
2185 
2186 	/* If MSI is blacklisted for this system, fail. */
2187 	if (pci_msi_blacklisted())
2188 		return (ENXIO);
2189 
2190 	/* MSI capability present? */
2191 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2192 		return (ENODEV);
2193 
2194 	if (bootverbose)
2195 		device_printf(child,
2196 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2197 		    *count, cfg->msi.msi_msgnum);
2198 
2199 	/* Don't ask for more than the device supports. */
2200 	actual = min(*count, cfg->msi.msi_msgnum);
2201 
2202 	/* Don't ask for more than 32 messages. */
2203 	actual = min(actual, 32);
2204 
2205 	/* MSI requires power of 2 number of messages. */
2206 	if (!powerof2(actual))
2207 		return (EINVAL);
2208 
2209 	for (;;) {
2210 		/* Try to allocate N messages. */
2211 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2212 		    actual, irqs);
2213 		if (error == 0)
2214 			break;
2215 		if (actual == 1)
2216 			return (error);
2217 
2218 		/* Try N / 2. */
2219 		actual >>= 1;
2220 	}
2221 
2222 	/*
2223 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2224 	 * resources in the irqs[] array, so add new resources
2225 	 * starting at rid 1.
2226 	 */
2227 	for (i = 0; i < actual; i++)
2228 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2229 		    irqs[i], irqs[i], 1);
2230 
2231 	if (bootverbose) {
2232 		if (actual == 1)
2233 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2234 		else {
2235 			int run;
2236 
2237 			/*
2238 			 * Be fancy and try to print contiguous runs
2239 			 * of IRQ values as ranges.  'run' is true if
2240 			 * we are in a range.
2241 			 */
2242 			device_printf(child, "using IRQs %d", irqs[0]);
2243 			run = 0;
2244 			for (i = 1; i < actual; i++) {
2245 
2246 				/* Still in a run? */
2247 				if (irqs[i] == irqs[i - 1] + 1) {
2248 					run = 1;
2249 					continue;
2250 				}
2251 
2252 				/* Finish previous range. */
2253 				if (run) {
2254 					printf("-%d", irqs[i - 1]);
2255 					run = 0;
2256 				}
2257 
2258 				/* Start new range. */
2259 				printf(",%d", irqs[i]);
2260 			}
2261 
2262 			/* Unfinished range? */
2263 			if (run)
2264 				printf("-%d", irqs[actual - 1]);
2265 			printf(" for MSI\n");
2266 		}
2267 	}
2268 
2269 	/* Update control register with actual count. */
2270 	ctrl = cfg->msi.msi_ctrl;
2271 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2272 	ctrl |= (ffs(actual) - 1) << 4;
2273 	cfg->msi.msi_ctrl = ctrl;
2274 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2275 
2276 	/* Update counts of alloc'd messages. */
2277 	cfg->msi.msi_alloc = actual;
2278 	cfg->msi.msi_handlers = 0;
2279 	*count = actual;
2280 	return (0);
2281 }
2282 
2283 /* Release the MSI messages associated with this device. */
2284 int
2285 pci_release_msi_method(device_t dev, device_t child)
2286 {
2287 	struct pci_devinfo *dinfo = device_get_ivars(child);
2288 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2289 	struct resource_list_entry *rle;
2290 	int error, i, irqs[32];
2291 
2292 	/* Try MSI-X first. */
2293 	error = pci_release_msix(dev, child);
2294 	if (error != ENODEV)
2295 		return (error);
2296 
2297 	/* Do we have any messages to release? */
2298 	if (msi->msi_alloc == 0)
2299 		return (ENODEV);
2300 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2301 
2302 	/* Make sure none of the resources are allocated. */
2303 	if (msi->msi_handlers > 0)
2304 		return (EBUSY);
2305 	for (i = 0; i < msi->msi_alloc; i++) {
2306 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2307 		KASSERT(rle != NULL, ("missing MSI resource"));
2308 		if (rle->res != NULL)
2309 			return (EBUSY);
2310 		irqs[i] = rle->start;
2311 	}
2312 
2313 	/* Update control register with 0 count. */
2314 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2315 	    ("%s: MSI still enabled", __func__));
2316 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2317 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2318 	    msi->msi_ctrl, 2);
2319 
2320 	/* Release the messages. */
2321 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2322 	for (i = 0; i < msi->msi_alloc; i++)
2323 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2324 
2325 	/* Update alloc count. */
2326 	msi->msi_alloc = 0;
2327 	msi->msi_addr = 0;
2328 	msi->msi_data = 0;
2329 	return (0);
2330 }
2331 
2332 /*
2333  * Return the max supported MSI messages this device supports.
2334  * Basically, assuming the MD code can alloc messages, this function
2335  * should return the maximum value that pci_alloc_msi() can return.
2336  * Thus, it is subject to the tunables, etc.
2337  */
2338 int
2339 pci_msi_count_method(device_t dev, device_t child)
2340 {
2341 	struct pci_devinfo *dinfo = device_get_ivars(child);
2342 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2343 
2344 	if (pci_do_msi && msi->msi_location != 0)
2345 		return (msi->msi_msgnum);
2346 	return (0);
2347 }
2348 
2349 /* free pcicfgregs structure and all depending data structures */
2350 
2351 int
2352 pci_freecfg(struct pci_devinfo *dinfo)
2353 {
2354 	struct devlist *devlist_head;
2355 	struct pci_map *pm, *next;
2356 	int i;
2357 
2358 	devlist_head = &pci_devq;
2359 
2360 	if (dinfo->cfg.vpd.vpd_reg) {
2361 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2362 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2363 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2364 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2365 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2366 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2367 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2368 	}
2369 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2370 		free(pm, M_DEVBUF);
2371 	}
2372 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2373 	free(dinfo, M_DEVBUF);
2374 
2375 	/* increment the generation count */
2376 	pci_generation++;
2377 
2378 	/* we're losing one device */
2379 	pci_numdevs--;
2380 	return (0);
2381 }
2382 
2383 /*
 * PCI power management
2385  */
2386 int
2387 pci_set_powerstate_method(device_t dev, device_t child, int state)
2388 {
2389 	struct pci_devinfo *dinfo = device_get_ivars(child);
2390 	pcicfgregs *cfg = &dinfo->cfg;
2391 	uint16_t status;
2392 	int oldstate, highest, delay;
2393 
2394 	if (cfg->pp.pp_cap == 0)
2395 		return (EOPNOTSUPP);
2396 
2397 	/*
2398 	 * Optimize a no state change request away.  While it would be OK to
2399 	 * write to the hardware in theory, some devices have shown odd
2400 	 * behavior when going from D3 -> D3.
2401 	 */
2402 	oldstate = pci_get_powerstate(child);
2403 	if (oldstate == state)
2404 		return (0);
2405 
2406 	/*
2407 	 * The PCI power management specification states that after a state
2408 	 * transition between PCI power states, system software must
2409 	 * guarantee a minimal delay before the function accesses the device.
2410 	 * Compute the worst case delay that we need to guarantee before we
2411 	 * access the device.  Many devices will be responsive much more
2412 	 * quickly than this delay, but there are some that don't respond
2413 	 * instantly to state changes.  Transitions to/from D3 state require
2414 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2415 	 * is done below with DELAY rather than a sleeper function because
2416 	 * this function can be called from contexts where we cannot sleep.
2417 	 */
2418 	highest = (oldstate > state) ? oldstate : state;
2419 	if (highest == PCI_POWERSTATE_D3)
2420 	    delay = 10000;
2421 	else if (highest == PCI_POWERSTATE_D2)
2422 	    delay = 200;
2423 	else
2424 	    delay = 0;
2425 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2426 	    & ~PCIM_PSTAT_DMASK;
2427 	switch (state) {
2428 	case PCI_POWERSTATE_D0:
2429 		status |= PCIM_PSTAT_D0;
2430 		break;
2431 	case PCI_POWERSTATE_D1:
2432 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2433 			return (EOPNOTSUPP);
2434 		status |= PCIM_PSTAT_D1;
2435 		break;
2436 	case PCI_POWERSTATE_D2:
2437 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2438 			return (EOPNOTSUPP);
2439 		status |= PCIM_PSTAT_D2;
2440 		break;
2441 	case PCI_POWERSTATE_D3:
2442 		status |= PCIM_PSTAT_D3;
2443 		break;
2444 	default:
2445 		return (EINVAL);
2446 	}
2447 
2448 	if (bootverbose)
2449 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2450 		    state);
2451 
2452 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2453 	if (delay)
2454 		DELAY(delay);
2455 	return (0);
2456 }
2457 
2458 int
2459 pci_get_powerstate_method(device_t dev, device_t child)
2460 {
2461 	struct pci_devinfo *dinfo = device_get_ivars(child);
2462 	pcicfgregs *cfg = &dinfo->cfg;
2463 	uint16_t status;
2464 	int result;
2465 
2466 	if (cfg->pp.pp_cap != 0) {
2467 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2468 		switch (status & PCIM_PSTAT_DMASK) {
2469 		case PCIM_PSTAT_D0:
2470 			result = PCI_POWERSTATE_D0;
2471 			break;
2472 		case PCIM_PSTAT_D1:
2473 			result = PCI_POWERSTATE_D1;
2474 			break;
2475 		case PCIM_PSTAT_D2:
2476 			result = PCI_POWERSTATE_D2;
2477 			break;
2478 		case PCIM_PSTAT_D3:
2479 			result = PCI_POWERSTATE_D3;
2480 			break;
2481 		default:
2482 			result = PCI_POWERSTATE_UNKNOWN;
2483 			break;
2484 		}
2485 	} else {
2486 		/* No support, device is always at D0 */
2487 		result = PCI_POWERSTATE_D0;
2488 	}
2489 	return (result);
2490 }
2491 
2492 /*
2493  * Some convenience functions for PCI device drivers.
2494  */
2495 
2496 static __inline void
2497 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2498 {
2499 	uint16_t	command;
2500 
2501 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2502 	command |= bit;
2503 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2504 }
2505 
2506 static __inline void
2507 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2508 {
2509 	uint16_t	command;
2510 
2511 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2512 	command &= ~bit;
2513 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2514 }
2515 
2516 int
2517 pci_enable_busmaster_method(device_t dev, device_t child)
2518 {
2519 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2520 	return (0);
2521 }
2522 
2523 int
2524 pci_disable_busmaster_method(device_t dev, device_t child)
2525 {
2526 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2527 	return (0);
2528 }
2529 
2530 int
2531 pci_enable_io_method(device_t dev, device_t child, int space)
2532 {
2533 	uint16_t bit;
2534 
2535 	switch(space) {
2536 	case SYS_RES_IOPORT:
2537 		bit = PCIM_CMD_PORTEN;
2538 		break;
2539 	case SYS_RES_MEMORY:
2540 		bit = PCIM_CMD_MEMEN;
2541 		break;
2542 	default:
2543 		return (EINVAL);
2544 	}
2545 	pci_set_command_bit(dev, child, bit);
2546 	return (0);
2547 }
2548 
2549 int
2550 pci_disable_io_method(device_t dev, device_t child, int space)
2551 {
2552 	uint16_t bit;
2553 
2554 	switch(space) {
2555 	case SYS_RES_IOPORT:
2556 		bit = PCIM_CMD_PORTEN;
2557 		break;
2558 	case SYS_RES_MEMORY:
2559 		bit = PCIM_CMD_MEMEN;
2560 		break;
2561 	default:
2562 		return (EINVAL);
2563 	}
2564 	pci_clear_command_bit(dev, child, bit);
2565 	return (0);
2566 }
2567 
2568 /*
2569  * New style pci driver.  Parent device is either a pci-host-bridge or a
2570  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2571  */
2572 
2573 void
2574 pci_print_verbose(struct pci_devinfo *dinfo)
2575 {
2576 
2577 	if (bootverbose) {
2578 		pcicfgregs *cfg = &dinfo->cfg;
2579 
2580 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2581 		    cfg->vendor, cfg->device, cfg->revid);
2582 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2583 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2584 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2585 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2586 		    cfg->mfdev);
2587 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2588 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2589 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2590 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2591 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2592 		if (cfg->intpin > 0)
2593 			printf("\tintpin=%c, irq=%d\n",
2594 			    cfg->intpin +'a' -1, cfg->intline);
2595 		if (cfg->pp.pp_cap) {
2596 			uint16_t status;
2597 
2598 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2599 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2600 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2601 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2602 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2603 			    status & PCIM_PSTAT_DMASK);
2604 		}
2605 		if (cfg->msi.msi_location) {
2606 			int ctrl;
2607 
2608 			ctrl = cfg->msi.msi_ctrl;
2609 			printf("\tMSI supports %d message%s%s%s\n",
2610 			    cfg->msi.msi_msgnum,
2611 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2612 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2613 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2614 		}
2615 		if (cfg->msix.msix_location) {
2616 			printf("\tMSI-X supports %d message%s ",
2617 			    cfg->msix.msix_msgnum,
2618 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2619 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2620 				printf("in map 0x%x\n",
2621 				    cfg->msix.msix_table_bar);
2622 			else
2623 				printf("in maps 0x%x and 0x%x\n",
2624 				    cfg->msix.msix_table_bar,
2625 				    cfg->msix.msix_pba_bar);
2626 		}
2627 	}
2628 }
2629 
2630 static int
2631 pci_porten(device_t dev)
2632 {
2633 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2634 }
2635 
2636 static int
2637 pci_memen(device_t dev)
2638 {
2639 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2640 }
2641 
2642 void
2643 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2644     int *bar64)
2645 {
2646 	struct pci_devinfo *dinfo;
2647 	pci_addr_t map, testval;
2648 	int ln2range;
2649 	uint16_t cmd;
2650 
2651 	/*
2652 	 * The device ROM BAR is special.  It is always a 32-bit
2653 	 * memory BAR.  Bit 0 is special and should not be set when
2654 	 * sizing the BAR.
2655 	 */
2656 	dinfo = device_get_ivars(dev);
2657 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2658 		map = pci_read_config(dev, reg, 4);
2659 		pci_write_config(dev, reg, 0xfffffffe, 4);
2660 		testval = pci_read_config(dev, reg, 4);
2661 		pci_write_config(dev, reg, map, 4);
2662 		*mapp = map;
2663 		*testvalp = testval;
2664 		if (bar64 != NULL)
2665 			*bar64 = 0;
2666 		return;
2667 	}
2668 
2669 	map = pci_read_config(dev, reg, 4);
2670 	ln2range = pci_maprange(map);
2671 	if (ln2range == 64)
2672 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2673 
2674 	/*
2675 	 * Disable decoding via the command register before
2676 	 * determining the BAR's length since we will be placing it in
2677 	 * a weird state.
2678 	 */
2679 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2680 	pci_write_config(dev, PCIR_COMMAND,
2681 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2682 
2683 	/*
2684 	 * Determine the BAR's length by writing all 1's.  The bottom
2685 	 * log_2(size) bits of the BAR will stick as 0 when we read
2686 	 * the value back.
2687 	 */
2688 	pci_write_config(dev, reg, 0xffffffff, 4);
2689 	testval = pci_read_config(dev, reg, 4);
2690 	if (ln2range == 64) {
2691 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2692 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2693 	}
2694 
2695 	/*
2696 	 * Restore the original value of the BAR.  We may have reprogrammed
2697 	 * the BAR of the low-level console device and when booting verbose,
2698 	 * we need the console device addressable.
2699 	 */
2700 	pci_write_config(dev, reg, map, 4);
2701 	if (ln2range == 64)
2702 		pci_write_config(dev, reg + 4, map >> 32, 4);
2703 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2704 
2705 	*mapp = map;
2706 	*testvalp = testval;
2707 	if (bar64 != NULL)
2708 		*bar64 = (ln2range == 64);
2709 }
2710 
2711 static void
2712 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2713 {
2714 	struct pci_devinfo *dinfo;
2715 	int ln2range;
2716 
2717 	/* The device ROM BAR is always a 32-bit memory BAR. */
2718 	dinfo = device_get_ivars(dev);
2719 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2720 		ln2range = 32;
2721 	else
2722 		ln2range = pci_maprange(pm->pm_value);
2723 	pci_write_config(dev, pm->pm_reg, base, 4);
2724 	if (ln2range == 64)
2725 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2726 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2727 	if (ln2range == 64)
2728 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2729 		    pm->pm_reg + 4, 4) << 32;
2730 }
2731 
2732 struct pci_map *
2733 pci_find_bar(device_t dev, int reg)
2734 {
2735 	struct pci_devinfo *dinfo;
2736 	struct pci_map *pm;
2737 
2738 	dinfo = device_get_ivars(dev);
2739 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2740 		if (pm->pm_reg == reg)
2741 			return (pm);
2742 	}
2743 	return (NULL);
2744 }
2745 
2746 int
2747 pci_bar_enabled(device_t dev, struct pci_map *pm)
2748 {
2749 	struct pci_devinfo *dinfo;
2750 	uint16_t cmd;
2751 
2752 	dinfo = device_get_ivars(dev);
2753 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2754 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2755 		return (0);
2756 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2757 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2758 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2759 	else
2760 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2761 }
2762 
2763 struct pci_map *
2764 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2765 {
2766 	struct pci_devinfo *dinfo;
2767 	struct pci_map *pm, *prev;
2768 
2769 	dinfo = device_get_ivars(dev);
2770 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2771 	pm->pm_reg = reg;
2772 	pm->pm_value = value;
2773 	pm->pm_size = size;
2774 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2775 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2776 		    reg));
2777 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2778 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2779 			break;
2780 	}
2781 	if (prev != NULL)
2782 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2783 	else
2784 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2785 	return (pm);
2786 }
2787 
2788 static void
2789 pci_restore_bars(device_t dev)
2790 {
2791 	struct pci_devinfo *dinfo;
2792 	struct pci_map *pm;
2793 	int ln2range;
2794 
2795 	dinfo = device_get_ivars(dev);
2796 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2797 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2798 			ln2range = 32;
2799 		else
2800 			ln2range = pci_maprange(pm->pm_value);
2801 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2802 		if (ln2range == 64)
2803 			pci_write_config(dev, pm->pm_reg + 4,
2804 			    pm->pm_value >> 32, 4);
2805 	}
2806 }
2807 
2808 /*
2809  * Add a resource based on a pci map register. Return 1 if the map
2810  * register is a 32bit map register or 2 if it is a 64bit register.
2811  */
2812 static int
2813 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2814     int force, int prefetch)
2815 {
2816 	struct pci_map *pm;
2817 	pci_addr_t base, map, testval;
2818 	pci_addr_t start, end, count;
2819 	int barlen, basezero, flags, maprange, mapsize, type;
2820 	uint16_t cmd;
2821 	struct resource *res;
2822 
2823 	/*
2824 	 * The BAR may already exist if the device is a CardBus card
2825 	 * whose CIS is stored in this BAR.
2826 	 */
2827 	pm = pci_find_bar(dev, reg);
2828 	if (pm != NULL) {
2829 		maprange = pci_maprange(pm->pm_value);
2830 		barlen = maprange == 64 ? 2 : 1;
2831 		return (barlen);
2832 	}
2833 
2834 	pci_read_bar(dev, reg, &map, &testval, NULL);
2835 	if (PCI_BAR_MEM(map)) {
2836 		type = SYS_RES_MEMORY;
2837 		if (map & PCIM_BAR_MEM_PREFETCH)
2838 			prefetch = 1;
2839 	} else
2840 		type = SYS_RES_IOPORT;
2841 	mapsize = pci_mapsize(testval);
2842 	base = pci_mapbase(map);
2843 #ifdef __PCI_BAR_ZERO_VALID
2844 	basezero = 0;
2845 #else
2846 	basezero = base == 0;
2847 #endif
2848 	maprange = pci_maprange(map);
2849 	barlen = maprange == 64 ? 2 : 1;
2850 
2851 	/*
2852 	 * For I/O registers, if bottom bit is set, and the next bit up
2853 	 * isn't clear, we know we have a BAR that doesn't conform to the
2854 	 * spec, so ignore it.  Also, sanity check the size of the data
2855 	 * areas to the type of memory involved.  Memory must be at least
2856 	 * 16 bytes in size, while I/O ranges must be at least 4.
2857 	 */
2858 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2859 		return (barlen);
2860 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2861 	    (type == SYS_RES_IOPORT && mapsize < 2))
2862 		return (barlen);
2863 
2864 	/* Save a record of this BAR. */
2865 	pm = pci_add_bar(dev, reg, map, mapsize);
2866 	if (bootverbose) {
2867 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2868 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2869 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2870 			printf(", port disabled\n");
2871 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2872 			printf(", memory disabled\n");
2873 		else
2874 			printf(", enabled\n");
2875 	}
2876 
2877 	/*
2878 	 * If base is 0, then we have problems if this architecture does
2879 	 * not allow that.  It is best to ignore such entries for the
2880 	 * moment.  These will be allocated later if the driver specifically
2881 	 * requests them.  However, some removable busses look better when
2882 	 * all resources are allocated, so allow '0' to be overriden.
2883 	 *
2884 	 * Similarly treat maps whose values is the same as the test value
2885 	 * read back.  These maps have had all f's written to them by the
2886 	 * BIOS in an attempt to disable the resources.
2887 	 */
2888 	if (!force && (basezero || map == testval))
2889 		return (barlen);
2890 	if ((u_long)base != base) {
2891 		device_printf(bus,
2892 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2893 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2894 		    pci_get_function(dev), reg);
2895 		return (barlen);
2896 	}
2897 
2898 	/*
2899 	 * This code theoretically does the right thing, but has
2900 	 * undesirable side effects in some cases where peripherals
2901 	 * respond oddly to having these bits enabled.  Let the user
2902 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2903 	 * default).
2904 	 */
2905 	if (pci_enable_io_modes) {
2906 		/* Turn on resources that have been left off by a lazy BIOS */
2907 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2908 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2909 			cmd |= PCIM_CMD_PORTEN;
2910 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2911 		}
2912 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2913 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2914 			cmd |= PCIM_CMD_MEMEN;
2915 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2916 		}
2917 	} else {
2918 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2919 			return (barlen);
2920 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2921 			return (barlen);
2922 	}
2923 
2924 	count = (pci_addr_t)1 << mapsize;
2925 	flags = RF_ALIGNMENT_LOG2(mapsize);
2926 	if (prefetch)
2927 		flags |= RF_PREFETCHABLE;
2928 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2929 		start = 0;	/* Let the parent decide. */
2930 		end = ~0ul;
2931 	} else {
2932 		start = base;
2933 		end = base + count - 1;
2934 	}
2935 	resource_list_add(rl, type, reg, start, end, count);
2936 
2937 	/*
2938 	 * Try to allocate the resource for this BAR from our parent
2939 	 * so that this resource range is already reserved.  The
2940 	 * driver for this device will later inherit this resource in
2941 	 * pci_alloc_resource().
2942 	 */
2943 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2944 	    flags);
2945 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2946 		/*
2947 		 * If the allocation fails, try to allocate a resource for
2948 		 * this BAR using any available range.  The firmware felt
2949 		 * it was important enough to assign a resource, so don't
2950 		 * disable decoding if we can help it.
2951 		 */
2952 		resource_list_delete(rl, type, reg);
2953 		resource_list_add(rl, type, reg, 0, ~0ul, count);
2954 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2955 		    count, flags);
2956 	}
2957 	if (res == NULL) {
2958 		/*
2959 		 * If the allocation fails, delete the resource list entry
2960 		 * and disable decoding for this device.
2961 		 *
2962 		 * If the driver requests this resource in the future,
2963 		 * pci_reserve_map() will try to allocate a fresh
2964 		 * resource range.
2965 		 */
2966 		resource_list_delete(rl, type, reg);
2967 		pci_disable_io(dev, type);
2968 		if (bootverbose)
2969 			device_printf(bus,
2970 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2971 			    pci_get_domain(dev), pci_get_bus(dev),
2972 			    pci_get_slot(dev), pci_get_function(dev), reg);
2973 	} else {
2974 		start = rman_get_start(res);
2975 		pci_write_bar(dev, pm, start);
2976 	}
2977 	return (barlen);
2978 }
2979 
2980 /*
2981  * For ATA devices we need to decide early what addressing mode to use.
2982  * Legacy demands that the primary and secondary ATA ports sits on the
2983  * same addresses that old ISA hardware did. This dictates that we use
2984  * those addresses and ignore the BAR's if we cannot set PCI native
2985  * addressing mode.
2986  */
2987 static void
2988 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2989     uint32_t prefetchmask)
2990 {
2991 	int rid, type, progif;
2992 #if 0
2993 	/* if this device supports PCI native addressing use it */
2994 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2995 	if ((progif & 0x8a) == 0x8a) {
2996 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2997 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2998 			printf("Trying ATA native PCI addressing mode\n");
2999 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3000 		}
3001 	}
3002 #endif
3003 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3004 	type = SYS_RES_IOPORT;
3005 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3006 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3007 		    prefetchmask & (1 << 0));
3008 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3009 		    prefetchmask & (1 << 1));
3010 	} else {
3011 		rid = PCIR_BAR(0);
3012 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3013 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3014 		    0x1f7, 8, 0);
3015 		rid = PCIR_BAR(1);
3016 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3017 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3018 		    0x3f6, 1, 0);
3019 	}
3020 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3021 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3022 		    prefetchmask & (1 << 2));
3023 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3024 		    prefetchmask & (1 << 3));
3025 	} else {
3026 		rid = PCIR_BAR(2);
3027 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3028 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3029 		    0x177, 8, 0);
3030 		rid = PCIR_BAR(3);
3031 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3032 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3033 		    0x376, 1, 0);
3034 	}
3035 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3036 	    prefetchmask & (1 << 4));
3037 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3038 	    prefetchmask & (1 << 5));
3039 }
3040 
3041 static void
3042 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3043 {
3044 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3045 	pcicfgregs *cfg = &dinfo->cfg;
3046 	char tunable_name[64];
3047 	int irq;
3048 
3049 	/* Has to have an intpin to have an interrupt. */
3050 	if (cfg->intpin == 0)
3051 		return;
3052 
3053 	/* Let the user override the IRQ with a tunable. */
3054 	irq = PCI_INVALID_IRQ;
3055 	snprintf(tunable_name, sizeof(tunable_name),
3056 	    "hw.pci%d.%d.%d.INT%c.irq",
3057 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3058 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3059 		irq = PCI_INVALID_IRQ;
3060 
3061 	/*
3062 	 * If we didn't get an IRQ via the tunable, then we either use the
3063 	 * IRQ value in the intline register or we ask the bus to route an
3064 	 * interrupt for us.  If force_route is true, then we only use the
3065 	 * value in the intline register if the bus was unable to assign an
3066 	 * IRQ.
3067 	 */
3068 	if (!PCI_INTERRUPT_VALID(irq)) {
3069 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3070 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3071 		if (!PCI_INTERRUPT_VALID(irq))
3072 			irq = cfg->intline;
3073 	}
3074 
3075 	/* If after all that we don't have an IRQ, just bail. */
3076 	if (!PCI_INTERRUPT_VALID(irq))
3077 		return;
3078 
3079 	/* Update the config register if it changed. */
3080 	if (irq != cfg->intline) {
3081 		cfg->intline = irq;
3082 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3083 	}
3084 
3085 	/* Add this IRQ as rid 0 interrupt resource. */
3086 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3087 }
3088 
3089 /* Perform early OHCI takeover from SMM. */
3090 static void
3091 ohci_early_takeover(device_t self)
3092 {
3093 	struct resource *res;
3094 	uint32_t ctl;
3095 	int rid;
3096 	int i;
3097 
3098 	rid = PCIR_BAR(0);
3099 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3100 	if (res == NULL)
3101 		return;
3102 
3103 	ctl = bus_read_4(res, OHCI_CONTROL);
3104 	if (ctl & OHCI_IR) {
3105 		if (bootverbose)
3106 			printf("ohci early: "
3107 			    "SMM active, request owner change\n");
3108 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3109 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3110 			DELAY(1000);
3111 			ctl = bus_read_4(res, OHCI_CONTROL);
3112 		}
3113 		if (ctl & OHCI_IR) {
3114 			if (bootverbose)
3115 				printf("ohci early: "
3116 				    "SMM does not respond, resetting\n");
3117 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3118 		}
3119 		/* Disable interrupts */
3120 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3121 	}
3122 
3123 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3124 }
3125 
3126 /* Perform early UHCI takeover from SMM. */
3127 static void
3128 uhci_early_takeover(device_t self)
3129 {
3130 	struct resource *res;
3131 	int rid;
3132 
3133 	/*
3134 	 * Set the PIRQD enable bit and switch off all the others. We don't
3135 	 * want legacy support to interfere with us XXX Does this also mean
3136 	 * that the BIOS won't touch the keyboard anymore if it is connected
3137 	 * to the ports of the root hub?
3138 	 */
3139 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3140 
3141 	/* Disable interrupts */
3142 	rid = PCI_UHCI_BASE_REG;
3143 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3144 	if (res != NULL) {
3145 		bus_write_2(res, UHCI_INTR, 0);
3146 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3147 	}
3148 }
3149 
3150 /* Perform early EHCI takeover from SMM. */
3151 static void
3152 ehci_early_takeover(device_t self)
3153 {
3154 	struct resource *res;
3155 	uint32_t cparams;
3156 	uint32_t eec;
3157 	uint8_t eecp;
3158 	uint8_t bios_sem;
3159 	uint8_t offs;
3160 	int rid;
3161 	int i;
3162 
3163 	rid = PCIR_BAR(0);
3164 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3165 	if (res == NULL)
3166 		return;
3167 
3168 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3169 
3170 	/* Synchronise with the BIOS if it owns the controller. */
3171 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3172 	    eecp = EHCI_EECP_NEXT(eec)) {
3173 		eec = pci_read_config(self, eecp, 4);
3174 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3175 			continue;
3176 		}
3177 		bios_sem = pci_read_config(self, eecp +
3178 		    EHCI_LEGSUP_BIOS_SEM, 1);
3179 		if (bios_sem == 0) {
3180 			continue;
3181 		}
3182 		if (bootverbose)
3183 			printf("ehci early: "
3184 			    "SMM active, request owner change\n");
3185 
3186 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3187 
3188 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3189 			DELAY(1000);
3190 			bios_sem = pci_read_config(self, eecp +
3191 			    EHCI_LEGSUP_BIOS_SEM, 1);
3192 		}
3193 
3194 		if (bios_sem != 0) {
3195 			if (bootverbose)
3196 				printf("ehci early: "
3197 				    "SMM does not respond\n");
3198 		}
3199 		/* Disable interrupts */
3200 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3201 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3202 	}
3203 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3204 }
3205 
3206 /* Perform early XHCI takeover from SMM. */
3207 static void
3208 xhci_early_takeover(device_t self)
3209 {
3210 	struct resource *res;
3211 	uint32_t cparams;
3212 	uint32_t eec;
3213 	uint8_t eecp;
3214 	uint8_t bios_sem;
3215 	uint8_t offs;
3216 	int rid;
3217 	int i;
3218 
3219 	rid = PCIR_BAR(0);
3220 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3221 	if (res == NULL)
3222 		return;
3223 
3224 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3225 
3226 	eec = -1;
3227 
3228 	/* Synchronise with the BIOS if it owns the controller. */
3229 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3230 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3231 		eec = bus_read_4(res, eecp);
3232 
3233 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3234 			continue;
3235 
3236 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3237 		if (bios_sem == 0)
3238 			continue;
3239 
3240 		if (bootverbose)
3241 			printf("xhci early: "
3242 			    "SMM active, request owner change\n");
3243 
3244 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3245 
3246 		/* wait a maximum of 5 second */
3247 
3248 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3249 			DELAY(1000);
3250 			bios_sem = bus_read_1(res, eecp +
3251 			    XHCI_XECP_BIOS_SEM);
3252 		}
3253 
3254 		if (bios_sem != 0) {
3255 			if (bootverbose)
3256 				printf("xhci early: "
3257 				    "SMM does not respond\n");
3258 		}
3259 
3260 		/* Disable interrupts */
3261 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3262 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3263 		bus_read_4(res, offs + XHCI_USBSTS);
3264 	}
3265 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3266 }
3267 
3268 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3269 static void
3270 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3271     struct resource_list *rl)
3272 {
3273 	struct resource *res;
3274 	char *cp;
3275 	u_long start, end, count;
3276 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3277 
3278 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3279 	case PCIM_HDRTYPE_BRIDGE:
3280 		sec_reg = PCIR_SECBUS_1;
3281 		sub_reg = PCIR_SUBBUS_1;
3282 		break;
3283 	case PCIM_HDRTYPE_CARDBUS:
3284 		sec_reg = PCIR_SECBUS_2;
3285 		sub_reg = PCIR_SUBBUS_2;
3286 		break;
3287 	default:
3288 		return;
3289 	}
3290 
3291 	/*
3292 	 * If the existing bus range is valid, attempt to reserve it
3293 	 * from our parent.  If this fails for any reason, clear the
3294 	 * secbus and subbus registers.
3295 	 *
3296 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3297 	 * This would at least preserve the existing sec_bus if it is
3298 	 * valid.
3299 	 */
3300 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3301 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3302 
3303 	/* Quirk handling. */
3304 	switch (pci_get_devid(dev)) {
3305 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3306 		sup_bus = pci_read_config(dev, 0x41, 1);
3307 		if (sup_bus != 0xff) {
3308 			sec_bus = sup_bus + 1;
3309 			sub_bus = sup_bus + 1;
3310 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3311 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3312 		}
3313 		break;
3314 
3315 	case 0x00dd10de:
3316 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3317 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3318 			break;
3319 		if (strncmp(cp, "Compal", 6) != 0) {
3320 			freeenv(cp);
3321 			break;
3322 		}
3323 		freeenv(cp);
3324 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3325 			break;
3326 		if (strncmp(cp, "08A0", 4) != 0) {
3327 			freeenv(cp);
3328 			break;
3329 		}
3330 		freeenv(cp);
3331 		if (sub_bus < 0xa) {
3332 			sub_bus = 0xa;
3333 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3334 		}
3335 		break;
3336 	}
3337 
3338 	if (bootverbose)
3339 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3340 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3341 		start = sec_bus;
3342 		end = sub_bus;
3343 		count = end - start + 1;
3344 
3345 		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);
3346 
3347 		/*
3348 		 * If requested, clear secondary bus registers in
3349 		 * bridge devices to force a complete renumbering
3350 		 * rather than reserving the existing range.  However,
3351 		 * preserve the existing size.
3352 		 */
3353 		if (pci_clear_buses)
3354 			goto clear;
3355 
3356 		rid = 0;
3357 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3358 		    start, end, count, 0);
3359 		if (res != NULL)
3360 			return;
3361 
3362 		if (bootverbose)
3363 			device_printf(bus,
3364 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3365 			    pci_get_domain(dev), pci_get_bus(dev),
3366 			    pci_get_slot(dev), pci_get_function(dev));
3367 	}
3368 
3369 clear:
3370 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3371 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3372 }
3373 
3374 static struct resource *
3375 pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
3376     u_long end, u_long count, u_int flags)
3377 {
3378 	struct pci_devinfo *dinfo;
3379 	pcicfgregs *cfg;
3380 	struct resource_list *rl;
3381 	struct resource *res;
3382 	int sec_reg, sub_reg;
3383 
3384 	dinfo = device_get_ivars(child);
3385 	cfg = &dinfo->cfg;
3386 	rl = &dinfo->resources;
3387 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3388 	case PCIM_HDRTYPE_BRIDGE:
3389 		sec_reg = PCIR_SECBUS_1;
3390 		sub_reg = PCIR_SUBBUS_1;
3391 		break;
3392 	case PCIM_HDRTYPE_CARDBUS:
3393 		sec_reg = PCIR_SECBUS_2;
3394 		sub_reg = PCIR_SUBBUS_2;
3395 		break;
3396 	default:
3397 		return (NULL);
3398 	}
3399 
3400 	if (*rid != 0)
3401 		return (NULL);
3402 
3403 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3404 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3405 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3406 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3407 		    start, end, count, flags & ~RF_ACTIVE);
3408 		if (res == NULL) {
3409 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3410 			device_printf(child, "allocating %lu bus%s failed\n",
3411 			    count, count == 1 ? "" : "es");
3412 			return (NULL);
3413 		}
3414 		if (bootverbose)
3415 			device_printf(child,
3416 			    "Lazy allocation of %lu bus%s at %lu\n", count,
3417 			    count == 1 ? "" : "es", rman_get_start(res));
3418 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3419 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3420 	}
3421 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3422 	    end, count, flags));
3423 }
3424 #endif
3425 
3426 void
3427 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3428 {
3429 	struct pci_devinfo *dinfo;
3430 	pcicfgregs *cfg;
3431 	struct resource_list *rl;
3432 	const struct pci_quirk *q;
3433 	uint32_t devid;
3434 	int i;
3435 
3436 	dinfo = device_get_ivars(dev);
3437 	cfg = &dinfo->cfg;
3438 	rl = &dinfo->resources;
3439 	devid = (cfg->device << 16) | cfg->vendor;
3440 
3441 	/* ATA devices needs special map treatment */
3442 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3443 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3444 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3445 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3446 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3447 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3448 	else
3449 		for (i = 0; i < cfg->nummaps;) {
3450 			/*
3451 			 * Skip quirked resources.
3452 			 */
3453 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3454 				if (q->devid == devid &&
3455 				    q->type == PCI_QUIRK_UNMAP_REG &&
3456 				    q->arg1 == PCIR_BAR(i))
3457 					break;
3458 			if (q->devid != 0) {
3459 				i++;
3460 				continue;
3461 			}
3462 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3463 			    prefetchmask & (1 << i));
3464 		}
3465 
3466 	/*
3467 	 * Add additional, quirked resources.
3468 	 */
3469 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3470 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3471 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3472 
3473 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3474 #ifdef __PCI_REROUTE_INTERRUPT
3475 		/*
3476 		 * Try to re-route interrupts. Sometimes the BIOS or
3477 		 * firmware may leave bogus values in these registers.
3478 		 * If the re-route fails, then just stick with what we
3479 		 * have.
3480 		 */
3481 		pci_assign_interrupt(bus, dev, 1);
3482 #else
3483 		pci_assign_interrupt(bus, dev, 0);
3484 #endif
3485 	}
3486 
3487 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3488 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3489 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3490 			xhci_early_takeover(dev);
3491 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3492 			ehci_early_takeover(dev);
3493 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3494 			ohci_early_takeover(dev);
3495 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3496 			uhci_early_takeover(dev);
3497 	}
3498 
3499 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3500 	/*
3501 	 * Reserve resources for secondary bus ranges behind bridge
3502 	 * devices.
3503 	 */
3504 	pci_reserve_secbus(bus, dev, cfg, rl);
3505 #endif
3506 }
3507 
3508 static struct pci_devinfo *
3509 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3510     int slot, int func, size_t dinfo_size)
3511 {
3512 	struct pci_devinfo *dinfo;
3513 
3514 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3515 	if (dinfo != NULL)
3516 		pci_add_child(dev, dinfo);
3517 
3518 	return (dinfo);
3519 }
3520 
3521 void
3522 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3523 {
3524 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3525 	device_t pcib = device_get_parent(dev);
3526 	struct pci_devinfo *dinfo;
3527 	int maxslots;
3528 	int s, f, pcifunchigh;
3529 	uint8_t hdrtype;
3530 	int first_func;
3531 
3532 	/*
3533 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3534 	 * enable ARI.  We must enable ARI before detecting the rest of the
3535 	 * functions on this bus as ARI changes the set of slots and functions
3536 	 * that are legal on this bus.
3537 	 */
3538 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
3539 	    dinfo_size);
3540 	if (dinfo != NULL && pci_enable_ari)
3541 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3542 
3543 	/*
3544 	 * Start looking for new devices on slot 0 at function 1 because we
3545 	 * just identified the device at slot 0, function 0.
3546 	 */
3547 	first_func = 1;
3548 
3549 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3550 	    ("dinfo_size too small"));
3551 	maxslots = PCIB_MAXSLOTS(pcib);
3552 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3553 		pcifunchigh = 0;
3554 		f = 0;
3555 		DELAY(1);
3556 		hdrtype = REG(PCIR_HDRTYPE, 1);
3557 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3558 			continue;
3559 		if (hdrtype & PCIM_MFDEV)
3560 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3561 		for (f = first_func; f <= pcifunchigh; f++)
3562 			pci_identify_function(pcib, dev, domain, busno, s, f,
3563 			    dinfo_size);
3564 	}
3565 #undef REG
3566 }
3567 
3568 #ifdef PCI_IOV
3569 device_t
3570 pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
3571     uint16_t vid, uint16_t did)
3572 {
3573 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3574 	device_t pcib;
3575 	int busno, slot, func;
3576 
3577 	pf_dinfo = device_get_ivars(pf);
3578 
3579 	/*
3580 	 * Do a sanity check that we have been passed the correct size.  If this
3581 	 * test fails then likely the pci subclass hasn't implemented the
3582 	 * pci_create_iov_child method like it's supposed it.
3583 	 */
3584 	if (size != pf_dinfo->cfg.devinfo_size) {
3585 		device_printf(pf,
3586 		    "PCI subclass does not properly implement PCI_IOV\n");
3587 		return (NULL);
3588 	}
3589 
3590 	pcib = device_get_parent(bus);
3591 
3592 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3593 
3594 	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
3595 	    vid, did, size);
3596 
3597 	vf_dinfo->cfg.flags |= PCICFG_VF;
3598 	pci_add_child(bus, vf_dinfo);
3599 
3600 	return (vf_dinfo->cfg.dev);
3601 }
3602 
3603 device_t
3604 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
3605     uint16_t vid, uint16_t did)
3606 {
3607 
3608 	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
3609 	    did));
3610 }
3611 #endif
3612 
3613 void
3614 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3615 {
3616 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3617 	device_set_ivars(dinfo->cfg.dev, dinfo);
3618 	resource_list_init(&dinfo->resources);
3619 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3620 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3621 	pci_print_verbose(dinfo);
3622 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3623 	pci_child_added(dinfo->cfg.dev);
3624 }
3625 
3626 void
3627 pci_child_added_method(device_t dev, device_t child)
3628 {
3629 
3630 }
3631 
3632 static int
3633 pci_probe(device_t dev)
3634 {
3635 
3636 	device_set_desc(dev, "PCI bus");
3637 
3638 	/* Allow other subclasses to override this driver. */
3639 	return (BUS_PROBE_GENERIC);
3640 }
3641 
3642 int
3643 pci_attach_common(device_t dev)
3644 {
3645 	struct pci_softc *sc;
3646 	int busno, domain;
3647 #ifdef PCI_DMA_BOUNDARY
3648 	int error, tag_valid;
3649 #endif
3650 #ifdef PCI_RES_BUS
3651 	int rid;
3652 #endif
3653 
3654 	sc = device_get_softc(dev);
3655 	domain = pcib_get_domain(dev);
3656 	busno = pcib_get_bus(dev);
3657 #ifdef PCI_RES_BUS
3658 	rid = 0;
3659 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
3660 	    1, 0);
3661 	if (sc->sc_bus == NULL) {
3662 		device_printf(dev, "failed to allocate bus number\n");
3663 		return (ENXIO);
3664 	}
3665 #endif
3666 	if (bootverbose)
3667 		device_printf(dev, "domain=%d, physical bus=%d\n",
3668 		    domain, busno);
3669 #ifdef PCI_DMA_BOUNDARY
3670 	tag_valid = 0;
3671 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
3672 	    devclass_find("pci")) {
3673 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
3674 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3675 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
3676 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
3677 		if (error)
3678 			device_printf(dev, "Failed to create DMA tag: %d\n",
3679 			    error);
3680 		else
3681 			tag_valid = 1;
3682 	}
3683 	if (!tag_valid)
3684 #endif
3685 		sc->sc_dma_tag = bus_get_dma_tag(dev);
3686 	return (0);
3687 }
3688 
3689 static int
3690 pci_attach(device_t dev)
3691 {
3692 	int busno, domain, error;
3693 
3694 	error = pci_attach_common(dev);
3695 	if (error)
3696 		return (error);
3697 
3698 	/*
3699 	 * Since there can be multiple independantly numbered PCI
3700 	 * busses on systems with multiple PCI domains, we can't use
3701 	 * the unit number to decide which bus we are probing. We ask
3702 	 * the parent pcib what our domain and bus numbers are.
3703 	 */
3704 	domain = pcib_get_domain(dev);
3705 	busno = pcib_get_bus(dev);
3706 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3707 	return (bus_generic_attach(dev));
3708 }
3709 
3710 #ifdef PCI_RES_BUS
3711 static int
3712 pci_detach(device_t dev)
3713 {
3714 	struct pci_softc *sc;
3715 	int error;
3716 
3717 	error = bus_generic_detach(dev);
3718 	if (error)
3719 		return (error);
3720 	sc = device_get_softc(dev);
3721 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3722 }
3723 #endif
3724 
3725 static void
3726 pci_set_power_child(device_t dev, device_t child, int state)
3727 {
3728 	device_t pcib;
3729 	int dstate;
3730 
3731 	/*
3732 	 * Set the device to the given state.  If the firmware suggests
3733 	 * a different power state, use it instead.  If power management
3734 	 * is not present, the firmware is responsible for managing
3735 	 * device power.  Skip children who aren't attached since they
3736 	 * are handled separately.
3737 	 */
3738 	pcib = device_get_parent(dev);
3739 	dstate = state;
3740 	if (device_is_attached(child) &&
3741 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
3742 		pci_set_powerstate(child, dstate);
3743 }
3744 
3745 int
3746 pci_suspend_child(device_t dev, device_t child)
3747 {
3748 	struct pci_devinfo *dinfo;
3749 	int error;
3750 
3751 	dinfo = device_get_ivars(child);
3752 
3753 	/*
3754 	 * Save the PCI configuration space for the child and set the
3755 	 * device in the appropriate power state for this sleep state.
3756 	 */
3757 	pci_cfg_save(child, dinfo, 0);
3758 
3759 	/* Suspend devices before potentially powering them down. */
3760 	error = bus_generic_suspend_child(dev, child);
3761 
3762 	if (error)
3763 		return (error);
3764 
3765 	if (pci_do_power_suspend)
3766 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
3767 
3768 	return (0);
3769 }
3770 
3771 int
3772 pci_resume_child(device_t dev, device_t child)
3773 {
3774 	struct pci_devinfo *dinfo;
3775 
3776 	if (pci_do_power_resume)
3777 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
3778 
3779 	dinfo = device_get_ivars(child);
3780 	pci_cfg_restore(child, dinfo);
3781 	if (!device_is_attached(child))
3782 		pci_cfg_save(child, dinfo, 1);
3783 
3784 	bus_generic_resume_child(dev, child);
3785 
3786 	return (0);
3787 }
3788 
3789 int
3790 pci_resume(device_t dev)
3791 {
3792 	device_t child, *devlist;
3793 	int error, i, numdevs;
3794 
3795 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3796 		return (error);
3797 
3798 	/*
3799 	 * Resume critical devices first, then everything else later.
3800 	 */
3801 	for (i = 0; i < numdevs; i++) {
3802 		child = devlist[i];
3803 		switch (pci_get_class(child)) {
3804 		case PCIC_DISPLAY:
3805 		case PCIC_MEMORY:
3806 		case PCIC_BRIDGE:
3807 		case PCIC_BASEPERIPH:
3808 			BUS_RESUME_CHILD(dev, child);
3809 			break;
3810 		}
3811 	}
3812 	for (i = 0; i < numdevs; i++) {
3813 		child = devlist[i];
3814 		switch (pci_get_class(child)) {
3815 		case PCIC_DISPLAY:
3816 		case PCIC_MEMORY:
3817 		case PCIC_BRIDGE:
3818 		case PCIC_BASEPERIPH:
3819 			break;
3820 		default:
3821 			BUS_RESUME_CHILD(dev, child);
3822 		}
3823 	}
3824 	free(devlist, M_TEMP);
3825 	return (0);
3826 }
3827 
3828 static void
3829 pci_load_vendor_data(void)
3830 {
3831 	caddr_t data;
3832 	void *ptr;
3833 	size_t sz;
3834 
3835 	data = preload_search_by_type("pci_vendor_data");
3836 	if (data != NULL) {
3837 		ptr = preload_fetch_addr(data);
3838 		sz = preload_fetch_size(data);
3839 		if (ptr != NULL && sz != 0) {
3840 			pci_vendordata = ptr;
3841 			pci_vendordata_size = sz;
3842 			/* terminate the database */
3843 			pci_vendordata[pci_vendordata_size] = '\n';
3844 		}
3845 	}
3846 }
3847 
3848 void
3849 pci_driver_added(device_t dev, driver_t *driver)
3850 {
3851 	int numdevs;
3852 	device_t *devlist;
3853 	device_t child;
3854 	struct pci_devinfo *dinfo;
3855 	int i;
3856 
3857 	if (bootverbose)
3858 		device_printf(dev, "driver added\n");
3859 	DEVICE_IDENTIFY(driver, dev);
3860 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3861 		return;
3862 	for (i = 0; i < numdevs; i++) {
3863 		child = devlist[i];
3864 		if (device_get_state(child) != DS_NOTPRESENT)
3865 			continue;
3866 		dinfo = device_get_ivars(child);
3867 		pci_print_verbose(dinfo);
3868 		if (bootverbose)
3869 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3870 		pci_cfg_restore(child, dinfo);
3871 		if (device_probe_and_attach(child) != 0)
3872 			pci_child_detached(dev, child);
3873 	}
3874 	free(devlist, M_TEMP);
3875 }
3876 
3877 int
3878 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3879     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3880 {
3881 	struct pci_devinfo *dinfo;
3882 	struct msix_table_entry *mte;
3883 	struct msix_vector *mv;
3884 	uint64_t addr;
3885 	uint32_t data;
3886 	void *cookie;
3887 	int error, rid;
3888 
3889 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3890 	    arg, &cookie);
3891 	if (error)
3892 		return (error);
3893 
3894 	/* If this is not a direct child, just bail out. */
3895 	if (device_get_parent(child) != dev) {
3896 		*cookiep = cookie;
3897 		return(0);
3898 	}
3899 
3900 	rid = rman_get_rid(irq);
3901 	if (rid == 0) {
3902 		/* Make sure that INTx is enabled */
3903 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3904 	} else {
3905 		/*
3906 		 * Check to see if the interrupt is MSI or MSI-X.
3907 		 * Ask our parent to map the MSI and give
3908 		 * us the address and data register values.
3909 		 * If we fail for some reason, teardown the
3910 		 * interrupt handler.
3911 		 */
3912 		dinfo = device_get_ivars(child);
3913 		if (dinfo->cfg.msi.msi_alloc > 0) {
3914 			if (dinfo->cfg.msi.msi_addr == 0) {
3915 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3916 			    ("MSI has handlers, but vectors not mapped"));
3917 				error = PCIB_MAP_MSI(device_get_parent(dev),
3918 				    child, rman_get_start(irq), &addr, &data);
3919 				if (error)
3920 					goto bad;
3921 				dinfo->cfg.msi.msi_addr = addr;
3922 				dinfo->cfg.msi.msi_data = data;
3923 			}
3924 			if (dinfo->cfg.msi.msi_handlers == 0)
3925 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3926 				    dinfo->cfg.msi.msi_data);
3927 			dinfo->cfg.msi.msi_handlers++;
3928 		} else {
3929 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3930 			    ("No MSI or MSI-X interrupts allocated"));
3931 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3932 			    ("MSI-X index too high"));
3933 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3934 			KASSERT(mte->mte_vector != 0, ("no message vector"));
3935 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3936 			KASSERT(mv->mv_irq == rman_get_start(irq),
3937 			    ("IRQ mismatch"));
3938 			if (mv->mv_address == 0) {
3939 				KASSERT(mte->mte_handlers == 0,
3940 		    ("MSI-X table entry has handlers, but vector not mapped"));
3941 				error = PCIB_MAP_MSI(device_get_parent(dev),
3942 				    child, rman_get_start(irq), &addr, &data);
3943 				if (error)
3944 					goto bad;
3945 				mv->mv_address = addr;
3946 				mv->mv_data = data;
3947 			}
3948 			if (mte->mte_handlers == 0) {
3949 				pci_enable_msix(child, rid - 1, mv->mv_address,
3950 				    mv->mv_data);
3951 				pci_unmask_msix(child, rid - 1);
3952 			}
3953 			mte->mte_handlers++;
3954 		}
3955 
3956 		/*
3957 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
3958 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
3959 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
3960 		 */
3961 		if (!pci_has_quirk(pci_get_devid(child),
3962 		    PCI_QUIRK_MSI_INTX_BUG))
3963 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3964 		else
3965 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3966 	bad:
3967 		if (error) {
3968 			(void)bus_generic_teardown_intr(dev, child, irq,
3969 			    cookie);
3970 			return (error);
3971 		}
3972 	}
3973 	*cookiep = cookie;
3974 	return (0);
3975 }
3976 
3977 int
3978 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3979     void *cookie)
3980 {
3981 	struct msix_table_entry *mte;
3982 	struct resource_list_entry *rle;
3983 	struct pci_devinfo *dinfo;
3984 	int error, rid;
3985 
3986 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3987 		return (EINVAL);
3988 
3989 	/* If this isn't a direct child, just bail out */
3990 	if (device_get_parent(child) != dev)
3991 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3992 
3993 	rid = rman_get_rid(irq);
3994 	if (rid == 0) {
3995 		/* Mask INTx */
3996 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3997 	} else {
3998 		/*
3999 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4000 		 * decrement the appropriate handlers count and mask the
4001 		 * MSI-X message, or disable MSI messages if the count
4002 		 * drops to 0.
4003 		 */
4004 		dinfo = device_get_ivars(child);
4005 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4006 		if (rle->res != irq)
4007 			return (EINVAL);
4008 		if (dinfo->cfg.msi.msi_alloc > 0) {
4009 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4010 			    ("MSI-X index too high"));
4011 			if (dinfo->cfg.msi.msi_handlers == 0)
4012 				return (EINVAL);
4013 			dinfo->cfg.msi.msi_handlers--;
4014 			if (dinfo->cfg.msi.msi_handlers == 0)
4015 				pci_disable_msi(child);
4016 		} else {
4017 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4018 			    ("No MSI or MSI-X interrupts allocated"));
4019 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4020 			    ("MSI-X index too high"));
4021 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4022 			if (mte->mte_handlers == 0)
4023 				return (EINVAL);
4024 			mte->mte_handlers--;
4025 			if (mte->mte_handlers == 0)
4026 				pci_mask_msix(child, rid - 1);
4027 		}
4028 	}
4029 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4030 	if (rid > 0)
4031 		KASSERT(error == 0,
4032 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4033 	return (error);
4034 }
4035 
4036 int
4037 pci_print_child(device_t dev, device_t child)
4038 {
4039 	struct pci_devinfo *dinfo;
4040 	struct resource_list *rl;
4041 	int retval = 0;
4042 
4043 	dinfo = device_get_ivars(child);
4044 	rl = &dinfo->resources;
4045 
4046 	retval += bus_print_child_header(dev, child);
4047 
4048 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4049 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4050 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4051 	if (device_get_flags(dev))
4052 		retval += printf(" flags %#x", device_get_flags(dev));
4053 
4054 	retval += printf(" at device %d.%d", pci_get_slot(child),
4055 	    pci_get_function(child));
4056 
4057 	retval += bus_print_child_domain(dev, child);
4058 	retval += bus_print_child_footer(dev, child);
4059 
4060 	return (retval);
4061 }
4062 
4063 static const struct
4064 {
4065 	int		class;
4066 	int		subclass;
4067 	int		report; /* 0 = bootverbose, 1 = always */
4068 	const char	*desc;
4069 } pci_nomatch_tab[] = {
4070 	{PCIC_OLD,		-1,			1, "old"},
4071 	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
4072 	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
4073 	{PCIC_STORAGE,		-1,			1, "mass storage"},
4074 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
4075 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
4076 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
4077 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
4078 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
4079 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
4080 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
4081 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
4082 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
4083 	{PCIC_NETWORK,		-1,			1, "network"},
4084 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
4085 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
4086 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
4087 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
4088 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
4089 	{PCIC_DISPLAY,		-1,			1, "display"},
4090 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
4091 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
4092 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
4093 	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
4094 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
4095 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
4096 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
4097 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
4098 	{PCIC_MEMORY,		-1,			1, "memory"},
4099 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
4100 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
4101 	{PCIC_BRIDGE,		-1,			1, "bridge"},
4102 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
4103 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
4104 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
4105 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
4106 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
4107 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
4108 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
4109 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
4110 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
4111 	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
4112 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
4113 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
4114 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
4115 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
4116 	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
4117 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
4118 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
4119 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
4120 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
4121 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
4122 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
4123 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
4124 	{PCIC_INPUTDEV,		-1,			1, "input device"},
4125 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
4126 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
4127 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
4128 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
4129 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
4130 	{PCIC_DOCKING,		-1,			1, "docking station"},
4131 	{PCIC_PROCESSOR,	-1,			1, "processor"},
4132 	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
4133 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
4134 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
4135 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
4136 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
4137 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
4138 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
4139 	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
4140 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
4141 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
4142 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
4143 	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
4144 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
4145 	{PCIC_SATCOM,		-1,			1, "satellite communication"},
4146 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
4147 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
4148 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
4149 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
4150 	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
4151 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
4152 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
4153 	{PCIC_DASP,		-1,			0, "dasp"},
4154 	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
4155 	{0, 0, 0,		NULL}
4156 };
4157 
4158 void
4159 pci_probe_nomatch(device_t dev, device_t child)
4160 {
4161 	int i, report;
4162 	const char *cp, *scp;
4163 	char *device;
4164 
4165 	/*
4166 	 * Look for a listing for this device in a loaded device database.
4167 	 */
4168 	report = 1;
4169 	if ((device = pci_describe_device(child)) != NULL) {
4170 		device_printf(dev, "<%s>", device);
4171 		free(device, M_DEVBUF);
4172 	} else {
4173 		/*
4174 		 * Scan the class/subclass descriptions for a general
4175 		 * description.
4176 		 */
4177 		cp = "unknown";
4178 		scp = NULL;
4179 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4180 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4181 				if (pci_nomatch_tab[i].subclass == -1) {
4182 					cp = pci_nomatch_tab[i].desc;
4183 					report = pci_nomatch_tab[i].report;
4184 				} else if (pci_nomatch_tab[i].subclass ==
4185 				    pci_get_subclass(child)) {
4186 					scp = pci_nomatch_tab[i].desc;
4187 					report = pci_nomatch_tab[i].report;
4188 				}
4189 			}
4190 		}
4191 		if (report || bootverbose) {
4192 			device_printf(dev, "<%s%s%s>",
4193 			    cp ? cp : "",
4194 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4195 			    scp ? scp : "");
4196 		}
4197 	}
4198 	if (report || bootverbose) {
4199 		printf(" at device %d.%d (no driver attached)\n",
4200 		    pci_get_slot(child), pci_get_function(child));
4201 	}
4202 	pci_cfg_save(child, device_get_ivars(child), 1);
4203 }
4204 
4205 void
4206 pci_child_detached(device_t dev, device_t child)
4207 {
4208 	struct pci_devinfo *dinfo;
4209 	struct resource_list *rl;
4210 
4211 	dinfo = device_get_ivars(child);
4212 	rl = &dinfo->resources;
4213 
4214 	/*
4215 	 * Have to deallocate IRQs before releasing any MSI messages and
4216 	 * have to release MSI messages before deallocating any memory
4217 	 * BARs.
4218 	 */
4219 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4220 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4221 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4222 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4223 		(void)pci_release_msi(child);
4224 	}
4225 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4226 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4227 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4228 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4229 #ifdef PCI_RES_BUS
4230 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4231 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4232 #endif
4233 
4234 	pci_cfg_save(child, dinfo, 1);
4235 }
4236 
4237 /*
4238  * Parse the PCI device database, if loaded, and return a pointer to a
4239  * description of the device.
4240  *
4241  * The database is flat text formatted as follows:
4242  *
4243  * Any line not in a valid format is ignored.
4244  * Lines are terminated with newline '\n' characters.
4245  *
4246  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4247  * the vendor name.
4248  *
4249  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4250  * - devices cannot be listed without a corresponding VENDOR line.
4251  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4252  * another TAB, then the device name.
4253  */
4254 
4255 /*
4256  * Assuming (ptr) points to the beginning of a line in the database,
4257  * return the vendor or device and description of the next entry.
4258  * The value of (vendor) or (device) inappropriate for the entry type
4259  * is set to -1.  Returns nonzero at the end of the database.
4260  *
4261  * Note that this is slightly unrobust in the face of corrupt data;
4262  * we attempt to safeguard against this by spamming the end of the
4263  * database with a newline when we initialise.
4264  */
4265 static int
4266 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4267 {
4268 	char	*cp = *ptr;
4269 	int	left;
4270 
4271 	*device = -1;
4272 	*vendor = -1;
4273 	**desc = '\0';
4274 	for (;;) {
4275 		left = pci_vendordata_size - (cp - pci_vendordata);
4276 		if (left <= 0) {
4277 			*ptr = cp;
4278 			return(1);
4279 		}
4280 
4281 		/* vendor entry? */
4282 		if (*cp != '\t' &&
4283 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4284 			break;
4285 		/* device entry? */
4286 		if (*cp == '\t' &&
4287 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4288 			break;
4289 
4290 		/* skip to next line */
4291 		while (*cp != '\n' && left > 0) {
4292 			cp++;
4293 			left--;
4294 		}
4295 		if (*cp == '\n') {
4296 			cp++;
4297 			left--;
4298 		}
4299 	}
4300 	/* skip to next line */
4301 	while (*cp != '\n' && left > 0) {
4302 		cp++;
4303 		left--;
4304 	}
4305 	if (*cp == '\n' && left > 0)
4306 		cp++;
4307 	*ptr = cp;
4308 	return(0);
4309 }
4310 
4311 static char *
4312 pci_describe_device(device_t dev)
4313 {
4314 	int	vendor, device;
4315 	char	*desc, *vp, *dp, *line;
4316 
4317 	desc = vp = dp = NULL;
4318 
4319 	/*
4320 	 * If we have no vendor data, we can't do anything.
4321 	 */
4322 	if (pci_vendordata == NULL)
4323 		goto out;
4324 
4325 	/*
4326 	 * Scan the vendor data looking for this device
4327 	 */
4328 	line = pci_vendordata;
4329 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4330 		goto out;
4331 	for (;;) {
4332 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4333 			goto out;
4334 		if (vendor == pci_get_vendor(dev))
4335 			break;
4336 	}
4337 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4338 		goto out;
4339 	for (;;) {
4340 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4341 			*dp = 0;
4342 			break;
4343 		}
4344 		if (vendor != -1) {
4345 			*dp = 0;
4346 			break;
4347 		}
4348 		if (device == pci_get_device(dev))
4349 			break;
4350 	}
4351 	if (dp[0] == '\0')
4352 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4353 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4354 	    NULL)
4355 		sprintf(desc, "%s, %s", vp, dp);
4356 out:
4357 	if (vp != NULL)
4358 		free(vp, M_DEVBUF);
4359 	if (dp != NULL)
4360 		free(dp, M_DEVBUF);
4361 	return(desc);
4362 }
4363 
4364 int
4365 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4366 {
4367 	struct pci_devinfo *dinfo;
4368 	pcicfgregs *cfg;
4369 
4370 	dinfo = device_get_ivars(child);
4371 	cfg = &dinfo->cfg;
4372 
4373 	switch (which) {
4374 	case PCI_IVAR_ETHADDR:
4375 		/*
4376 		 * The generic accessor doesn't deal with failure, so
4377 		 * we set the return value, then return an error.
4378 		 */
4379 		*((uint8_t **) result) = NULL;
4380 		return (EINVAL);
4381 	case PCI_IVAR_SUBVENDOR:
4382 		*result = cfg->subvendor;
4383 		break;
4384 	case PCI_IVAR_SUBDEVICE:
4385 		*result = cfg->subdevice;
4386 		break;
4387 	case PCI_IVAR_VENDOR:
4388 		*result = cfg->vendor;
4389 		break;
4390 	case PCI_IVAR_DEVICE:
4391 		*result = cfg->device;
4392 		break;
4393 	case PCI_IVAR_DEVID:
4394 		*result = (cfg->device << 16) | cfg->vendor;
4395 		break;
4396 	case PCI_IVAR_CLASS:
4397 		*result = cfg->baseclass;
4398 		break;
4399 	case PCI_IVAR_SUBCLASS:
4400 		*result = cfg->subclass;
4401 		break;
4402 	case PCI_IVAR_PROGIF:
4403 		*result = cfg->progif;
4404 		break;
4405 	case PCI_IVAR_REVID:
4406 		*result = cfg->revid;
4407 		break;
4408 	case PCI_IVAR_INTPIN:
4409 		*result = cfg->intpin;
4410 		break;
4411 	case PCI_IVAR_IRQ:
4412 		*result = cfg->intline;
4413 		break;
4414 	case PCI_IVAR_DOMAIN:
4415 		*result = cfg->domain;
4416 		break;
4417 	case PCI_IVAR_BUS:
4418 		*result = cfg->bus;
4419 		break;
4420 	case PCI_IVAR_SLOT:
4421 		*result = cfg->slot;
4422 		break;
4423 	case PCI_IVAR_FUNCTION:
4424 		*result = cfg->func;
4425 		break;
4426 	case PCI_IVAR_CMDREG:
4427 		*result = cfg->cmdreg;
4428 		break;
4429 	case PCI_IVAR_CACHELNSZ:
4430 		*result = cfg->cachelnsz;
4431 		break;
4432 	case PCI_IVAR_MINGNT:
4433 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4434 			*result = -1;
4435 			return (EINVAL);
4436 		}
4437 		*result = cfg->mingnt;
4438 		break;
4439 	case PCI_IVAR_MAXLAT:
4440 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4441 			*result = -1;
4442 			return (EINVAL);
4443 		}
4444 		*result = cfg->maxlat;
4445 		break;
4446 	case PCI_IVAR_LATTIMER:
4447 		*result = cfg->lattimer;
4448 		break;
4449 	default:
4450 		return (ENOENT);
4451 	}
4452 	return (0);
4453 }
4454 
4455 int
4456 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4457 {
4458 	struct pci_devinfo *dinfo;
4459 
4460 	dinfo = device_get_ivars(child);
4461 
4462 	switch (which) {
4463 	case PCI_IVAR_INTPIN:
4464 		dinfo->cfg.intpin = value;
4465 		return (0);
4466 	case PCI_IVAR_ETHADDR:
4467 	case PCI_IVAR_SUBVENDOR:
4468 	case PCI_IVAR_SUBDEVICE:
4469 	case PCI_IVAR_VENDOR:
4470 	case PCI_IVAR_DEVICE:
4471 	case PCI_IVAR_DEVID:
4472 	case PCI_IVAR_CLASS:
4473 	case PCI_IVAR_SUBCLASS:
4474 	case PCI_IVAR_PROGIF:
4475 	case PCI_IVAR_REVID:
4476 	case PCI_IVAR_IRQ:
4477 	case PCI_IVAR_DOMAIN:
4478 	case PCI_IVAR_BUS:
4479 	case PCI_IVAR_SLOT:
4480 	case PCI_IVAR_FUNCTION:
4481 		return (EINVAL);	/* disallow for now */
4482 
4483 	default:
4484 		return (ENOENT);
4485 	}
4486 }
4487 
4488 #include "opt_ddb.h"
4489 #ifdef DDB
4490 #include <ddb/ddb.h>
4491 #include <sys/cons.h>
4492 
4493 /*
4494  * List resources based on pci map registers, used for within ddb
4495  */
4496 
4497 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4498 {
4499 	struct pci_devinfo *dinfo;
4500 	struct devlist *devlist_head;
4501 	struct pci_conf *p;
4502 	const char *name;
4503 	int i, error, none_count;
4504 
4505 	none_count = 0;
4506 	/* get the head of the device queue */
4507 	devlist_head = &pci_devq;
4508 
4509 	/*
4510 	 * Go through the list of devices and print out devices
4511 	 */
4512 	for (error = 0, i = 0,
4513 	     dinfo = STAILQ_FIRST(devlist_head);
4514 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4515 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4516 
4517 		/* Populate pd_name and pd_unit */
4518 		name = NULL;
4519 		if (dinfo->cfg.dev)
4520 			name = device_get_name(dinfo->cfg.dev);
4521 
4522 		p = &dinfo->conf;
4523 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4524 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4525 			(name && *name) ? name : "none",
4526 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4527 			none_count++,
4528 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4529 			p->pc_sel.pc_func, (p->pc_class << 16) |
4530 			(p->pc_subclass << 8) | p->pc_progif,
4531 			(p->pc_subdevice << 16) | p->pc_subvendor,
4532 			(p->pc_device << 16) | p->pc_vendor,
4533 			p->pc_revid, p->pc_hdr);
4534 	}
4535 }
4536 #endif /* DDB */
4537 
4538 static struct resource *
4539 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4540     u_long start, u_long end, u_long count, u_int num, u_int flags)
4541 {
4542 	struct pci_devinfo *dinfo = device_get_ivars(child);
4543 	struct resource_list *rl = &dinfo->resources;
4544 	struct resource *res;
4545 	struct pci_map *pm;
4546 	pci_addr_t map, testval;
4547 	int mapsize;
4548 
4549 	res = NULL;
4550 	pm = pci_find_bar(child, *rid);
4551 	if (pm != NULL) {
4552 		/* This is a BAR that we failed to allocate earlier. */
4553 		mapsize = pm->pm_size;
4554 		map = pm->pm_value;
4555 	} else {
4556 		/*
4557 		 * Weed out the bogons, and figure out how large the
4558 		 * BAR/map is.  BARs that read back 0 here are bogus
4559 		 * and unimplemented.  Note: atapci in legacy mode are
4560 		 * special and handled elsewhere in the code.  If you
4561 		 * have a atapci device in legacy mode and it fails
4562 		 * here, that other code is broken.
4563 		 */
4564 		pci_read_bar(child, *rid, &map, &testval, NULL);
4565 
4566 		/*
4567 		 * Determine the size of the BAR and ignore BARs with a size
4568 		 * of 0.  Device ROM BARs use a different mask value.
4569 		 */
4570 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4571 			mapsize = pci_romsize(testval);
4572 		else
4573 			mapsize = pci_mapsize(testval);
4574 		if (mapsize == 0)
4575 			goto out;
4576 		pm = pci_add_bar(child, *rid, map, mapsize);
4577 	}
4578 
4579 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4580 		if (type != SYS_RES_MEMORY) {
4581 			if (bootverbose)
4582 				device_printf(dev,
4583 				    "child %s requested type %d for rid %#x,"
4584 				    " but the BAR says it is an memio\n",
4585 				    device_get_nameunit(child), type, *rid);
4586 			goto out;
4587 		}
4588 	} else {
4589 		if (type != SYS_RES_IOPORT) {
4590 			if (bootverbose)
4591 				device_printf(dev,
4592 				    "child %s requested type %d for rid %#x,"
4593 				    " but the BAR says it is an ioport\n",
4594 				    device_get_nameunit(child), type, *rid);
4595 			goto out;
4596 		}
4597 	}
4598 
4599 	/*
4600 	 * For real BARs, we need to override the size that
4601 	 * the driver requests, because that's what the BAR
4602 	 * actually uses and we would otherwise have a
4603 	 * situation where we might allocate the excess to
4604 	 * another driver, which won't work.
4605 	 */
4606 	count = ((pci_addr_t)1 << mapsize) * num;
4607 	if (RF_ALIGNMENT(flags) < mapsize)
4608 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4609 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4610 		flags |= RF_PREFETCHABLE;
4611 
4612 	/*
4613 	 * Allocate enough resource, and then write back the
4614 	 * appropriate BAR for that resource.
4615 	 */
4616 	resource_list_add(rl, type, *rid, start, end, count);
4617 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
4618 	    count, flags & ~RF_ACTIVE);
4619 	if (res == NULL) {
4620 		resource_list_delete(rl, type, *rid);
4621 		device_printf(child,
4622 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4623 		    count, *rid, type, start, end);
4624 		goto out;
4625 	}
4626 	if (bootverbose)
4627 		device_printf(child,
4628 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4629 		    count, *rid, type, rman_get_start(res));
4630 	map = rman_get_start(res);
4631 	pci_write_bar(child, pm, map);
4632 out:
4633 	return (res);
4634 }
4635 
4636 struct resource *
4637 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
4638     u_long start, u_long end, u_long count, u_long num, u_int flags)
4639 {
4640 	struct pci_devinfo *dinfo;
4641 	struct resource_list *rl;
4642 	struct resource_list_entry *rle;
4643 	struct resource *res;
4644 	pcicfgregs *cfg;
4645 
4646 	/*
4647 	 * Perform lazy resource allocation
4648 	 */
4649 	dinfo = device_get_ivars(child);
4650 	rl = &dinfo->resources;
4651 	cfg = &dinfo->cfg;
4652 	switch (type) {
4653 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
4654 	case PCI_RES_BUS:
4655 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
4656 		    flags));
4657 #endif
4658 	case SYS_RES_IRQ:
4659 		/*
4660 		 * Can't alloc legacy interrupt once MSI messages have
4661 		 * been allocated.
4662 		 */
4663 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4664 		    cfg->msix.msix_alloc > 0))
4665 			return (NULL);
4666 
4667 		/*
4668 		 * If the child device doesn't have an interrupt
4669 		 * routed and is deserving of an interrupt, try to
4670 		 * assign it one.
4671 		 */
4672 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4673 		    (cfg->intpin != 0))
4674 			pci_assign_interrupt(dev, child, 0);
4675 		break;
4676 	case SYS_RES_IOPORT:
4677 	case SYS_RES_MEMORY:
4678 #ifdef NEW_PCIB
4679 		/*
4680 		 * PCI-PCI bridge I/O window resources are not BARs.
4681 		 * For those allocations just pass the request up the
4682 		 * tree.
4683 		 */
4684 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4685 			switch (*rid) {
4686 			case PCIR_IOBASEL_1:
4687 			case PCIR_MEMBASE_1:
4688 			case PCIR_PMBASEL_1:
4689 				/*
4690 				 * XXX: Should we bother creating a resource
4691 				 * list entry?
4692 				 */
4693 				return (bus_generic_alloc_resource(dev, child,
4694 				    type, rid, start, end, count, flags));
4695 			}
4696 		}
4697 #endif
4698 		/* Reserve resources for this BAR if needed. */
4699 		rle = resource_list_find(rl, type, *rid);
4700 		if (rle == NULL) {
4701 			res = pci_reserve_map(dev, child, type, rid, start, end,
4702 			    count, num, flags);
4703 			if (res == NULL)
4704 				return (NULL);
4705 		}
4706 	}
4707 	return (resource_list_alloc(rl, dev, child, type, rid,
4708 	    start, end, count, flags));
4709 }
4710 
4711 struct resource *
4712 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4713     u_long start, u_long end, u_long count, u_int flags)
4714 {
4715 #ifdef PCI_IOV
4716 	struct pci_devinfo *dinfo;
4717 #endif
4718 
4719 	if (device_get_parent(child) != dev)
4720 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4721 		    type, rid, start, end, count, flags));
4722 
4723 #ifdef PCI_IOV
4724 	dinfo = device_get_ivars(child);
4725 	if (dinfo->cfg.flags & PCICFG_VF) {
4726 		switch (type) {
4727 		/* VFs can't have I/O BARs. */
4728 		case SYS_RES_IOPORT:
4729 			return (NULL);
4730 		case SYS_RES_MEMORY:
4731 			return (pci_vf_alloc_mem_resource(dev, child, rid,
4732 			    start, end, count, flags));
4733 		}
4734 
4735 		/* Fall through for other types of resource allocations. */
4736 	}
4737 #endif
4738 
4739 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
4740 	    count, 1, flags));
4741 }
4742 
4743 int
4744 pci_release_resource(device_t dev, device_t child, int type, int rid,
4745     struct resource *r)
4746 {
4747 	struct pci_devinfo *dinfo;
4748 	struct resource_list *rl;
4749 	pcicfgregs *cfg;
4750 
4751 	if (device_get_parent(child) != dev)
4752 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4753 		    type, rid, r));
4754 
4755 	dinfo = device_get_ivars(child);
4756 	cfg = &dinfo->cfg;
4757 
4758 #ifdef PCI_IOV
4759 	if (dinfo->cfg.flags & PCICFG_VF) {
4760 		switch (type) {
4761 		/* VFs can't have I/O BARs. */
4762 		case SYS_RES_IOPORT:
4763 			return (EDOOFUS);
4764 		case SYS_RES_MEMORY:
4765 			return (pci_vf_release_mem_resource(dev, child, rid,
4766 			    r));
4767 		}
4768 
4769 		/* Fall through for other types of resource allocations. */
4770 	}
4771 #endif
4772 
4773 #ifdef NEW_PCIB
4774 	/*
4775 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4776 	 * those allocations just pass the request up the tree.
4777 	 */
4778 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4779 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4780 		switch (rid) {
4781 		case PCIR_IOBASEL_1:
4782 		case PCIR_MEMBASE_1:
4783 		case PCIR_PMBASEL_1:
4784 			return (bus_generic_release_resource(dev, child, type,
4785 			    rid, r));
4786 		}
4787 	}
4788 #endif
4789 
4790 	rl = &dinfo->resources;
4791 	return (resource_list_release(rl, dev, child, type, rid, r));
4792 }
4793 
4794 int
4795 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4796     struct resource *r)
4797 {
4798 	struct pci_devinfo *dinfo;
4799 	int error;
4800 
4801 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4802 	if (error)
4803 		return (error);
4804 
4805 	/* Enable decoding in the command register when activating BARs. */
4806 	if (device_get_parent(child) == dev) {
4807 		/* Device ROMs need their decoding explicitly enabled. */
4808 		dinfo = device_get_ivars(child);
4809 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4810 			pci_write_bar(child, pci_find_bar(child, rid),
4811 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4812 		switch (type) {
4813 		case SYS_RES_IOPORT:
4814 		case SYS_RES_MEMORY:
4815 			error = PCI_ENABLE_IO(dev, child, type);
4816 			break;
4817 		}
4818 	}
4819 	return (error);
4820 }
4821 
4822 int
4823 pci_deactivate_resource(device_t dev, device_t child, int type,
4824     int rid, struct resource *r)
4825 {
4826 	struct pci_devinfo *dinfo;
4827 	int error;
4828 
4829 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4830 	if (error)
4831 		return (error);
4832 
4833 	/* Disable decoding for device ROMs. */
4834 	if (device_get_parent(child) == dev) {
4835 		dinfo = device_get_ivars(child);
4836 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4837 			pci_write_bar(child, pci_find_bar(child, rid),
4838 			    rman_get_start(r));
4839 	}
4840 	return (0);
4841 }
4842 
4843 void
4844 pci_delete_child(device_t dev, device_t child)
4845 {
4846 	struct resource_list_entry *rle;
4847 	struct resource_list *rl;
4848 	struct pci_devinfo *dinfo;
4849 
4850 	dinfo = device_get_ivars(child);
4851 	rl = &dinfo->resources;
4852 
4853 	if (device_is_attached(child))
4854 		device_detach(child);
4855 
4856 	/* Turn off access to resources we're about to free */
4857 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4858 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4859 
4860 	/* Free all allocated resources */
4861 	STAILQ_FOREACH(rle, rl, link) {
4862 		if (rle->res) {
4863 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4864 			    resource_list_busy(rl, rle->type, rle->rid)) {
4865 				pci_printf(&dinfo->cfg,
4866 				    "Resource still owned, oops. "
4867 				    "(type=%d, rid=%d, addr=%lx)\n",
4868 				    rle->type, rle->rid,
4869 				    rman_get_start(rle->res));
4870 				bus_release_resource(child, rle->type, rle->rid,
4871 				    rle->res);
4872 			}
4873 			resource_list_unreserve(rl, dev, child, rle->type,
4874 			    rle->rid);
4875 		}
4876 	}
4877 	resource_list_free(rl);
4878 
4879 	device_delete_child(dev, child);
4880 	pci_freecfg(dinfo);
4881 }
4882 
4883 void
4884 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4885 {
4886 	struct pci_devinfo *dinfo;
4887 	struct resource_list *rl;
4888 	struct resource_list_entry *rle;
4889 
4890 	if (device_get_parent(child) != dev)
4891 		return;
4892 
4893 	dinfo = device_get_ivars(child);
4894 	rl = &dinfo->resources;
4895 	rle = resource_list_find(rl, type, rid);
4896 	if (rle == NULL)
4897 		return;
4898 
4899 	if (rle->res) {
4900 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4901 		    resource_list_busy(rl, type, rid)) {
4902 			device_printf(dev, "delete_resource: "
4903 			    "Resource still owned by child, oops. "
4904 			    "(type=%d, rid=%d, addr=%lx)\n",
4905 			    type, rid, rman_get_start(rle->res));
4906 			return;
4907 		}
4908 		resource_list_unreserve(rl, dev, child, type, rid);
4909 	}
4910 	resource_list_delete(rl, type, rid);
4911 }
4912 
4913 struct resource_list *
4914 pci_get_resource_list (device_t dev, device_t child)
4915 {
4916 	struct pci_devinfo *dinfo = device_get_ivars(child);
4917 
4918 	return (&dinfo->resources);
4919 }
4920 
4921 bus_dma_tag_t
4922 pci_get_dma_tag(device_t bus, device_t dev)
4923 {
4924 	struct pci_softc *sc = device_get_softc(bus);
4925 
4926 	return (sc->sc_dma_tag);
4927 }
4928 
4929 uint32_t
4930 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4931 {
4932 	struct pci_devinfo *dinfo = device_get_ivars(child);
4933 	pcicfgregs *cfg = &dinfo->cfg;
4934 
4935 #ifdef PCI_IOV
4936 	/*
4937 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
4938 	 * emulate them here.
4939 	 */
4940 	if (cfg->flags & PCICFG_VF) {
4941 		if (reg == PCIR_VENDOR) {
4942 			switch (width) {
4943 			case 4:
4944 				return (cfg->device << 16 | cfg->vendor);
4945 			case 2:
4946 				return (cfg->vendor);
4947 			case 1:
4948 				return (cfg->vendor & 0xff);
4949 			default:
4950 				return (0xffffffff);
4951 			}
4952 		} else if (reg == PCIR_DEVICE) {
4953 			switch (width) {
4954 			/* Note that an unaligned 4-byte read is an error. */
4955 			case 2:
4956 				return (cfg->device);
4957 			case 1:
4958 				return (cfg->device & 0xff);
4959 			default:
4960 				return (0xffffffff);
4961 			}
4962 		}
4963 	}
4964 #endif
4965 
4966 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4967 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4968 }
4969 
4970 void
4971 pci_write_config_method(device_t dev, device_t child, int reg,
4972     uint32_t val, int width)
4973 {
4974 	struct pci_devinfo *dinfo = device_get_ivars(child);
4975 	pcicfgregs *cfg = &dinfo->cfg;
4976 
4977 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4978 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4979 }
4980 
4981 int
4982 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4983     size_t buflen)
4984 {
4985 
4986 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
4987 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
4988 	return (0);
4989 }
4990 
4991 int
4992 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4993     size_t buflen)
4994 {
4995 	struct pci_devinfo *dinfo;
4996 	pcicfgregs *cfg;
4997 
4998 	dinfo = device_get_ivars(child);
4999 	cfg = &dinfo->cfg;
5000 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5001 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5002 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5003 	    cfg->progif);
5004 	return (0);
5005 }
5006 
5007 int
5008 pci_assign_interrupt_method(device_t dev, device_t child)
5009 {
5010 	struct pci_devinfo *dinfo = device_get_ivars(child);
5011 	pcicfgregs *cfg = &dinfo->cfg;
5012 
5013 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5014 	    cfg->intpin));
5015 }
5016 
5017 static void
5018 pci_lookup(void *arg, const char *name, device_t *dev)
5019 {
5020 	long val;
5021 	char *end;
5022 	int domain, bus, slot, func;
5023 
5024 	if (*dev != NULL)
5025 		return;
5026 
5027 	/*
5028 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5029 	 * pciB:S:F.  In the latter case, the domain is assumed to
5030 	 * be zero.
5031 	 */
5032 	if (strncmp(name, "pci", 3) != 0)
5033 		return;
5034 	val = strtol(name + 3, &end, 10);
5035 	if (val < 0 || val > INT_MAX || *end != ':')
5036 		return;
5037 	domain = val;
5038 	val = strtol(end + 1, &end, 10);
5039 	if (val < 0 || val > INT_MAX || *end != ':')
5040 		return;
5041 	bus = val;
5042 	val = strtol(end + 1, &end, 10);
5043 	if (val < 0 || val > INT_MAX)
5044 		return;
5045 	slot = val;
5046 	if (*end == ':') {
5047 		val = strtol(end + 1, &end, 10);
5048 		if (val < 0 || val > INT_MAX || *end != '\0')
5049 			return;
5050 		func = val;
5051 	} else if (*end == '\0') {
5052 		func = slot;
5053 		slot = bus;
5054 		bus = domain;
5055 		domain = 0;
5056 	} else
5057 		return;
5058 
5059 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5060 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5061 		return;
5062 
5063 	*dev = pci_find_dbsf(domain, bus, slot, func);
5064 }
5065 
5066 static int
5067 pci_modevent(module_t mod, int what, void *arg)
5068 {
5069 	static struct cdev *pci_cdev;
5070 	static eventhandler_tag tag;
5071 
5072 	switch (what) {
5073 	case MOD_LOAD:
5074 		STAILQ_INIT(&pci_devq);
5075 		pci_generation = 0;
5076 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5077 		    "pci");
5078 		pci_load_vendor_data();
5079 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5080 		    1000);
5081 		break;
5082 
5083 	case MOD_UNLOAD:
5084 		if (tag != NULL)
5085 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5086 		destroy_dev(pci_cdev);
5087 		break;
5088 	}
5089 
5090 	return (0);
5091 }
5092 
5093 static void
5094 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5095 {
5096 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5097 	struct pcicfg_pcie *cfg;
5098 	int version, pos;
5099 
5100 	cfg = &dinfo->cfg.pcie;
5101 	pos = cfg->pcie_location;
5102 
5103 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5104 
5105 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5106 
5107 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5108 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5109 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5110 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5111 
5112 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5113 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5114 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5115 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5116 
5117 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5118 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5119 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5120 
5121 	if (version > 1) {
5122 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5123 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5124 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5125 	}
5126 #undef WREG
5127 }
5128 
5129 static void
5130 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5131 {
5132 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5133 	    dinfo->cfg.pcix.pcix_command,  2);
5134 }
5135 
5136 void
5137 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5138 {
5139 
5140 	/*
5141 	 * Restore the device to full power mode.  We must do this
5142 	 * before we restore the registers because moving from D3 to
5143 	 * D0 will cause the chip's BARs and some other registers to
5144 	 * be reset to some unknown power on reset values.  Cut down
5145 	 * the noise on boot by doing nothing if we are already in
5146 	 * state D0.
5147 	 */
5148 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5149 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5150 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5151 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5152 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5153 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5154 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5155 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5156 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5157 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5158 	case PCIM_HDRTYPE_NORMAL:
5159 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5160 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5161 		break;
5162 	case PCIM_HDRTYPE_BRIDGE:
5163 		pci_write_config(dev, PCIR_SECLAT_1,
5164 		    dinfo->cfg.bridge.br_seclat, 1);
5165 		pci_write_config(dev, PCIR_SUBBUS_1,
5166 		    dinfo->cfg.bridge.br_subbus, 1);
5167 		pci_write_config(dev, PCIR_SECBUS_1,
5168 		    dinfo->cfg.bridge.br_secbus, 1);
5169 		pci_write_config(dev, PCIR_PRIBUS_1,
5170 		    dinfo->cfg.bridge.br_pribus, 1);
5171 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5172 		    dinfo->cfg.bridge.br_control, 2);
5173 		break;
5174 	case PCIM_HDRTYPE_CARDBUS:
5175 		pci_write_config(dev, PCIR_SECLAT_2,
5176 		    dinfo->cfg.bridge.br_seclat, 1);
5177 		pci_write_config(dev, PCIR_SUBBUS_2,
5178 		    dinfo->cfg.bridge.br_subbus, 1);
5179 		pci_write_config(dev, PCIR_SECBUS_2,
5180 		    dinfo->cfg.bridge.br_secbus, 1);
5181 		pci_write_config(dev, PCIR_PRIBUS_2,
5182 		    dinfo->cfg.bridge.br_pribus, 1);
5183 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5184 		    dinfo->cfg.bridge.br_control, 2);
5185 		break;
5186 	}
5187 	pci_restore_bars(dev);
5188 
5189 	/*
5190 	 * Restore extended capabilities for PCI-Express and PCI-X
5191 	 */
5192 	if (dinfo->cfg.pcie.pcie_location != 0)
5193 		pci_cfg_restore_pcie(dev, dinfo);
5194 	if (dinfo->cfg.pcix.pcix_location != 0)
5195 		pci_cfg_restore_pcix(dev, dinfo);
5196 
5197 	/* Restore MSI and MSI-X configurations if they are present. */
5198 	if (dinfo->cfg.msi.msi_location != 0)
5199 		pci_resume_msi(dev);
5200 	if (dinfo->cfg.msix.msix_location != 0)
5201 		pci_resume_msix(dev);
5202 }
5203 
5204 static void
5205 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5206 {
5207 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5208 	struct pcicfg_pcie *cfg;
5209 	int version, pos;
5210 
5211 	cfg = &dinfo->cfg.pcie;
5212 	pos = cfg->pcie_location;
5213 
5214 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5215 
5216 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5217 
5218 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5219 
5220 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5221 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5222 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5223 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5224 
5225 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5226 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5227 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5228 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5229 
5230 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5231 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5232 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5233 
5234 	if (version > 1) {
5235 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5236 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5237 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5238 	}
5239 #undef RREG
5240 }
5241 
5242 static void
5243 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5244 {
5245 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5246 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5247 }
5248 
5249 void
5250 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5251 {
5252 	uint32_t cls;
5253 	int ps;
5254 
5255 	/*
5256 	 * Some drivers apparently write to these registers w/o updating our
5257 	 * cached copy.  No harm happens if we update the copy, so do so here
5258 	 * so we can restore them.  The COMMAND register is modified by the
5259 	 * bus w/o updating the cache.  This should represent the normally
5260 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5261 	 */
5262 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5263 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5264 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5265 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5266 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5267 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5268 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5269 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5270 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5271 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5272 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5273 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5274 	case PCIM_HDRTYPE_NORMAL:
5275 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5276 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5277 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5278 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5279 		break;
5280 	case PCIM_HDRTYPE_BRIDGE:
5281 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5282 		    PCIR_SECLAT_1, 1);
5283 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5284 		    PCIR_SUBBUS_1, 1);
5285 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5286 		    PCIR_SECBUS_1, 1);
5287 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5288 		    PCIR_PRIBUS_1, 1);
5289 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5290 		    PCIR_BRIDGECTL_1, 2);
5291 		break;
5292 	case PCIM_HDRTYPE_CARDBUS:
5293 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5294 		    PCIR_SECLAT_2, 1);
5295 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5296 		    PCIR_SUBBUS_2, 1);
5297 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5298 		    PCIR_SECBUS_2, 1);
5299 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5300 		    PCIR_PRIBUS_2, 1);
5301 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5302 		    PCIR_BRIDGECTL_2, 2);
5303 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5304 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5305 		break;
5306 	}
5307 
5308 	if (dinfo->cfg.pcie.pcie_location != 0)
5309 		pci_cfg_save_pcie(dev, dinfo);
5310 
5311 	if (dinfo->cfg.pcix.pcix_location != 0)
5312 		pci_cfg_save_pcix(dev, dinfo);
5313 
5314 	/*
5315 	 * don't set the state for display devices, base peripherals and
5316 	 * memory devices since bad things happen when they are powered down.
5317 	 * We should (a) have drivers that can easily detach and (b) use
5318 	 * generic drivers for these devices so that some device actually
5319 	 * attaches.  We need to make sure that when we implement (a) we don't
5320 	 * power the device down on a reattach.
5321 	 */
5322 	cls = pci_get_class(dev);
5323 	if (!setstate)
5324 		return;
5325 	switch (pci_do_power_nodriver)
5326 	{
5327 		case 0:		/* NO powerdown at all */
5328 			return;
5329 		case 1:		/* Conservative about what to power down */
5330 			if (cls == PCIC_STORAGE)
5331 				return;
5332 			/*FALLTHROUGH*/
5333 		case 2:		/* Agressive about what to power down */
5334 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5335 			    cls == PCIC_BASEPERIPH)
5336 				return;
5337 			/*FALLTHROUGH*/
5338 		case 3:		/* Power down everything */
5339 			break;
5340 	}
5341 	/*
5342 	 * PCI spec says we can only go into D3 state from D0 state.
5343 	 * Transition from D[12] into D0 before going to D3 state.
5344 	 */
5345 	ps = pci_get_powerstate(dev);
5346 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5347 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5348 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5349 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5350 }
5351 
5352 /* Wrapper APIs suitable for device driver use. */
5353 void
5354 pci_save_state(device_t dev)
5355 {
5356 	struct pci_devinfo *dinfo;
5357 
5358 	dinfo = device_get_ivars(dev);
5359 	pci_cfg_save(dev, dinfo, 0);
5360 }
5361 
5362 void
5363 pci_restore_state(device_t dev)
5364 {
5365 	struct pci_devinfo *dinfo;
5366 
5367 	dinfo = device_get_ivars(dev);
5368 	pci_cfg_restore(dev, dinfo);
5369 }
5370 
5371 static uint16_t
5372 pci_get_rid_method(device_t dev, device_t child)
5373 {
5374 
5375 	return (PCIB_GET_RID(device_get_parent(dev), child));
5376 }
5377