xref: /freebsd/sys/dev/pci/pci.c (revision a812392203d7c4c3f0db9d8a0f3391374c49c71f)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
/*
 * True if config register 'reg' is the expansion-ROM BAR for the header
 * type recorded in 'cfg' (type 0 uses PCIR_BIOS, type 1 uses PCIR_BIOS_1).
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 static int		pci_has_quirk(uint32_t devid, int quirk);
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static int		pci_msix_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
122 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
123 
/*
 * Method table for the PCI bus driver: device lifecycle methods, the
 * bus interface used by child devices (resources, interrupts, ivars),
 * and the PCI-specific kobj interface (config space, power states,
 * MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),

	DEVMETHOD_END
};
192 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
/* Attach the pci driver below pcib (PCI bridge) devices. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor/device description data; filled in by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
201 
/* One entry in the device quirk table consulted by pci_has_quirk(). */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* Quirk class; one of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* Quirk-specific argument (e.g. register offset) */
	int	arg2;	/* Second quirk-specific argument; currently unused */
};
214 
/* Quirk table; terminated by an all-zero sentinel entry. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
292 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of all discovered PCI devices */
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set when a PCIe/PCI-X capability is seen on a bridge; see pci_read_cap(). */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");
319 static int pci_do_power_nodriver = 0;
320 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
321     &pci_do_power_nodriver, 0,
322   "Place a function into D3 state when no driver attaches to it.  0 means\n\
323 disable.  1 means conservatively place devices into D3 state.  2 means\n\
324 agressively place devices into D3 state.  3 means put absolutely everything\n\
325 in D3 state.");
326 
/* Non-static: also consulted by bridge drivers during suspend/resume. */
int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
372 
373 static int
374 pci_has_quirk(uint32_t devid, int quirk)
375 {
376 	const struct pci_quirk *q;
377 
378 	for (q = &pci_quirks[0]; q->devid; q++) {
379 		if (q->devid == devid && q->type == quirk)
380 			return (1);
381 	}
382 	return (0);
383 }
384 
385 /* Find a device_t by bus/slot/function in domain 0 */
386 
/*
 * Convenience wrapper around pci_find_dbsf() that searches PCI
 * domain 0 only.  Returns the device_t or NULL if not found.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
393 
394 /* Find a device_t by domain/bus/slot/function */
395 
396 device_t
397 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
398 {
399 	struct pci_devinfo *dinfo;
400 
401 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
402 		if ((dinfo->cfg.domain == domain) &&
403 		    (dinfo->cfg.bus == bus) &&
404 		    (dinfo->cfg.slot == slot) &&
405 		    (dinfo->cfg.func == func)) {
406 			return (dinfo->cfg.dev);
407 		}
408 	}
409 
410 	return (NULL);
411 }
412 
413 /* Find a device_t by vendor/device ID */
414 
415 device_t
416 pci_find_device(uint16_t vendor, uint16_t device)
417 {
418 	struct pci_devinfo *dinfo;
419 
420 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
421 		if ((dinfo->cfg.vendor == vendor) &&
422 		    (dinfo->cfg.device == device)) {
423 			return (dinfo->cfg.dev);
424 		}
425 	}
426 
427 	return (NULL);
428 }
429 
430 device_t
431 pci_find_class(uint8_t class, uint8_t subclass)
432 {
433 	struct pci_devinfo *dinfo;
434 
435 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
436 		if (dinfo->cfg.baseclass == class &&
437 		    dinfo->cfg.subclass == subclass) {
438 			return (dinfo->cfg.dev);
439 		}
440 	}
441 
442 	return (NULL);
443 }
444 
445 static int
446 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
447 {
448 	va_list ap;
449 	int retval;
450 
451 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
452 	    cfg->func);
453 	va_start(ap, fmt);
454 	retval += vprintf(fmt, ap);
455 	va_end(ap);
456 	return (retval);
457 }
458 
459 /* return base address of memory or port map */
460 
461 static pci_addr_t
462 pci_mapbase(uint64_t mapreg)
463 {
464 
465 	if (PCI_BAR_MEM(mapreg))
466 		return (mapreg & PCIM_BAR_MEM_BASE);
467 	else
468 		return (mapreg & PCIM_BAR_IO_BASE);
469 }
470 
471 /* return map type of memory or port map */
472 
473 static const char *
474 pci_maptype(uint64_t mapreg)
475 {
476 
477 	if (PCI_BAR_IO(mapreg))
478 		return ("I/O Port");
479 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
480 		return ("Prefetchable Memory");
481 	return ("Memory");
482 }
483 
484 /* return log2 of map size decoded for memory or port map */
485 
486 static int
487 pci_mapsize(uint64_t testval)
488 {
489 	int ln2size;
490 
491 	testval = pci_mapbase(testval);
492 	ln2size = 0;
493 	if (testval != 0) {
494 		while ((testval & 1) == 0)
495 		{
496 			ln2size++;
497 			testval >>= 1;
498 		}
499 	}
500 	return (ln2size);
501 }
502 
503 /* return base address of device ROM */
504 
/*
 * Extract the base address from an expansion ROM BAR value by masking
 * off the enable and reserved bits.
 */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
511 
/* return log2 of map size decoded for device ROM */
513 
514 static int
515 pci_romsize(uint64_t testval)
516 {
517 	int ln2size;
518 
519 	testval = pci_rombase(testval);
520 	ln2size = 0;
521 	if (testval != 0) {
522 		while ((testval & 1) == 0)
523 		{
524 			ln2size++;
525 			testval >>= 1;
526 		}
527 	}
528 	return (ln2size);
529 }
530 
531 /* return log2 of address range supported by map register */
532 
533 static int
534 pci_maprange(uint64_t mapreg)
535 {
536 	int ln2range = 0;
537 
538 	if (PCI_BAR_IO(mapreg))
539 		ln2range = 32;
540 	else
541 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
542 		case PCIM_BAR_MEM_32:
543 			ln2range = 32;
544 			break;
545 		case PCIM_BAR_MEM_1MB:
546 			ln2range = 20;
547 			break;
548 		case PCIM_BAR_MEM_64:
549 			ln2range = 64;
550 			break;
551 		}
552 	return (ln2range);
553 }
554 
555 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
556 
557 static void
558 pci_fixancient(pcicfgregs *cfg)
559 {
560 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
561 		return;
562 
563 	/* PCI to PCI bridges use header type 1 */
564 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
565 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
566 }
567 
568 /* extract header type specific config data */
569 
/*
 * Read the header-type-specific config registers for the function at
 * bus 'b', slot 's', function 'f' into *cfg: the subsystem vendor and
 * device IDs (where the header type defines them) and the number of
 * BARs.  Type 1 (bridge) headers get only nummaps, since they carry
 * no subsystem IDs at a fixed offset.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
591 
592 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of the function at domain 'd', bus
 * 'b', slot 's', function 'f' into a freshly allocated pci_devinfo of
 * 'size' bytes (allowing callers to embed it in a larger structure).
 * On success the entry is appended to the global pci_devq list and
 * returned; returns NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no device is present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8)-visible form. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
667 
/*
 * Walk the PCI capability list of the device described by *cfg and
 * record the location and summary data of each capability this driver
 * cares about (power management, HyperTransport, MSI, MSI-X, VPD,
 * subvendor, PCI-X, PCI-express).  Also sets the pcix_chipset /
 * pcie_chipset hints used by the MSI blacklist logic.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Decode the MMC field into a message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* The table and PBA registers encode BAR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG are deliberately left defined for the VPD helpers below. */
}
828 
829 /*
830  * PCI Vital Product Data
831  */
832 
833 #define	PCI_VPD_TIMEOUT		1000000
834 
835 static int
836 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
837 {
838 	int count = PCI_VPD_TIMEOUT;
839 
840 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
841 
842 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
843 
844 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
845 		if (--count < 0)
846 			return (ENXIO);
847 		DELAY(1);	/* limit looping */
848 	}
849 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
850 
851 	return (0);
852 }
853 
#if 0
/*
 * Write one 32-bit word of VPD data at VPD address 'reg'.  Loads the
 * data register, then writes the address with the write-flag bit
 * (0x8000) set and polls until the hardware clears it.  Returns 0 on
 * success or ENXIO on timeout.  Currently compiled out: no caller.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
873 
874 #undef PCI_VPD_TIMEOUT
875 
/* Cursor state for streaming VPD data one byte at a time (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read from VPD */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* VPD address of the next word */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
884 
885 static int
886 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
887 {
888 	uint32_t reg;
889 	uint8_t byte;
890 
891 	if (vrs->bytesinval == 0) {
892 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
893 			return (ENXIO);
894 		vrs->val = le32toh(reg);
895 		vrs->off += 4;
896 		byte = vrs->val & 0xff;
897 		vrs->bytesinval = 3;
898 	} else {
899 		vrs->val = vrs->val >> 8;
900 		byte = vrs->val & 0xff;
901 		vrs->bytesinval--;
902 	}
903 
904 	vrs->cksum += byte;
905 	*data = byte;
906 	return (0);
907 }
908 
/*
 * Parse the device's Vital Product Data (VPD) and cache the results in
 * cfg->vpd: the identifier string (vpd_ident), the read-only keyword
 * entries (vpd_ros/vpd_rocnt) and the read/write keyword entries
 * (vpd_w/vpd_wcnt).  Implemented as a state machine driven one byte at
 * a time by vpd_nextbyte(); a state < 0 terminates the loop (-1 means
 * normal/expected end, -2 means an I/O error from vpd_nextbyte()).
 * On checksum failure the read-only data is discarded; on I/O error
 * everything parsed so far is discarded.  Always sets vpd_cached.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet seen, 0 = bad, 1 = good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			/* I/O error reading VPD data. */
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/*
				 * Large resource data type: a 16-bit
				 * little-endian length follows the tag byte.
				 */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * VPD is capped at 0x7f dwords of config
				 * space; a length running past that is
				 * garbage.
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/*
				 * Small resource data type: 3-bit length,
				 * 4-bit item name packed into the tag byte.
				 */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			/* Header is: 2 keyword bytes plus a length byte. */
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store an empty string. */
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* Account for the 3 header bytes just consumed. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first value byte is the
			 * checksum byte; the running sum of all bytes up to
			 * and including it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: trim the array to size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * NOTE(review): no visible transition sets state to
			 * 4; this skip state appears to be dead code here.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the read/write array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/*
			 * Record where the value starts so callers can
			 * write it back later.
			 */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: trim the array to size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done even on failure so we never re-parse. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1180 
1181 int
1182 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1183 {
1184 	struct pci_devinfo *dinfo = device_get_ivars(child);
1185 	pcicfgregs *cfg = &dinfo->cfg;
1186 
1187 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1188 		pci_read_vpd(device_get_parent(dev), cfg);
1189 
1190 	*identptr = cfg->vpd.vpd_ident;
1191 
1192 	if (*identptr == NULL)
1193 		return (ENXIO);
1194 
1195 	return (0);
1196 }
1197 
1198 int
1199 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1200 	const char **vptr)
1201 {
1202 	struct pci_devinfo *dinfo = device_get_ivars(child);
1203 	pcicfgregs *cfg = &dinfo->cfg;
1204 	int i;
1205 
1206 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1207 		pci_read_vpd(device_get_parent(dev), cfg);
1208 
1209 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1210 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1211 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1212 			*vptr = cfg->vpd.vpd_ros[i].value;
1213 			return (0);
1214 		}
1215 
1216 	*vptr = NULL;
1217 	return (ENXIO);
1218 }
1219 
1220 struct pcicfg_vpd *
1221 pci_fetch_vpd_list(device_t dev)
1222 {
1223 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1224 	pcicfgregs *cfg = &dinfo->cfg;
1225 
1226 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1227 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1228 	return (&cfg->vpd);
1229 }
1230 
1231 /*
1232  * Find the requested HyperTransport capability and return the offset
1233  * in configuration space via the pointer provided.  The function
1234  * returns 0 on success and an error code otherwise.
1235  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HT capability; fail if the device has none. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Slave and host capabilities are distinguished by only
		 * the top three bits (0xe000) of the HT command register;
		 * other HT capability types use the full capability mask.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			/*
			 * NOTE(review): when ptr reaches 0 this still reads
			 * config offset 0 + PCICAP_ID once before the outer
			 * loop terminates; harmless, but a wasted read.
			 */
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1273 
1274 /*
1275  * Find the requested capability and return the offset in
1276  * configuration space via the pointer provided.  The function returns
1277  * 0 on success and an error code otherwise.
1278  */
1279 int
1280 pci_find_cap_method(device_t dev, device_t child, int capability,
1281     int *capreg)
1282 {
1283 	struct pci_devinfo *dinfo = device_get_ivars(child);
1284 	pcicfgregs *cfg = &dinfo->cfg;
1285 	u_int32_t status;
1286 	u_int8_t ptr;
1287 
1288 	/*
1289 	 * Check the CAP_LIST bit of the PCI status register first.
1290 	 */
1291 	status = pci_read_config(child, PCIR_STATUS, 2);
1292 	if (!(status & PCIM_STATUS_CAPPRESENT))
1293 		return (ENXIO);
1294 
1295 	/*
1296 	 * Determine the start pointer of the capabilities list.
1297 	 */
1298 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1299 	case PCIM_HDRTYPE_NORMAL:
1300 	case PCIM_HDRTYPE_BRIDGE:
1301 		ptr = PCIR_CAP_PTR;
1302 		break;
1303 	case PCIM_HDRTYPE_CARDBUS:
1304 		ptr = PCIR_CAP_PTR_2;
1305 		break;
1306 	default:
1307 		/* XXX: panic? */
1308 		return (ENXIO);		/* no extended capabilities support */
1309 	}
1310 	ptr = pci_read_config(child, ptr, 1);
1311 
1312 	/*
1313 	 * Traverse the capabilities list.
1314 	 */
1315 	while (ptr != 0) {
1316 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1317 			if (capreg != NULL)
1318 				*capreg = ptr;
1319 			return (0);
1320 		}
1321 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1322 	}
1323 
1324 	return (ENOENT);
1325 }
1326 
1327 /*
1328  * Find the requested extended capability and return the offset in
1329  * configuration space via the pointer provided.  The function returns
1330  * 0 on success and an error code otherwise.
1331  */
1332 int
1333 pci_find_extcap_method(device_t dev, device_t child, int capability,
1334     int *capreg)
1335 {
1336 	struct pci_devinfo *dinfo = device_get_ivars(child);
1337 	pcicfgregs *cfg = &dinfo->cfg;
1338 	uint32_t ecap;
1339 	uint16_t ptr;
1340 
1341 	/* Only supported for PCI-express devices. */
1342 	if (cfg->pcie.pcie_location == 0)
1343 		return (ENXIO);
1344 
1345 	ptr = PCIR_EXTCAP;
1346 	ecap = pci_read_config(child, ptr, 4);
1347 	if (ecap == 0xffffffff || ecap == 0)
1348 		return (ENOENT);
1349 	for (;;) {
1350 		if (PCI_EXTCAP_ID(ecap) == capability) {
1351 			if (capreg != NULL)
1352 				*capreg = ptr;
1353 			return (0);
1354 		}
1355 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1356 		if (ptr == 0)
1357 			break;
1358 		ecap = pci_read_config(child, ptr, 4);
1359 	}
1360 
1361 	return (ENOENT);
1362 }
1363 
1364 /*
1365  * Support for MSI-X message interrupts.
1366  */
1367 void
1368 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1369     uint64_t address, uint32_t data)
1370 {
1371 	struct pci_devinfo *dinfo = device_get_ivars(child);
1372 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1373 	uint32_t offset;
1374 
1375 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1376 	offset = msix->msix_table_offset + index * 16;
1377 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1378 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1379 	bus_write_4(msix->msix_table_res, offset + 8, data);
1380 
1381 	/* Enable MSI -> HT mapping. */
1382 	pci_ht_map_msi(child, address);
1383 }
1384 
1385 void
1386 pci_mask_msix(device_t dev, u_int index)
1387 {
1388 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1389 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1390 	uint32_t offset, val;
1391 
1392 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1393 	offset = msix->msix_table_offset + index * 16 + 12;
1394 	val = bus_read_4(msix->msix_table_res, offset);
1395 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1396 		val |= PCIM_MSIX_VCTRL_MASK;
1397 		bus_write_4(msix->msix_table_res, offset, val);
1398 	}
1399 }
1400 
1401 void
1402 pci_unmask_msix(device_t dev, u_int index)
1403 {
1404 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1405 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1406 	uint32_t offset, val;
1407 
1408 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1409 	offset = msix->msix_table_offset + index * 16 + 12;
1410 	val = bus_read_4(msix->msix_table_res, offset);
1411 	if (val & PCIM_MSIX_VCTRL_MASK) {
1412 		val &= ~PCIM_MSIX_VCTRL_MASK;
1413 		bus_write_4(msix->msix_table_res, offset, val);
1414 	}
1415 }
1416 
1417 int
1418 pci_pending_msix(device_t dev, u_int index)
1419 {
1420 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1421 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1422 	uint32_t offset, bit;
1423 
1424 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1425 	offset = msix->msix_pba_offset + (index / 32) * 4;
1426 	bit = 1 << index % 32;
1427 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1428 }
1429 
1430 /*
1431  * Restore MSI-X registers and table during resume.  If MSI-X is
1432  * enabled then walk the virtual table to restore the actual MSI-X
1433  * table.
1434  */
1435 static void
1436 pci_resume_msix(device_t dev)
1437 {
1438 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1439 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1440 	struct msix_table_entry *mte;
1441 	struct msix_vector *mv;
1442 	int i;
1443 
1444 	if (msix->msix_alloc > 0) {
1445 		/* First, mask all vectors. */
1446 		for (i = 0; i < msix->msix_msgnum; i++)
1447 			pci_mask_msix(dev, i);
1448 
1449 		/* Second, program any messages with at least one handler. */
1450 		for (i = 0; i < msix->msix_table_len; i++) {
1451 			mte = &msix->msix_table[i];
1452 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1453 				continue;
1454 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1455 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1456 			pci_unmask_msix(dev, i);
1457 		}
1458 	}
1459 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1460 	    msix->msix_ctrl, 2);
1461 }
1462 
1463 /*
1464  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1465  * returned in *count.  After this function returns, each message will be
1466  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1467  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, 'rle' is still the table's. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/*
			 * Fail outright only if the very first allocation
			 * failed; otherwise proceed with what we got.
			 */
			if (i == 0)
				return (error);
			break;
		}
		/* IRQ rids are 1-based: rid N maps table entry N - 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Vector numbers stored in the table are 1-based. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1602 
1603 /*
1604  * By default, pci_alloc_msix() will assign the allocated IRQ
1605  * resources consecutively to the first N messages in the MSI-X table.
1606  * However, device drivers may want to use different layouts if they
1607  * either receive fewer messages than they asked for, or they wish to
1608  * populate the MSI-X table sparsely.  This method allows the driver
1609  * to specify what layout it wants.  It must be called after a
1610  * successful pci_alloc_msix() but before any of the associated
1611  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1612  *
1613  * The 'vectors' array contains 'count' message vectors.  The array
1614  * maps directly to the MSI-X table in that index 0 in the array
1615  * specifies the vector for the first message in the MSI-X table, etc.
1616  * The vector value in each array index can either be 0 to indicate
1617  * that no vector should be assigned to a message slot, or it can be a
1618  * number from 1 to N (where N is the count returned from a
1619  * succcessful call to pci_alloc_msix()) to indicate which message
1620  * vector (IRQ) to be used for the corresponding message.
1621  *
1622  * On successful return, each message with a non-zero vector will have
1623  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1624  * 1.  Additionally, if any of the IRQs allocated via the previous
1625  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1626  * will be freed back to the system automatically.
1627  *
1628  * For example, suppose a driver has a MSI-X table with 6 messages and
1629  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1630  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1631  * C.  After the call to pci_alloc_msix(), the device will be setup to
1632  * have an MSI-X table of ABC--- (where - means no vector assigned).
1633  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1634  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1635  * be freed back to the system.  This device will also have valid
1636  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1637  *
1638  * In any case, the SYS_RES_IRQ rid X will always map to the message
1639  * at MSI-X table index X - 1 and will only be valid if a vector is
1640  * assigned to that table entry.
1641  */
1642 int
1643 pci_remap_msix_method(device_t dev, device_t child, int count,
1644     const u_int *vectors)
1645 {
1646 	struct pci_devinfo *dinfo = device_get_ivars(child);
1647 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1648 	struct resource_list_entry *rle;
1649 	int i, irq, j, *used;
1650 
1651 	/*
1652 	 * Have to have at least one message in the table but the
1653 	 * table can't be bigger than the actual MSI-X table in the
1654 	 * device.
1655 	 */
1656 	if (count == 0 || count > msix->msix_msgnum)
1657 		return (EINVAL);
1658 
1659 	/* Sanity check the vectors. */
1660 	for (i = 0; i < count; i++)
1661 		if (vectors[i] > msix->msix_alloc)
1662 			return (EINVAL);
1663 
1664 	/*
1665 	 * Make sure there aren't any holes in the vectors to be used.
1666 	 * It's a big pain to support it, and it doesn't really make
1667 	 * sense anyway.  Also, at least one vector must be used.
1668 	 */
1669 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1670 	    M_ZERO);
1671 	for (i = 0; i < count; i++)
1672 		if (vectors[i] != 0)
1673 			used[vectors[i] - 1] = 1;
1674 	for (i = 0; i < msix->msix_alloc - 1; i++)
1675 		if (used[i] == 0 && used[i + 1] == 1) {
1676 			free(used, M_DEVBUF);
1677 			return (EINVAL);
1678 		}
1679 	if (used[0] != 1) {
1680 		free(used, M_DEVBUF);
1681 		return (EINVAL);
1682 	}
1683 
1684 	/* Make sure none of the resources are allocated. */
1685 	for (i = 0; i < msix->msix_table_len; i++) {
1686 		if (msix->msix_table[i].mte_vector == 0)
1687 			continue;
1688 		if (msix->msix_table[i].mte_handlers > 0)
1689 			return (EBUSY);
1690 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1691 		KASSERT(rle != NULL, ("missing resource"));
1692 		if (rle->res != NULL)
1693 			return (EBUSY);
1694 	}
1695 
1696 	/* Free the existing resource list entries. */
1697 	for (i = 0; i < msix->msix_table_len; i++) {
1698 		if (msix->msix_table[i].mte_vector == 0)
1699 			continue;
1700 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1701 	}
1702 
1703 	/*
1704 	 * Build the new virtual table keeping track of which vectors are
1705 	 * used.
1706 	 */
1707 	free(msix->msix_table, M_DEVBUF);
1708 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1709 	    M_DEVBUF, M_WAITOK | M_ZERO);
1710 	for (i = 0; i < count; i++)
1711 		msix->msix_table[i].mte_vector = vectors[i];
1712 	msix->msix_table_len = count;
1713 
1714 	/* Free any unused IRQs and resize the vectors array if necessary. */
1715 	j = msix->msix_alloc - 1;
1716 	if (used[j] == 0) {
1717 		struct msix_vector *vec;
1718 
1719 		while (used[j] == 0) {
1720 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1721 			    msix->msix_vectors[j].mv_irq);
1722 			j--;
1723 		}
1724 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1725 		    M_WAITOK);
1726 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1727 		    (j + 1));
1728 		free(msix->msix_vectors, M_DEVBUF);
1729 		msix->msix_vectors = vec;
1730 		msix->msix_alloc = j + 1;
1731 	}
1732 	free(used, M_DEVBUF);
1733 
1734 	/* Map the IRQs onto the rids. */
1735 	for (i = 0; i < count; i++) {
1736 		if (vectors[i] == 0)
1737 			continue;
1738 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1739 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1740 		    irq, 1);
1741 	}
1742 
1743 	if (bootverbose) {
1744 		device_printf(child, "Remapped MSI-X IRQs as: ");
1745 		for (i = 0; i < count; i++) {
1746 			if (i != 0)
1747 				printf(", ");
1748 			if (vectors[i] == 0)
1749 				printf("---");
1750 			else
1751 				printf("%d",
1752 				    msix->msix_vectors[vectors[i]].mv_irq);
1753 		}
1754 		printf("\n");
1755 	}
1756 
1757 	return (0);
1758 }
1759 
1760 static int
1761 pci_release_msix(device_t dev, device_t child)
1762 {
1763 	struct pci_devinfo *dinfo = device_get_ivars(child);
1764 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1765 	struct resource_list_entry *rle;
1766 	int i;
1767 
1768 	/* Do we have any messages to release? */
1769 	if (msix->msix_alloc == 0)
1770 		return (ENODEV);
1771 
1772 	/* Make sure none of the resources are allocated. */
1773 	for (i = 0; i < msix->msix_table_len; i++) {
1774 		if (msix->msix_table[i].mte_vector == 0)
1775 			continue;
1776 		if (msix->msix_table[i].mte_handlers > 0)
1777 			return (EBUSY);
1778 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1779 		KASSERT(rle != NULL, ("missing resource"));
1780 		if (rle->res != NULL)
1781 			return (EBUSY);
1782 	}
1783 
1784 	/* Update control register to disable MSI-X. */
1785 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1786 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1787 	    msix->msix_ctrl, 2);
1788 
1789 	/* Free the resource list entries. */
1790 	for (i = 0; i < msix->msix_table_len; i++) {
1791 		if (msix->msix_table[i].mte_vector == 0)
1792 			continue;
1793 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1794 	}
1795 	free(msix->msix_table, M_DEVBUF);
1796 	msix->msix_table_len = 0;
1797 
1798 	/* Release the IRQs. */
1799 	for (i = 0; i < msix->msix_alloc; i++)
1800 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1801 		    msix->msix_vectors[i].mv_irq);
1802 	free(msix->msix_vectors, M_DEVBUF);
1803 	msix->msix_alloc = 0;
1804 	return (0);
1805 }
1806 
1807 /*
1808  * Return the max supported MSI-X messages this device supports.
1809  * Basically, assuming the MD code can alloc messages, this function
1810  * should return the maximum value that pci_alloc_msix() can return.
1811  * Thus, it is subject to the tunables, etc.
1812  */
1813 int
1814 pci_msix_count_method(device_t dev, device_t child)
1815 {
1816 	struct pci_devinfo *dinfo = device_get_ivars(child);
1817 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1818 
1819 	if (pci_do_msix && msix->msix_location != 0)
1820 		return (msix->msix_msgnum);
1821 	return (0);
1822 }
1823 
1824 /*
1825  * HyperTransport MSI mapping control
1826  */
1827 void
1828 pci_ht_map_msi(device_t dev, uint64_t addr)
1829 {
1830 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1831 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1832 
1833 	if (!ht->ht_msimap)
1834 		return;
1835 
1836 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1837 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1838 		/* Enable MSI -> HT mapping. */
1839 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1840 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1841 		    ht->ht_msictrl, 2);
1842 	}
1843 
1844 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1845 		/* Disable MSI -> HT mapping. */
1846 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1847 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1848 		    ht->ht_msictrl, 2);
1849 	}
1850 }
1851 
1852 int
1853 pci_get_max_read_req(device_t dev)
1854 {
1855 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1856 	int cap;
1857 	uint16_t val;
1858 
1859 	cap = dinfo->cfg.pcie.pcie_location;
1860 	if (cap == 0)
1861 		return (0);
1862 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1863 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1864 	val >>= 12;
1865 	return (1 << (val + 7));
1866 }
1867 
1868 int
1869 pci_set_max_read_req(device_t dev, int size)
1870 {
1871 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1872 	int cap;
1873 	uint16_t val;
1874 
1875 	cap = dinfo->cfg.pcie.pcie_location;
1876 	if (cap == 0)
1877 		return (0);
1878 	if (size < 128)
1879 		size = 128;
1880 	if (size > 4096)
1881 		size = 4096;
1882 	size = (1 << (fls(size) - 1));
1883 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1884 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1885 	val |= (fls(size) - 8) << 12;
1886 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1887 	return (size);
1888 }
1889 
1890 /*
1891  * Support for MSI message signalled interrupts.
1892  */
1893 void
1894 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1895     uint16_t data)
1896 {
1897 	struct pci_devinfo *dinfo = device_get_ivars(child);
1898 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1899 
1900 	/* Write data and address values. */
1901 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
1902 	    address & 0xffffffff, 4);
1903 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1904 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1905 		    address >> 32, 4);
1906 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
1907 		    data, 2);
1908 	} else
1909 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
1910 		    2);
1911 
1912 	/* Enable MSI in the control register. */
1913 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1914 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1915 	    msi->msi_ctrl, 2);
1916 
1917 	/* Enable MSI -> HT mapping. */
1918 	pci_ht_map_msi(child, address);
1919 }
1920 
1921 void
1922 pci_disable_msi_method(device_t dev, device_t child)
1923 {
1924 	struct pci_devinfo *dinfo = device_get_ivars(child);
1925 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1926 
1927 	/* Disable MSI -> HT mapping. */
1928 	pci_ht_map_msi(child, 0);
1929 
1930 	/* Disable MSI in the control register. */
1931 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1932 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1933 	    msi->msi_ctrl, 2);
1934 }
1935 
1936 /*
1937  * Restore MSI registers during resume.  If MSI is enabled then
1938  * restore the data and address registers in addition to the control
1939  * register.
1940  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/*
	 * Only rewrite the address/data registers if MSI was enabled
	 * at suspend time, as recorded in the cached msi_ctrl.
	 */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		/* Low 32 bits of the message address. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/*
			 * 64-bit capable functions place the data
			 * register at a different offset.
			 */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1966 
/*
 * Re-route an already-allocated MSI or MSI-X IRQ by requesting fresh
 * address/data values from the parent bridge and reprogramming the
 * message registers.  Returns 0 on success, ENOENT if 'irq' is not one
 * of the device's message interrupts, or an error from PCIB_MAP_MSI().
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			/* MSI messages occupy rids 1..msi_alloc. */
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Toggle MSI off/on to latch the new values. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 *
	 * NOTE(review): unlike the MSI path above, a successful MSI-X
	 * update still falls through to return ENOENT — verify callers
	 * tolerate this before relying on the return value.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2039 
2040 /*
2041  * Returns true if the specified device is blacklisted because MSI
2042  * doesn't work.
2043  */
2044 int
2045 pci_msi_device_blacklisted(device_t dev)
2046 {
2047 
2048 	if (!pci_honor_msi_blacklist)
2049 		return (0);
2050 
2051 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2052 }
2053 
2054 /*
2055  * Determine if MSI is blacklisted globally on this system.  Currently,
2056  * we just check for blacklisted chipsets as represented by the
2057  * host-PCI bridge at device 0:0:0.  In the future, it may become
2058  * necessary to check other system attributes, such as the kenv values
2059  * that give the motherboard manufacturer and model number.
2060  */
2061 static int
2062 pci_msi_blacklisted(void)
2063 {
2064 	device_t dev;
2065 
2066 	if (!pci_honor_msi_blacklist)
2067 		return (0);
2068 
2069 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2070 	if (!(pcie_chipset || pcix_chipset)) {
2071 		if (vm_guest != VM_GUEST_NO) {
2072 			/*
2073 			 * Whitelist older chipsets in virtual
2074 			 * machines known to support MSI.
2075 			 */
2076 			dev = pci_find_bsf(0, 0, 0);
2077 			if (dev != NULL)
2078 				return (!pci_has_quirk(pci_get_devid(dev),
2079 					PCI_QUIRK_ENABLE_MSI_VM));
2080 		}
2081 		return (1);
2082 	}
2083 
2084 	dev = pci_find_bsf(0, 0, 0);
2085 	if (dev != NULL)
2086 		return (pci_msi_device_blacklisted(dev));
2087 	return (0);
2088 }
2089 
2090 /*
2091  * Returns true if the specified device is blacklisted because MSI-X
2092  * doesn't work.  Note that this assumes that if MSI doesn't work,
2093  * MSI-X doesn't either.
2094  */
2095 int
2096 pci_msix_device_blacklisted(device_t dev)
2097 {
2098 
2099 	if (!pci_honor_msi_blacklist)
2100 		return (0);
2101 
2102 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2103 		return (1);
2104 
2105 	return (pci_msi_device_blacklisted(dev));
2106 }
2107 
2108 /*
2109  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2110  * is blacklisted, assume that MSI-X is as well.  Check for additional
2111  * chipsets where MSI works but MSI-X does not.
2112  */
2113 static int
2114 pci_msix_blacklisted(void)
2115 {
2116 	device_t dev;
2117 
2118 	if (!pci_honor_msi_blacklist)
2119 		return (0);
2120 
2121 	dev = pci_find_bsf(0, 0, 0);
2122 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2123 	    PCI_QUIRK_DISABLE_MSIX))
2124 		return (1);
2125 
2126 	return (pci_msi_blacklisted());
2127 }
2128 
2129 /*
2130  * Attempt to allocate *count MSI messages.  The actual number allocated is
2131  * returned in *count.  After this function returns, each message will be
2132  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2133  */
/*
 * Allocate up to *count MSI messages for 'child', halving the request
 * on failure until a single message also fails.  On success the actual
 * number allocated is written back to *count, the messages appear as
 * SYS_RES_IRQ resources at rids 1..N, and the MSI control register is
 * programmed with the message count.  Returns 0 or an errno.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* log2(actual) is encoded in the Multiple Message Enable field. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2252 
2253 /* Release the MSI messages associated with this device. */
/*
 * Release all MSI (or MSI-X, tried first) messages held by 'child'.
 * Fails with EBUSY if any message still has an active handler or an
 * allocated resource; the caller must tear those down first.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		/* Collect the IRQ numbers to hand back to the bridge. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2301 
2302 /*
2303  * Return the max supported MSI messages this device supports.
2304  * Basically, assuming the MD code can alloc messages, this function
2305  * should return the maximum value that pci_alloc_msi() can return.
2306  * Thus, it is subject to the tunables, etc.
2307  */
2308 int
2309 pci_msi_count_method(device_t dev, device_t child)
2310 {
2311 	struct pci_devinfo *dinfo = device_get_ivars(child);
2312 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2313 
2314 	if (pci_do_msi && msi->msi_location != 0)
2315 		return (msi->msi_msgnum);
2316 	return (0);
2317 }
2318 
2319 /* free pcicfgregs structure and all depending data structures */
2320 
2321 int
2322 pci_freecfg(struct pci_devinfo *dinfo)
2323 {
2324 	struct devlist *devlist_head;
2325 	struct pci_map *pm, *next;
2326 	int i;
2327 
2328 	devlist_head = &pci_devq;
2329 
2330 	if (dinfo->cfg.vpd.vpd_reg) {
2331 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2332 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2333 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2334 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2335 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2336 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2337 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2338 	}
2339 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2340 		free(pm, M_DEVBUF);
2341 	}
2342 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2343 	free(dinfo, M_DEVBUF);
2344 
2345 	/* increment the generation count */
2346 	pci_generation++;
2347 
2348 	/* we're losing one device */
2349 	pci_numdevs--;
2350 	return (0);
2351 }
2352 
2353 /*
2354  * PCI power manangement
2355  */
2356 int
2357 pci_set_powerstate_method(device_t dev, device_t child, int state)
2358 {
2359 	struct pci_devinfo *dinfo = device_get_ivars(child);
2360 	pcicfgregs *cfg = &dinfo->cfg;
2361 	uint16_t status;
2362 	int result, oldstate, highest, delay;
2363 
2364 	if (cfg->pp.pp_cap == 0)
2365 		return (EOPNOTSUPP);
2366 
2367 	/*
2368 	 * Optimize a no state change request away.  While it would be OK to
2369 	 * write to the hardware in theory, some devices have shown odd
2370 	 * behavior when going from D3 -> D3.
2371 	 */
2372 	oldstate = pci_get_powerstate(child);
2373 	if (oldstate == state)
2374 		return (0);
2375 
2376 	/*
2377 	 * The PCI power management specification states that after a state
2378 	 * transition between PCI power states, system software must
2379 	 * guarantee a minimal delay before the function accesses the device.
2380 	 * Compute the worst case delay that we need to guarantee before we
2381 	 * access the device.  Many devices will be responsive much more
2382 	 * quickly than this delay, but there are some that don't respond
2383 	 * instantly to state changes.  Transitions to/from D3 state require
2384 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2385 	 * is done below with DELAY rather than a sleeper function because
2386 	 * this function can be called from contexts where we cannot sleep.
2387 	 */
2388 	highest = (oldstate > state) ? oldstate : state;
2389 	if (highest == PCI_POWERSTATE_D3)
2390 	    delay = 10000;
2391 	else if (highest == PCI_POWERSTATE_D2)
2392 	    delay = 200;
2393 	else
2394 	    delay = 0;
2395 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2396 	    & ~PCIM_PSTAT_DMASK;
2397 	result = 0;
2398 	switch (state) {
2399 	case PCI_POWERSTATE_D0:
2400 		status |= PCIM_PSTAT_D0;
2401 		break;
2402 	case PCI_POWERSTATE_D1:
2403 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2404 			return (EOPNOTSUPP);
2405 		status |= PCIM_PSTAT_D1;
2406 		break;
2407 	case PCI_POWERSTATE_D2:
2408 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2409 			return (EOPNOTSUPP);
2410 		status |= PCIM_PSTAT_D2;
2411 		break;
2412 	case PCI_POWERSTATE_D3:
2413 		status |= PCIM_PSTAT_D3;
2414 		break;
2415 	default:
2416 		return (EINVAL);
2417 	}
2418 
2419 	if (bootverbose)
2420 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2421 		    state);
2422 
2423 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2424 	if (delay)
2425 		DELAY(delay);
2426 	return (0);
2427 }
2428 
2429 int
2430 pci_get_powerstate_method(device_t dev, device_t child)
2431 {
2432 	struct pci_devinfo *dinfo = device_get_ivars(child);
2433 	pcicfgregs *cfg = &dinfo->cfg;
2434 	uint16_t status;
2435 	int result;
2436 
2437 	if (cfg->pp.pp_cap != 0) {
2438 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2439 		switch (status & PCIM_PSTAT_DMASK) {
2440 		case PCIM_PSTAT_D0:
2441 			result = PCI_POWERSTATE_D0;
2442 			break;
2443 		case PCIM_PSTAT_D1:
2444 			result = PCI_POWERSTATE_D1;
2445 			break;
2446 		case PCIM_PSTAT_D2:
2447 			result = PCI_POWERSTATE_D2;
2448 			break;
2449 		case PCIM_PSTAT_D3:
2450 			result = PCI_POWERSTATE_D3;
2451 			break;
2452 		default:
2453 			result = PCI_POWERSTATE_UNKNOWN;
2454 			break;
2455 		}
2456 	} else {
2457 		/* No support, device is always at D0 */
2458 		result = PCI_POWERSTATE_D0;
2459 	}
2460 	return (result);
2461 }
2462 
2463 /*
2464  * Some convenience functions for PCI device drivers.
2465  */
2466 
2467 static __inline void
2468 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2469 {
2470 	uint16_t	command;
2471 
2472 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2473 	command |= bit;
2474 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2475 }
2476 
2477 static __inline void
2478 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2479 {
2480 	uint16_t	command;
2481 
2482 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2483 	command &= ~bit;
2484 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2485 }
2486 
2487 int
2488 pci_enable_busmaster_method(device_t dev, device_t child)
2489 {
2490 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2491 	return (0);
2492 }
2493 
2494 int
2495 pci_disable_busmaster_method(device_t dev, device_t child)
2496 {
2497 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2498 	return (0);
2499 }
2500 
2501 int
2502 pci_enable_io_method(device_t dev, device_t child, int space)
2503 {
2504 	uint16_t bit;
2505 
2506 	switch(space) {
2507 	case SYS_RES_IOPORT:
2508 		bit = PCIM_CMD_PORTEN;
2509 		break;
2510 	case SYS_RES_MEMORY:
2511 		bit = PCIM_CMD_MEMEN;
2512 		break;
2513 	default:
2514 		return (EINVAL);
2515 	}
2516 	pci_set_command_bit(dev, child, bit);
2517 	return (0);
2518 }
2519 
2520 int
2521 pci_disable_io_method(device_t dev, device_t child, int space)
2522 {
2523 	uint16_t bit;
2524 
2525 	switch(space) {
2526 	case SYS_RES_IOPORT:
2527 		bit = PCIM_CMD_PORTEN;
2528 		break;
2529 	case SYS_RES_MEMORY:
2530 		bit = PCIM_CMD_MEMEN;
2531 		break;
2532 	default:
2533 		return (EINVAL);
2534 	}
2535 	pci_clear_command_bit(dev, child, bit);
2536 	return (0);
2537 }
2538 
2539 /*
2540  * New style pci driver.  Parent device is either a pci-host-bridge or a
2541  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2542  */
2543 
/*
 * Dump a device's config-header fields, power management, MSI and
 * MSI-X capability summaries to the console.  Only active when
 * booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Current D-state comes from the live PMCSR. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2600 
2601 static int
2602 pci_porten(device_t dev)
2603 {
2604 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2605 }
2606 
2607 static int
2608 pci_memen(device_t dev)
2609 {
2610 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2611 }
2612 
/*
 * Read BAR 'reg': the current value is returned in *mapp and the
 * sizing value (what reads back after writing all 1's) in *testvalp.
 * Decoding is disabled around the sizing probe and the original BAR
 * contents are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2676 
/*
 * Program the BAR described by 'pm' with address 'base' (both halves
 * for a 64-bit BAR) and refresh the cached pm_value with what the
 * device actually latched, since some BAR bits are read-only.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so the cached value reflects any read-only bits. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2697 
2698 struct pci_map *
2699 pci_find_bar(device_t dev, int reg)
2700 {
2701 	struct pci_devinfo *dinfo;
2702 	struct pci_map *pm;
2703 
2704 	dinfo = device_get_ivars(dev);
2705 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2706 		if (pm->pm_reg == reg)
2707 			return (pm);
2708 	}
2709 	return (NULL);
2710 }
2711 
2712 int
2713 pci_bar_enabled(device_t dev, struct pci_map *pm)
2714 {
2715 	struct pci_devinfo *dinfo;
2716 	uint16_t cmd;
2717 
2718 	dinfo = device_get_ivars(dev);
2719 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2720 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2721 		return (0);
2722 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2723 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2724 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2725 	else
2726 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2727 }
2728 
/*
 * Record a new BAR (register offset, raw value, log2 size) for 'dev'
 * and link it into the device's map list.  Duplicate registrations
 * for the same register offset are a bug (asserted).
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Find the insertion point: the last entry, or the entry whose
	 * successor has a larger register offset.  BARs are normally
	 * discovered in increasing offset order, which keeps the list
	 * sorted.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2753 
2754 static void
2755 pci_restore_bars(device_t dev)
2756 {
2757 	struct pci_devinfo *dinfo;
2758 	struct pci_map *pm;
2759 	int ln2range;
2760 
2761 	dinfo = device_get_ivars(dev);
2762 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2763 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2764 			ln2range = 32;
2765 		else
2766 			ln2range = pci_maprange(pm->pm_value);
2767 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2768 		if (ln2range == 64)
2769 			pci_write_config(dev, pm->pm_reg + 4,
2770 			    pm->pm_value >> 32, 4);
2771 	}
2772 }
2773 
2774 /*
2775  * Add a resource based on a pci map register. Return 1 if the map
2776  * register is a 32bit map register or 2 if it is a 64bit register.
2777  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Probe the BAR: current value and sizing mask. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The address does not fit in u_long on this platform. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	/* The BAR decodes a naturally-aligned window of 2^mapsize bytes. */
	count = (pci_addr_t)1 << mapsize;
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with whatever range the parent granted. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2945 
2946 /*
2947  * For ATA devices we need to decide early what addressing mode to use.
2948  * Legacy demands that the primary and secondary ATA ports sits on the
2949  * same addresses that old ISA hardware did. This dictates that we use
2950  * those addresses and ignore the BAR's if we cannot set PCI native
2951  * addressing mode.
2952  */
2953 static void
2954 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2955     uint32_t prefetchmask)
2956 {
2957 	struct resource *r;
2958 	int rid, type, progif;
2959 #if 0
2960 	/* if this device supports PCI native addressing use it */
2961 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2962 	if ((progif & 0x8a) == 0x8a) {
2963 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2964 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2965 			printf("Trying ATA native PCI addressing mode\n");
2966 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2967 		}
2968 	}
2969 #endif
2970 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2971 	type = SYS_RES_IOPORT;
2972 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2973 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2974 		    prefetchmask & (1 << 0));
2975 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2976 		    prefetchmask & (1 << 1));
2977 	} else {
2978 		rid = PCIR_BAR(0);
2979 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2980 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2981 		    0x1f7, 8, 0);
2982 		rid = PCIR_BAR(1);
2983 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2984 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2985 		    0x3f6, 1, 0);
2986 	}
2987 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2988 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2989 		    prefetchmask & (1 << 2));
2990 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2991 		    prefetchmask & (1 << 3));
2992 	} else {
2993 		rid = PCIR_BAR(2);
2994 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2995 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2996 		    0x177, 8, 0);
2997 		rid = PCIR_BAR(3);
2998 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2999 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3000 		    0x376, 1, 0);
3001 	}
3002 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3003 	    prefetchmask & (1 << 4));
3004 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3005 	    prefetchmask & (1 << 5));
3006 }
3007 
/*
 * Assign an IRQ to a device's legacy INTx pin and record it as the
 * rid 0 SYS_RES_IRQ entry in the device's resource list.  A user
 * tunable takes precedence; otherwise the intline register or a
 * bus-routed interrupt is used.  If force_route is non-zero, the bus
 * is asked to route an interrupt even when intline already holds a
 * valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the valid range (1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3055 
/*
 * Perform early OHCI takeover from SMM: if the BIOS/SMM owns the
 * controller (OHCI_IR set), request an ownership change and wait for
 * it to complete, resetting the controller if SMM does not respond.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM owns the controller; request an ownership change. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for SMM to drop OHCI_IR. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM did not let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3092 
/*
 * Perform early UHCI takeover from SMM: turn off legacy (keyboard/
 * mouse emulation) support in the PCI legacy-support register and
 * mask the controller's interrupts.
 */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	/* UHCI registers live in I/O port space, unlike OHCI/EHCI/XHCI. */
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3116 
/*
 * Perform early EHCI takeover from SMM: walk the extended-capability
 * list looking for the USB legacy-support capability, and if the BIOS
 * semaphore is held, claim the OS semaphore and wait for the BIOS to
 * release ownership before disabling controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* EHCI extended capabilities are read via PCI config space. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS semaphore to clear. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3172 
/*
 * Perform early XHCI takeover from SMM: walk the memory-mapped
 * extended-capability list for the USB legacy-support capability and,
 * if the BIOS semaphore is held, claim the OS semaphore, wait for the
 * BIOS to release ownership, then stop the controller's command ring
 * interrupts.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Prime eec so the first XHCI_XECP_NEXT(eec) check is non-zero. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* Wait a maximum of 5 seconds for SMM to release ownership. */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3234 
3235 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a bridge or
 * CardBus device.  If the range is invalid or the reservation fails,
 * the secbus/subbus registers are cleared so the range can be
 * renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only PCI-PCI and CardBus bridges have bus number registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/*
		 * Derive the bus range from the chipset-specific
		 * "supported bus" register at 0x41 when it is valid.
		 */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Only apply on Compal planars with product "08A0". */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Invalid range or failed reservation: clear the registers. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3340 
/*
 * Allocate the secondary bus number range (rid 0, PCI_RES_BUS) for a
 * bridge or CardBus child.  If the range was not reserved earlier,
 * reserve it lazily now and program the child's secbus/subbus
 * registers to match.  Returns NULL for non-bridge devices, non-zero
 * rids, or on allocation failure.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* A device's bus range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without RF_ACTIVE; activation happens below. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to match the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3391 #endif
3392 
/*
 * Populate a device's resource list: add and reserve its BARs
 * (honoring quirks), assign its INTx interrupt, perform early USB
 * controller takeover from SMM, and reserve bridge secondary bus
 * ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * pci_add_map() returns the number of BAR
			 * registers consumed (e.g. 2 for a 64-bit BAR),
			 * so the loop advances by its return value.
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM/BIOS early, if enabled. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3474 
3475 static struct pci_devinfo *
3476 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3477     int slot, int func, size_t dinfo_size)
3478 {
3479 	struct pci_devinfo *dinfo;
3480 
3481 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3482 	if (dinfo != NULL)
3483 		pci_add_child(dev, dinfo);
3484 
3485 	return (dinfo);
3486 }
3487 
/*
 * Enumerate all slots and functions on a bus and add the devices
 * found as children of 'dev'.  Slot 0 function 0 is probed first so
 * ARI can be enabled before the rest of the bus is scanned, since ARI
 * changes the set of legal slot/function numbers.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func drops back to 0 after the first slot. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices get a full function scan. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3534 
/*
 * Attach a discovered PCI device to the bus device tree: create the
 * child device, save/restore its config state, and add its resources.
 * The save/restore pair normalizes the saved state before resources
 * are reserved.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3547 
/*
 * Default pci_child_added bus method: no bus-specific work is needed
 * when a child device is added.  Subclasses may override.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3553 
/*
 * Probe method for the generic PCI bus driver.  Always succeeds, but
 * at generic priority so more specific subclass drivers win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3563 
/*
 * Common attach work shared by pci and its subclasses: reserve our own
 * bus number (when PCI_RES_BUS is available) and set up the DMA tag,
 * optionally bounded by PCI_DMA_BOUNDARY for top-level PCI buses.
 * Returns 0 on success or ENXIO if the bus number cannot be reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/* Only create a bounded tag for buses not nested under pci. */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or simply inherit) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3610 
/*
 * Attach method for the generic PCI bus driver: perform common setup,
 * enumerate children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3631 
3632 #ifdef PCI_RES_BUS
/*
 * Detach method: detach children first, then release the bus number
 * reserved in pci_attach_common().
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
3645 #endif
3646 
3647 static void
3648 pci_set_power_child(device_t dev, device_t child, int state)
3649 {
3650 	struct pci_devinfo *dinfo;
3651 	device_t pcib;
3652 	int dstate;
3653 
3654 	/*
3655 	 * Set the device to the given state.  If the firmware suggests
3656 	 * a different power state, use it instead.  If power management
3657 	 * is not present, the firmware is responsible for managing
3658 	 * device power.  Skip children who aren't attached since they
3659 	 * are handled separately.
3660 	 */
3661 	pcib = device_get_parent(dev);
3662 	dinfo = device_get_ivars(child);
3663 	dstate = state;
3664 	if (device_is_attached(child) &&
3665 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
3666 		pci_set_powerstate(child, dstate);
3667 }
3668 
/*
 * Suspend a child device: save its config space, run the generic
 * suspend, and then (if enabled) power it down to D3.  Returns 0 on
 * success or the generic suspend error, in which case the device is
 * left powered.
 */
int
pci_suspend_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	int error;

	dinfo = device_get_ivars(child);

	/*
	 * Save the PCI configuration space for the child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	pci_cfg_save(child, dinfo, 0);

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend_child(dev, child);

	if (error)
		return (error);

	if (pci_do_power_suspend)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);

	return (0);
}
3694 
/*
 * Resume a child device: power it back up to D0 (if enabled), restore
 * its saved config space, and run the generic resume.  For children
 * with no attached driver, re-save config state so a later attach
 * starts from the restored values.
 */
int
pci_resume_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;

	if (pci_do_power_resume)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);

	dinfo = device_get_ivars(child);
	pci_cfg_restore(child, dinfo);
	if (!device_is_attached(child))
		pci_cfg_save(child, dinfo, 1);

	bus_generic_resume_child(dev, child);

	return (0);
}
3712 
3713 int
3714 pci_resume(device_t dev)
3715 {
3716 	device_t child, *devlist;
3717 	int error, i, numdevs;
3718 
3719 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3720 		return (error);
3721 
3722 	/*
3723 	 * Resume critical devices first, then everything else later.
3724 	 */
3725 	for (i = 0; i < numdevs; i++) {
3726 		child = devlist[i];
3727 		switch (pci_get_class(child)) {
3728 		case PCIC_DISPLAY:
3729 		case PCIC_MEMORY:
3730 		case PCIC_BRIDGE:
3731 		case PCIC_BASEPERIPH:
3732 			BUS_RESUME_CHILD(dev, child);
3733 			break;
3734 		}
3735 	}
3736 	for (i = 0; i < numdevs; i++) {
3737 		child = devlist[i];
3738 		switch (pci_get_class(child)) {
3739 		case PCIC_DISPLAY:
3740 		case PCIC_MEMORY:
3741 		case PCIC_BRIDGE:
3742 		case PCIC_BASEPERIPH:
3743 			break;
3744 		default:
3745 			BUS_RESUME_CHILD(dev, child);
3746 		}
3747 	}
3748 	free(devlist, M_TEMP);
3749 	return (0);
3750 }
3751 
/*
 * Locate the preloaded PCI vendor database (loader type
 * "pci_vendor_data") and publish it via pci_vendordata /
 * pci_vendordata_size.  Leaves both untouched if no module was
 * preloaded.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): writes one byte past 'sz' —
			 * assumes the preload area has room for the
			 * terminator; confirm against the loader.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3771 
/*
 * Bus callback invoked when a new driver is registered: re-probe all
 * currently driverless children so the new driver gets a chance to
 * attach.  Config state is restored first in case the device was
 * powered down while unattached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3800 
/*
 * Set up an interrupt handler for a child device.  Beyond the generic
 * setup, direct children get their interrupt hardware programmed:
 * rid 0 (INTx) has its mask bit cleared, while MSI/MSI-X vectors are
 * mapped via the parent bridge and enabled on first use, with
 * per-vector handler reference counts maintained in the devinfo.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map MSI on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* rid N corresponds to MSI-X table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	/* On fall-through error is 0, so only the goto paths tear down. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3900 
3901 int
3902 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3903     void *cookie)
3904 {
3905 	struct msix_table_entry *mte;
3906 	struct resource_list_entry *rle;
3907 	struct pci_devinfo *dinfo;
3908 	int error, rid;
3909 
3910 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3911 		return (EINVAL);
3912 
3913 	/* If this isn't a direct child, just bail out */
3914 	if (device_get_parent(child) != dev)
3915 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3916 
3917 	rid = rman_get_rid(irq);
3918 	if (rid == 0) {
3919 		/* Mask INTx */
3920 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3921 	} else {
3922 		/*
3923 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3924 		 * decrement the appropriate handlers count and mask the
3925 		 * MSI-X message, or disable MSI messages if the count
3926 		 * drops to 0.
3927 		 */
3928 		dinfo = device_get_ivars(child);
3929 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3930 		if (rle->res != irq)
3931 			return (EINVAL);
3932 		if (dinfo->cfg.msi.msi_alloc > 0) {
3933 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3934 			    ("MSI-X index too high"));
3935 			if (dinfo->cfg.msi.msi_handlers == 0)
3936 				return (EINVAL);
3937 			dinfo->cfg.msi.msi_handlers--;
3938 			if (dinfo->cfg.msi.msi_handlers == 0)
3939 				pci_disable_msi(child);
3940 		} else {
3941 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3942 			    ("No MSI or MSI-X interrupts allocated"));
3943 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3944 			    ("MSI-X index too high"));
3945 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3946 			if (mte->mte_handlers == 0)
3947 				return (EINVAL);
3948 			mte->mte_handlers--;
3949 			if (mte->mte_handlers == 0)
3950 				pci_mask_msix(child, rid - 1);
3951 		}
3952 	}
3953 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3954 	if (rid > 0)
3955 		KASSERT(error == 0,
3956 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3957 	return (error);
3958 }
3959 
/*
 * Bus print_child method: print the standard header/footer plus the
 * device's port, memory, and IRQ resources and its slot/function.
 * Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_domain(dev, child);
	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3986 
/*
 * Generic descriptions for PCI class/subclass codes, used by
 * pci_probe_nomatch() when no driver attaches and the loaded vendor
 * database has no entry for the device.  A subclass of -1 matches the
 * class as a whole; a later, more specific subclass entry overrides it.
 * Entries with report == 0 are only announced when bootverbose is set;
 * report == 1 entries are always announced.  The table is terminated
 * by an entry with a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4081 
/*
 * Bus probe_nomatch method: announce a PCI child for which no driver
 * attached.  Prefers a description from the loaded vendor database;
 * otherwise falls back to the generic class/subclass descriptions in
 * pci_nomatch_tab.  Finally saves the device's config state (setstate
 * is 1, so pci_cfg_save may also change its power state).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					/* Whole-class match. */
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					/* More specific subclass match. */
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4128 
/*
 * Bus child_detached method: reclaim anything a detached driver leaked
 * (active IRQ, memory, I/O port and bus-number resources plus MSI/MSI-X
 * vectors), complaining about each leak, then save the device's config
 * state.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4160 
4161 /*
4162  * Parse the PCI device database, if loaded, and return a pointer to a
4163  * description of the device.
4164  *
4165  * The database is flat text formatted as follows:
4166  *
4167  * Any line not in a valid format is ignored.
4168  * Lines are terminated with newline '\n' characters.
4169  *
4170  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4171  * the vendor name.
4172  *
4173  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4174  * - devices cannot be listed without a corresponding VENDOR line.
4175  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4176  * another TAB, then the device name.
4177  */
4178 
4179 /*
4180  * Assuming (ptr) points to the beginning of a line in the database,
4181  * return the vendor or device and description of the next entry.
4182  * The value of (vendor) or (device) inappropriate for the entry type
4183  * is set to -1.  Returns nonzero at the end of the database.
4184  *
4185  * Note that this is slightly unrobust in the face of corrupt data;
4186  * we attempt to safeguard against this by spamming the end of the
4187  * database with a newline when we initialise.
4188  */
4189 static int
4190 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4191 {
4192 	char	*cp = *ptr;
4193 	int	left;
4194 
4195 	*device = -1;
4196 	*vendor = -1;
4197 	**desc = '\0';
4198 	for (;;) {
4199 		left = pci_vendordata_size - (cp - pci_vendordata);
4200 		if (left <= 0) {
4201 			*ptr = cp;
4202 			return(1);
4203 		}
4204 
4205 		/* vendor entry? */
4206 		if (*cp != '\t' &&
4207 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4208 			break;
4209 		/* device entry? */
4210 		if (*cp == '\t' &&
4211 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4212 			break;
4213 
4214 		/* skip to next line */
4215 		while (*cp != '\n' && left > 0) {
4216 			cp++;
4217 			left--;
4218 		}
4219 		if (*cp == '\n') {
4220 			cp++;
4221 			left--;
4222 		}
4223 	}
4224 	/* skip to next line */
4225 	while (*cp != '\n' && left > 0) {
4226 		cp++;
4227 		left--;
4228 	}
4229 	if (*cp == '\n' && left > 0)
4230 		cp++;
4231 	*ptr = cp;
4232 	return(0);
4233 }
4234 
/*
 * Look the given device up in the loaded vendor database and return a
 * malloc'ed "vendor, device" description string, or NULL if there is
 * no database, no matching vendor entry, or allocation fails.  The
 * caller frees the result with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor description. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* 80-byte scratch buffer for the device description. */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			/* Hit end of database without a device match. */
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Ran into the next vendor's block; no match. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device ID when no description was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+ 3" covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4287 
/*
 * Bus read_ivar method: export the cached PCI config registers to
 * child drivers (backs the pci_get_*() accessor macros).  Returns
 * ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4370 
/*
 * Bus write_ivar method: only the interrupt pin may be changed by a
 * child; the identity ivars are read-only (EINVAL), and anything else
 * is unknown (ENOENT).
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4403 
4404 #include "opt_ddb.h"
4405 #ifdef DDB
4406 #include <ddb/ddb.h>
4407 #include <sys/cons.h>
4408 
4409 /*
4410  * List resources based on pci map registers, used for within ddb
4411  */
4412 
/*
 * "show pciregs" DDB command: walk the global PCI device queue and
 * print the location, class, subvendor/subdevice, vendor/device,
 * revision and header type of every known PCI function.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used as a pseudo unit number for driverless devices. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4452 #endif /* DDB */
4453 
4454 static struct resource *
4455 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4456     u_long start, u_long end, u_long count, u_int flags)
4457 {
4458 	struct pci_devinfo *dinfo = device_get_ivars(child);
4459 	struct resource_list *rl = &dinfo->resources;
4460 	struct resource *res;
4461 	struct pci_map *pm;
4462 	pci_addr_t map, testval;
4463 	int mapsize;
4464 
4465 	res = NULL;
4466 	pm = pci_find_bar(child, *rid);
4467 	if (pm != NULL) {
4468 		/* This is a BAR that we failed to allocate earlier. */
4469 		mapsize = pm->pm_size;
4470 		map = pm->pm_value;
4471 	} else {
4472 		/*
4473 		 * Weed out the bogons, and figure out how large the
4474 		 * BAR/map is.  BARs that read back 0 here are bogus
4475 		 * and unimplemented.  Note: atapci in legacy mode are
4476 		 * special and handled elsewhere in the code.  If you
4477 		 * have a atapci device in legacy mode and it fails
4478 		 * here, that other code is broken.
4479 		 */
4480 		pci_read_bar(child, *rid, &map, &testval);
4481 
4482 		/*
4483 		 * Determine the size of the BAR and ignore BARs with a size
4484 		 * of 0.  Device ROM BARs use a different mask value.
4485 		 */
4486 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4487 			mapsize = pci_romsize(testval);
4488 		else
4489 			mapsize = pci_mapsize(testval);
4490 		if (mapsize == 0)
4491 			goto out;
4492 		pm = pci_add_bar(child, *rid, map, mapsize);
4493 	}
4494 
4495 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4496 		if (type != SYS_RES_MEMORY) {
4497 			if (bootverbose)
4498 				device_printf(dev,
4499 				    "child %s requested type %d for rid %#x,"
4500 				    " but the BAR says it is an memio\n",
4501 				    device_get_nameunit(child), type, *rid);
4502 			goto out;
4503 		}
4504 	} else {
4505 		if (type != SYS_RES_IOPORT) {
4506 			if (bootverbose)
4507 				device_printf(dev,
4508 				    "child %s requested type %d for rid %#x,"
4509 				    " but the BAR says it is an ioport\n",
4510 				    device_get_nameunit(child), type, *rid);
4511 			goto out;
4512 		}
4513 	}
4514 
4515 	/*
4516 	 * For real BARs, we need to override the size that
4517 	 * the driver requests, because that's what the BAR
4518 	 * actually uses and we would otherwise have a
4519 	 * situation where we might allocate the excess to
4520 	 * another driver, which won't work.
4521 	 */
4522 	count = (pci_addr_t)1 << mapsize;
4523 	if (RF_ALIGNMENT(flags) < mapsize)
4524 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4525 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4526 		flags |= RF_PREFETCHABLE;
4527 
4528 	/*
4529 	 * Allocate enough resource, and then write back the
4530 	 * appropriate BAR for that resource.
4531 	 */
4532 	resource_list_add(rl, type, *rid, start, end, count);
4533 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
4534 	    count, flags & ~RF_ACTIVE);
4535 	if (res == NULL) {
4536 		resource_list_delete(rl, type, *rid);
4537 		device_printf(child,
4538 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4539 		    count, *rid, type, start, end);
4540 		goto out;
4541 	}
4542 	if (bootverbose)
4543 		device_printf(child,
4544 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4545 		    count, *rid, type, rman_get_start(res));
4546 	map = rman_get_start(res);
4547 	pci_write_bar(child, pm, map);
4548 out:
4549 	return (res);
4550 }
4551 
/*
 * Bus alloc_resource method.  Requests from grandchildren are passed
 * up the tree.  For direct children this performs lazy allocation:
 * legacy IRQs may be routed on first use (and are refused once MSI or
 * MSI-X is in play), and memory/I-O BARs that were never reserved are
 * sized and reserved on demand via pci_reserve_map() before the
 * resource list allocation proceeds.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out the (possibly just-reserved) resource list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4630 
/*
 * Bus release_resource method.  Grandchildren's requests are passed up
 * the tree, as are a bridge's I/O window resources (which are not
 * BARs); everything else goes through the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4665 
/*
 * Bus activate_resource method.  After generic activation, direct
 * children get their decoding enabled: device ROM BARs have their
 * explicit enable bit set, and the command register's memory/port
 * decode is turned on via PCI_ENABLE_IO.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4693 
/*
 * Bus deactivate_resource method.  After generic deactivation, a
 * direct child's device ROM BAR is rewritten without the enable bit
 * so that ROM decoding is turned back off.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4714 
/*
 * Detach and destroy a PCI child device: disable its memory and I/O
 * decoding, force-release any resources still held, free its resource
 * list and finally free the devinfo itself.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/* Complain about (and reclaim) anything still held. */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4754 
/*
 * Bus delete_resource method: drop the entry for (type, rid) from a
 * direct child's resource list, unreserving the backing resource
 * first.  Refuses (with a console complaint) if the resource is still
 * active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4784 
4785 struct resource_list *
4786 pci_get_resource_list (device_t dev, device_t child)
4787 {
4788 	struct pci_devinfo *dinfo = device_get_ivars(child);
4789 
4790 	return (&dinfo->resources);
4791 }
4792 
4793 bus_dma_tag_t
4794 pci_get_dma_tag(device_t bus, device_t dev)
4795 {
4796 	struct pci_softc *sc = device_get_softc(bus);
4797 
4798 	return (sc->sc_dma_tag);
4799 }
4800 
4801 uint32_t
4802 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4803 {
4804 	struct pci_devinfo *dinfo = device_get_ivars(child);
4805 	pcicfgregs *cfg = &dinfo->cfg;
4806 
4807 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4808 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4809 }
4810 
4811 void
4812 pci_write_config_method(device_t dev, device_t child, int reg,
4813     uint32_t val, int width)
4814 {
4815 	struct pci_devinfo *dinfo = device_get_ivars(child);
4816 	pcicfgregs *cfg = &dinfo->cfg;
4817 
4818 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4819 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4820 }
4821 
4822 int
4823 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4824     size_t buflen)
4825 {
4826 
4827 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4828 	    pci_get_function(child));
4829 	return (0);
4830 }
4831 
4832 int
4833 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4834     size_t buflen)
4835 {
4836 	struct pci_devinfo *dinfo;
4837 	pcicfgregs *cfg;
4838 
4839 	dinfo = device_get_ivars(child);
4840 	cfg = &dinfo->cfg;
4841 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4842 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4843 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4844 	    cfg->progif);
4845 	return (0);
4846 }
4847 
4848 int
4849 pci_assign_interrupt_method(device_t dev, device_t child)
4850 {
4851 	struct pci_devinfo *dinfo = device_get_ivars(child);
4852 	pcicfgregs *cfg = &dinfo->cfg;
4853 
4854 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4855 	    cfg->intpin));
4856 }
4857 
/*
 * Module event handler: on load, initialize the global device queue,
 * create the /dev/pci control device and load the vendor description
 * database; on unload, destroy the control device.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
4879 
/*
 * Restore the PCI Express control registers saved by
 * pci_cfg_save_pcie().  Which registers exist depends on the
 * capability version and the device/port type, so the conditions here
 * mirror those on the save side.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: v2+ always; v1 only for types that have a link. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: v2+ always; v1 for ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: v2+ always; v1 for root ports/event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register set only exists in v2 of the capability. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4915 
/*
 * Restore the PCI-X command register saved by pci_cfg_save_pcix().
 */
static void
pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
{
	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
	    dinfo->cfg.pcix.pcix_command,  2);
}
4922 
/*
 * Restore a device's saved configuration registers, typically after a
 * suspend/resume cycle or a power-state transition wiped them.  Powers
 * the device up to D0 first, then rewrites the BARs, the standard
 * header registers, and the PCIe/PCI-X/MSI/MSI-X capability state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4972 
/*
 * Save the writable registers of a device's PCI Express capability into
 * dinfo->cfg.pcie so pci_cfg_restore_pcie() can rewrite them later.
 * Which registers are read depends on the capability version and the
 * port type: capability version 1 devices only implement the link,
 * slot and root control registers relevant to their port type, while
 * version 2 and later implement the full register set.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* Shorthand: 16-bit config read at an offset within the capability. */
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	/* Capability version is encoded in the low bits of the flags. */
	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control exists for every version and port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: root ports and endpoints only, for version 1. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/*
	 * Slot control: root ports, and downstream ports that report a
	 * slot is implemented (PCIEM_FLAGS_SLOT), for version 1.
	 */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" registers only exist in capability version 2 and up. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5010 
5011 static void
5012 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5013 {
5014 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5015 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5016 }
5017 
/*
 * Save a device's config-space state so it can be restored after a
 * suspend or D3 transition.  If 'setstate' is non-zero, also power the
 * device down to D3 subject to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/* Save the writable capability registers as well, if present. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5103 
5104 /* Wrapper APIs suitable for device driver use. */
5105 void
5106 pci_save_state(device_t dev)
5107 {
5108 	struct pci_devinfo *dinfo;
5109 
5110 	dinfo = device_get_ivars(dev);
5111 	pci_cfg_save(dev, dinfo, 0);
5112 }
5113 
5114 void
5115 pci_restore_state(device_t dev)
5116 {
5117 	struct pci_devinfo *dinfo;
5118 
5119 	dinfo = device_get_ivars(dev);
5120 	pci_cfg_restore(dev, dinfo);
5121 }
5122 
5123 static uint16_t
5124 pci_get_rid_method(device_t dev, device_t child)
5125 {
5126 
5127 	return (PCIB_GET_RID(device_get_parent(dev), child));
5128 }
5129