xref: /freebsd/sys/dev/pci/pci.c (revision be930504becc794c808b529bd65fa09d2d85f846)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
/*
 * True if 'reg' is the expansion-ROM BAR offset for the header type of
 * 'cfg' (type 0 devices and type 1 bridges keep it at different offsets).
 * 'reg' is parenthesized so compound expressions bind correctly.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
76 
77 static int		pci_has_quirk(uint32_t devid, int quirk);
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static int		pci_msix_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
122 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
123 
/*
 * Method table for the PCI bus driver.  It implements the generic
 * device and bus interfaces plus the PCI-specific kobj interface
 * ("pci_if").  device_detach is only a real detach routine when bus
 * numbers are managed as resources (PCI_RES_BUS).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),

	DEVMETHOD_END
};
192 
/* Driver class and module glue: "pci" attaches to "pcib" parents. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Raw vendor-data file contents/size, filled by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
201 
/*
 * A device-specific workaround.  Entries live in the pci_quirks[]
 * table below and are matched by pci_has_quirk() on the combined
 * vendor/device ID.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;	/* meaning depends on 'type' (e.g. register offset) */
	int	arg2;
};
213 
/* Quirk table, scanned linearly by pci_has_quirk(). */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* all-zero terminator */
};
271 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions (see pci_read_device()). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once a PCIe (resp. PCI-X) capability is seen during the scan. */
static int pcie_chipset, pcix_chipset;
281 
/* sysctl vars: hw.pci.* loader tunables and runtime knobs */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

/* Power management policy knobs (D0/D3 transitions). */
static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

/* Message-signaled interrupt knobs. */
static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* Early USB takeover defaults on only for x86, where BIOS emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
351 
352 static int
353 pci_has_quirk(uint32_t devid, int quirk)
354 {
355 	const struct pci_quirk *q;
356 
357 	for (q = &pci_quirks[0]; q->devid; q++) {
358 		if (q->devid == devid && q->type == quirk)
359 			return (1);
360 	}
361 	return (0);
362 }
363 
/*
 * Find a device_t by bus/slot/function in domain 0; convenience
 * wrapper around pci_find_dbsf().
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
372 
373 /* Find a device_t by domain/bus/slot/function */
374 
375 device_t
376 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
377 {
378 	struct pci_devinfo *dinfo;
379 
380 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
381 		if ((dinfo->cfg.domain == domain) &&
382 		    (dinfo->cfg.bus == bus) &&
383 		    (dinfo->cfg.slot == slot) &&
384 		    (dinfo->cfg.func == func)) {
385 			return (dinfo->cfg.dev);
386 		}
387 	}
388 
389 	return (NULL);
390 }
391 
392 /* Find a device_t by vendor/device ID */
393 
394 device_t
395 pci_find_device(uint16_t vendor, uint16_t device)
396 {
397 	struct pci_devinfo *dinfo;
398 
399 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
400 		if ((dinfo->cfg.vendor == vendor) &&
401 		    (dinfo->cfg.device == device)) {
402 			return (dinfo->cfg.dev);
403 		}
404 	}
405 
406 	return (NULL);
407 }
408 
409 device_t
410 pci_find_class(uint8_t class, uint8_t subclass)
411 {
412 	struct pci_devinfo *dinfo;
413 
414 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
415 		if (dinfo->cfg.baseclass == class &&
416 		    dinfo->cfg.subclass == subclass) {
417 			return (dinfo->cfg.dev);
418 		}
419 	}
420 
421 	return (NULL);
422 }
423 
424 static int
425 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
426 {
427 	va_list ap;
428 	int retval;
429 
430 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
431 	    cfg->func);
432 	va_start(ap, fmt);
433 	retval += vprintf(fmt, ap);
434 	va_end(ap);
435 	return (retval);
436 }
437 
438 /* return base address of memory or port map */
439 
440 static pci_addr_t
441 pci_mapbase(uint64_t mapreg)
442 {
443 
444 	if (PCI_BAR_MEM(mapreg))
445 		return (mapreg & PCIM_BAR_MEM_BASE);
446 	else
447 		return (mapreg & PCIM_BAR_IO_BASE);
448 }
449 
450 /* return map type of memory or port map */
451 
452 static const char *
453 pci_maptype(uint64_t mapreg)
454 {
455 
456 	if (PCI_BAR_IO(mapreg))
457 		return ("I/O Port");
458 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
459 		return ("Prefetchable Memory");
460 	return ("Memory");
461 }
462 
463 /* return log2 of map size decoded for memory or port map */
464 
465 static int
466 pci_mapsize(uint64_t testval)
467 {
468 	int ln2size;
469 
470 	testval = pci_mapbase(testval);
471 	ln2size = 0;
472 	if (testval != 0) {
473 		while ((testval & 1) == 0)
474 		{
475 			ln2size++;
476 			testval >>= 1;
477 		}
478 	}
479 	return (ln2size);
480 }
481 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
490 
491 /* return log2 of map size decided for device ROM */
492 
493 static int
494 pci_romsize(uint64_t testval)
495 {
496 	int ln2size;
497 
498 	testval = pci_rombase(testval);
499 	ln2size = 0;
500 	if (testval != 0) {
501 		while ((testval & 1) == 0)
502 		{
503 			ln2size++;
504 			testval >>= 1;
505 		}
506 	}
507 	return (ln2size);
508 }
509 
510 /* return log2 of address range supported by map register */
511 
512 static int
513 pci_maprange(uint64_t mapreg)
514 {
515 	int ln2range = 0;
516 
517 	if (PCI_BAR_IO(mapreg))
518 		ln2range = 32;
519 	else
520 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
521 		case PCIM_BAR_MEM_32:
522 			ln2range = 32;
523 			break;
524 		case PCIM_BAR_MEM_1MB:
525 			ln2range = 20;
526 			break;
527 		case PCIM_BAR_MEM_64:
528 			ln2range = 64;
529 			break;
530 		}
531 	return (ln2range);
532 }
533 
534 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
535 
536 static void
537 pci_fixancient(pcicfgregs *cfg)
538 {
539 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
540 		return;
541 
542 	/* PCI to PCI bridges use header type 1 */
543 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
544 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
545 }
546 
/*
 * Extract header-type specific config data: the location of the
 * subvendor/subdevice registers and the number of BARs depend on the
 * header type.  An unrecognized header type leaves cfg untouched.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges have no standard subvendor registers here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
570 
/*
 * Read the configuration header of the function at domain 'd', bus
 * 'b', slot 's', function 'f' into a freshly allocated pci_devinfo of
 * 'size' bytes ('size' lets callers allocate room for extra state
 * beyond the base structure).  On success the new entry is appended to
 * the global pci_devq list and the bookkeeping counters are bumped.
 * Returns NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An empty slot reads back all ones in the vendor/device word. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): malloc(9) with M_WAITOK never returns NULL;
		 * this check is purely defensive. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the standard header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Only walk the capability list if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the registers into the exported pci_conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
646 
/*
 * Walk the PCI capability list of the function described by 'cfg' and
 * record the location/contents of the capabilities this driver cares
 * about: power management, HyperTransport, MSI, MSI-X, VPD, subvendor,
 * PCI-X and PCI-express.  Also latches the global pcix_chipset /
 * pcie_chipset hints when matching bridges/devices are seen.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG are deliberately left defined: the VPD code below reuses them */
}
807 
808 /*
809  * PCI Vital Product Data
810  */
811 
812 #define	PCI_VPD_TIMEOUT		1000000
813 
814 static int
815 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
816 {
817 	int count = PCI_VPD_TIMEOUT;
818 
819 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
820 
821 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
822 
823 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
824 		if (--count < 0)
825 			return (ENXIO);
826 		DELAY(1);	/* limit looping */
827 	}
828 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
829 
830 	return (0);
831 }
832 
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg'.  Writing the
 * address with flag bit 0x8000 set starts the operation; the hardware
 * clears the bit when the write completes.  Returns ENXIO on timeout.
 * Currently compiled out (unused).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
852 
853 #undef PCI_VPD_TIMEOUT
854 
/*
 * Cursor state for reading a device's VPD one byte at a time via
 * vpd_nextbyte().
 */
struct vpd_readstate {
	device_t	pcib;	/* bridge used for config-space access */
	pcicfgregs	*cfg;	/* device whose VPD is being read */
	uint32_t	val;	/* last word read, shifted as it is consumed */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;	/* VPD offset of the next word to fetch */
	uint8_t		cksum;	/* running sum of all bytes handed out */
};
863 
864 static int
865 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
866 {
867 	uint32_t reg;
868 	uint8_t byte;
869 
870 	if (vrs->bytesinval == 0) {
871 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
872 			return (ENXIO);
873 		vrs->val = le32toh(reg);
874 		vrs->off += 4;
875 		byte = vrs->val & 0xff;
876 		vrs->bytesinval = 3;
877 	} else {
878 		vrs->val = vrs->val >> 8;
879 		byte = vrs->val & 0xff;
880 		vrs->bytesinval--;
881 	}
882 
883 	vrs->cksum += byte;
884 	*data = byte;
885 	return (0);
886 }
887 
888 static void
889 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
890 {
891 	struct vpd_readstate vrs;
892 	int state;
893 	int name;
894 	int remain;
895 	int i;
896 	int alloc, off;		/* alloc/off for RO/W arrays */
897 	int cksumvalid;
898 	int dflen;
899 	uint8_t byte;
900 	uint8_t byte2;
901 
902 	/* init vpd reader */
903 	vrs.bytesinval = 0;
904 	vrs.off = 0;
905 	vrs.pcib = pcib;
906 	vrs.cfg = cfg;
907 	vrs.cksum = 0;
908 
909 	state = 0;
910 	name = remain = i = 0;	/* shut up stupid gcc */
911 	alloc = off = 0;	/* shut up stupid gcc */
912 	dflen = 0;		/* shut up stupid gcc */
913 	cksumvalid = -1;
914 	while (state >= 0) {
915 		if (vpd_nextbyte(&vrs, &byte)) {
916 			state = -2;
917 			break;
918 		}
919 #if 0
920 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
921 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
922 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
923 #endif
924 		switch (state) {
925 		case 0:		/* item name */
926 			if (byte & 0x80) {
927 				if (vpd_nextbyte(&vrs, &byte2)) {
928 					state = -2;
929 					break;
930 				}
931 				remain = byte2;
932 				if (vpd_nextbyte(&vrs, &byte2)) {
933 					state = -2;
934 					break;
935 				}
936 				remain |= byte2 << 8;
937 				if (remain > (0x7f*4 - vrs.off)) {
938 					state = -1;
939 					pci_printf(cfg,
940 					    "invalid VPD data, remain %#x\n",
941 					    remain);
942 				}
943 				name = byte & 0x7f;
944 			} else {
945 				remain = byte & 0x7;
946 				name = (byte >> 3) & 0xf;
947 			}
948 			switch (name) {
949 			case 0x2:	/* String */
950 				cfg->vpd.vpd_ident = malloc(remain + 1,
951 				    M_DEVBUF, M_WAITOK);
952 				i = 0;
953 				state = 1;
954 				break;
955 			case 0xf:	/* End */
956 				state = -1;
957 				break;
958 			case 0x10:	/* VPD-R */
959 				alloc = 8;
960 				off = 0;
961 				cfg->vpd.vpd_ros = malloc(alloc *
962 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
963 				    M_WAITOK | M_ZERO);
964 				state = 2;
965 				break;
966 			case 0x11:	/* VPD-W */
967 				alloc = 8;
968 				off = 0;
969 				cfg->vpd.vpd_w = malloc(alloc *
970 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
971 				    M_WAITOK | M_ZERO);
972 				state = 5;
973 				break;
974 			default:	/* Invalid data, abort */
975 				state = -1;
976 				break;
977 			}
978 			break;
979 
980 		case 1:	/* Identifier String */
981 			cfg->vpd.vpd_ident[i++] = byte;
982 			remain--;
983 			if (remain == 0)  {
984 				cfg->vpd.vpd_ident[i] = '\0';
985 				state = 0;
986 			}
987 			break;
988 
989 		case 2:	/* VPD-R Keyword Header */
990 			if (off == alloc) {
991 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
992 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
993 				    M_DEVBUF, M_WAITOK | M_ZERO);
994 			}
995 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
996 			if (vpd_nextbyte(&vrs, &byte2)) {
997 				state = -2;
998 				break;
999 			}
1000 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1001 			if (vpd_nextbyte(&vrs, &byte2)) {
1002 				state = -2;
1003 				break;
1004 			}
1005 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1006 			if (dflen == 0 &&
1007 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1008 			    2) == 0) {
1009 				/*
1010 				 * if this happens, we can't trust the rest
1011 				 * of the VPD.
1012 				 */
1013 				pci_printf(cfg, "bad keyword length: %d\n",
1014 				    dflen);
1015 				cksumvalid = 0;
1016 				state = -1;
1017 				break;
1018 			} else if (dflen == 0) {
1019 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1020 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1021 				    M_DEVBUF, M_WAITOK);
1022 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1023 			} else
1024 				cfg->vpd.vpd_ros[off].value = malloc(
1025 				    (dflen + 1) *
1026 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1027 				    M_DEVBUF, M_WAITOK);
1028 			remain -= 3;
1029 			i = 0;
1030 			/* keep in sync w/ state 3's transistions */
1031 			if (dflen == 0 && remain == 0)
1032 				state = 0;
1033 			else if (dflen == 0)
1034 				state = 2;
1035 			else
1036 				state = 3;
1037 			break;
1038 
1039 		case 3:	/* VPD-R Keyword Value */
1040 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1041 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1042 			    "RV", 2) == 0 && cksumvalid == -1) {
1043 				if (vrs.cksum == 0)
1044 					cksumvalid = 1;
1045 				else {
1046 					if (bootverbose)
1047 						pci_printf(cfg,
1048 					    "bad VPD cksum, remain %hhu\n",
1049 						    vrs.cksum);
1050 					cksumvalid = 0;
1051 					state = -1;
1052 					break;
1053 				}
1054 			}
1055 			dflen--;
1056 			remain--;
1057 			/* keep in sync w/ state 2's transistions */
1058 			if (dflen == 0)
1059 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1060 			if (dflen == 0 && remain == 0) {
1061 				cfg->vpd.vpd_rocnt = off;
1062 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1063 				    off * sizeof(*cfg->vpd.vpd_ros),
1064 				    M_DEVBUF, M_WAITOK | M_ZERO);
1065 				state = 0;
1066 			} else if (dflen == 0)
1067 				state = 2;
1068 			break;
1069 
1070 		case 4:
1071 			remain--;
1072 			if (remain == 0)
1073 				state = 0;
1074 			break;
1075 
1076 		case 5:	/* VPD-W Keyword Header */
1077 			if (off == alloc) {
1078 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1079 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1080 				    M_DEVBUF, M_WAITOK | M_ZERO);
1081 			}
1082 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1083 			if (vpd_nextbyte(&vrs, &byte2)) {
1084 				state = -2;
1085 				break;
1086 			}
1087 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1088 			if (vpd_nextbyte(&vrs, &byte2)) {
1089 				state = -2;
1090 				break;
1091 			}
1092 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1093 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1094 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1095 			    sizeof(*cfg->vpd.vpd_w[off].value),
1096 			    M_DEVBUF, M_WAITOK);
1097 			remain -= 3;
1098 			i = 0;
1099 			/* keep in sync w/ state 6's transistions */
1100 			if (dflen == 0 && remain == 0)
1101 				state = 0;
1102 			else if (dflen == 0)
1103 				state = 5;
1104 			else
1105 				state = 6;
1106 			break;
1107 
1108 		case 6:	/* VPD-W Keyword Value */
1109 			cfg->vpd.vpd_w[off].value[i++] = byte;
1110 			dflen--;
1111 			remain--;
1112 			/* keep in sync w/ state 5's transistions */
1113 			if (dflen == 0)
1114 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1115 			if (dflen == 0 && remain == 0) {
1116 				cfg->vpd.vpd_wcnt = off;
1117 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1118 				    off * sizeof(*cfg->vpd.vpd_w),
1119 				    M_DEVBUF, M_WAITOK | M_ZERO);
1120 				state = 0;
1121 			} else if (dflen == 0)
1122 				state = 5;
1123 			break;
1124 
1125 		default:
1126 			pci_printf(cfg, "invalid state: %d\n", state);
1127 			state = -1;
1128 			break;
1129 		}
1130 	}
1131 
1132 	if (cksumvalid == 0 || state < -1) {
1133 		/* read-only data bad, clean up */
1134 		if (cfg->vpd.vpd_ros != NULL) {
1135 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1136 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1137 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1138 			cfg->vpd.vpd_ros = NULL;
1139 		}
1140 	}
1141 	if (state < -1) {
1142 		/* I/O error, clean up */
1143 		pci_printf(cfg, "failed to read VPD data.\n");
1144 		if (cfg->vpd.vpd_ident != NULL) {
1145 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1146 			cfg->vpd.vpd_ident = NULL;
1147 		}
1148 		if (cfg->vpd.vpd_w != NULL) {
1149 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1150 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1151 			free(cfg->vpd.vpd_w, M_DEVBUF);
1152 			cfg->vpd.vpd_w = NULL;
1153 		}
1154 	}
1155 	cfg->vpd.vpd_cached = 1;
1156 #undef REG
1157 #undef WREG
1158 }
1159 
1160 int
1161 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1162 {
1163 	struct pci_devinfo *dinfo = device_get_ivars(child);
1164 	pcicfgregs *cfg = &dinfo->cfg;
1165 
1166 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1167 		pci_read_vpd(device_get_parent(dev), cfg);
1168 
1169 	*identptr = cfg->vpd.vpd_ident;
1170 
1171 	if (*identptr == NULL)
1172 		return (ENXIO);
1173 
1174 	return (0);
1175 }
1176 
1177 int
1178 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1179 	const char **vptr)
1180 {
1181 	struct pci_devinfo *dinfo = device_get_ivars(child);
1182 	pcicfgregs *cfg = &dinfo->cfg;
1183 	int i;
1184 
1185 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1186 		pci_read_vpd(device_get_parent(dev), cfg);
1187 
1188 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1189 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1190 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1191 			*vptr = cfg->vpd.vpd_ros[i].value;
1192 			return (0);
1193 		}
1194 
1195 	*vptr = NULL;
1196 	return (ENXIO);
1197 }
1198 
/*
 * Return a pointer to the device's cached VPD data, reading it from
 * the device first if necessary.
 */
struct pcicfg_vpd *
pci_fetch_vpd_list(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Unlike the kobj methods above, 'dev' here is the pci device
	 * itself rather than a (bus, child) pair, so the bridge device
	 * pci_read_vpd() wants is two levels up: dev -> pci bus -> pcib.
	 */
	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
	return (&cfg->vpd);
}
1209 
1210 /*
1211  * Find the requested HyperTransport capability and return the offset
1212  * in configuration space via the pointer provided.  The function
1213  * returns 0 on success and an error code otherwise.
1214  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Find the first HT capability; bail out if the device has none. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		/*
		 * Slave and host interfaces are identified by the 3-bit
		 * capability type field alone (bits 15:13 of the command
		 * register); other HT capabilities compare the full
		 * capability field.
		 */
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1252 
1253 /*
1254  * Find the requested capability and return the offset in
1255  * configuration space via the pointer provided.  The function returns
1256  * 0 on success and an error code otherwise.
1257  */
1258 int
1259 pci_find_cap_method(device_t dev, device_t child, int capability,
1260     int *capreg)
1261 {
1262 	struct pci_devinfo *dinfo = device_get_ivars(child);
1263 	pcicfgregs *cfg = &dinfo->cfg;
1264 	u_int32_t status;
1265 	u_int8_t ptr;
1266 
1267 	/*
1268 	 * Check the CAP_LIST bit of the PCI status register first.
1269 	 */
1270 	status = pci_read_config(child, PCIR_STATUS, 2);
1271 	if (!(status & PCIM_STATUS_CAPPRESENT))
1272 		return (ENXIO);
1273 
1274 	/*
1275 	 * Determine the start pointer of the capabilities list.
1276 	 */
1277 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1278 	case PCIM_HDRTYPE_NORMAL:
1279 	case PCIM_HDRTYPE_BRIDGE:
1280 		ptr = PCIR_CAP_PTR;
1281 		break;
1282 	case PCIM_HDRTYPE_CARDBUS:
1283 		ptr = PCIR_CAP_PTR_2;
1284 		break;
1285 	default:
1286 		/* XXX: panic? */
1287 		return (ENXIO);		/* no extended capabilities support */
1288 	}
1289 	ptr = pci_read_config(child, ptr, 1);
1290 
1291 	/*
1292 	 * Traverse the capabilities list.
1293 	 */
1294 	while (ptr != 0) {
1295 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1296 			if (capreg != NULL)
1297 				*capreg = ptr;
1298 			return (0);
1299 		}
1300 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1301 	}
1302 
1303 	return (ENOENT);
1304 }
1305 
1306 /*
1307  * Find the requested extended capability and return the offset in
1308  * configuration space via the pointer provided.  The function returns
1309  * 0 on success and an error code otherwise.
1310  */
1311 int
1312 pci_find_extcap_method(device_t dev, device_t child, int capability,
1313     int *capreg)
1314 {
1315 	struct pci_devinfo *dinfo = device_get_ivars(child);
1316 	pcicfgregs *cfg = &dinfo->cfg;
1317 	uint32_t ecap;
1318 	uint16_t ptr;
1319 
1320 	/* Only supported for PCI-express devices. */
1321 	if (cfg->pcie.pcie_location == 0)
1322 		return (ENXIO);
1323 
1324 	ptr = PCIR_EXTCAP;
1325 	ecap = pci_read_config(child, ptr, 4);
1326 	if (ecap == 0xffffffff || ecap == 0)
1327 		return (ENOENT);
1328 	for (;;) {
1329 		if (PCI_EXTCAP_ID(ecap) == capability) {
1330 			if (capreg != NULL)
1331 				*capreg = ptr;
1332 			return (0);
1333 		}
1334 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1335 		if (ptr == 0)
1336 			break;
1337 		ecap = pci_read_config(child, ptr, 4);
1338 	}
1339 
1340 	return (ENOENT);
1341 }
1342 
1343 /*
1344  * Support for MSI-X message interrupts.
1345  */
/*
 * Program the address/data pair for one MSI-X vector in the device's
 * MSI-X table and re-enable any MSI -> HT mapping for the new address.
 */
void
pci_enable_msix_method(device_t dev, device_t child, u_int index,
    uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each MSI-X table entry is 16 bytes wide. */
	offset = msix->msix_table_offset + index * 16;
	/* Entry layout: address low, address high, data dwords. */
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
1363 
1364 void
1365 pci_mask_msix(device_t dev, u_int index)
1366 {
1367 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1368 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1369 	uint32_t offset, val;
1370 
1371 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1372 	offset = msix->msix_table_offset + index * 16 + 12;
1373 	val = bus_read_4(msix->msix_table_res, offset);
1374 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1375 		val |= PCIM_MSIX_VCTRL_MASK;
1376 		bus_write_4(msix->msix_table_res, offset, val);
1377 	}
1378 }
1379 
1380 void
1381 pci_unmask_msix(device_t dev, u_int index)
1382 {
1383 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1384 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1385 	uint32_t offset, val;
1386 
1387 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1388 	offset = msix->msix_table_offset + index * 16 + 12;
1389 	val = bus_read_4(msix->msix_table_res, offset);
1390 	if (val & PCIM_MSIX_VCTRL_MASK) {
1391 		val &= ~PCIM_MSIX_VCTRL_MASK;
1392 		bus_write_4(msix->msix_table_res, offset, val);
1393 	}
1394 }
1395 
1396 int
1397 pci_pending_msix(device_t dev, u_int index)
1398 {
1399 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1400 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1401 	uint32_t offset, bit;
1402 
1403 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1404 	offset = msix->msix_pba_offset + (index / 32) * 4;
1405 	bit = 1 << index % 32;
1406 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1407 }
1408 
1409 /*
1410  * Restore MSI-X registers and table during resume.  If MSI-X is
1411  * enabled then walk the virtual table to restore the actual MSI-X
1412  * table.
1413  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; msix_vectors[] is 0-based. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1441 
1442 /*
1443  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1444  * returned in *count.  After this function returns, each message will be
1445  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1446  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * already have allocated and activated the memory resource(s)
	 * holding the MSI-X table and pending-bit array (PBA).
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	/* The PBA may live in a different BAR than the vector table. */
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages were successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1581 
1582 /*
1583  * By default, pci_alloc_msix() will assign the allocated IRQ
1584  * resources consecutively to the first N messages in the MSI-X table.
1585  * However, device drivers may want to use different layouts if they
1586  * either receive fewer messages than they asked for, or they wish to
1587  * populate the MSI-X table sparsely.  This method allows the driver
1588  * to specify what layout it wants.  It must be called after a
1589  * successful pci_alloc_msix() but before any of the associated
1590  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1591  *
1592  * The 'vectors' array contains 'count' message vectors.  The array
1593  * maps directly to the MSI-X table in that index 0 in the array
1594  * specifies the vector for the first message in the MSI-X table, etc.
1595  * The vector value in each array index can either be 0 to indicate
1596  * that no vector should be assigned to a message slot, or it can be a
1597  * number from 1 to N (where N is the count returned from a
1598  * succcessful call to pci_alloc_msix()) to indicate which message
1599  * vector (IRQ) to be used for the corresponding message.
1600  *
1601  * On successful return, each message with a non-zero vector will have
1602  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1603  * 1.  Additionally, if any of the IRQs allocated via the previous
1604  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1605  * will be freed back to the system automatically.
1606  *
1607  * For example, suppose a driver has a MSI-X table with 6 messages and
1608  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1609  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1610  * C.  After the call to pci_alloc_msix(), the device will be setup to
1611  * have an MSI-X table of ABC--- (where - means no vector assigned).
1612  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1613  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1614  * be freed back to the system.  This device will also have valid
1615  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1616  *
1617  * In any case, the SYS_RES_IRQ rid X will always map to the message
1618  * at MSI-X table index X - 1 and will only be valid if a vector is
1619  * assigned to that table entry.
1620  */
1621 int
1622 pci_remap_msix_method(device_t dev, device_t child, int count,
1623     const u_int *vectors)
1624 {
1625 	struct pci_devinfo *dinfo = device_get_ivars(child);
1626 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1627 	struct resource_list_entry *rle;
1628 	int i, irq, j, *used;
1629 
1630 	/*
1631 	 * Have to have at least one message in the table but the
1632 	 * table can't be bigger than the actual MSI-X table in the
1633 	 * device.
1634 	 */
1635 	if (count == 0 || count > msix->msix_msgnum)
1636 		return (EINVAL);
1637 
1638 	/* Sanity check the vectors. */
1639 	for (i = 0; i < count; i++)
1640 		if (vectors[i] > msix->msix_alloc)
1641 			return (EINVAL);
1642 
1643 	/*
1644 	 * Make sure there aren't any holes in the vectors to be used.
1645 	 * It's a big pain to support it, and it doesn't really make
1646 	 * sense anyway.  Also, at least one vector must be used.
1647 	 */
1648 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1649 	    M_ZERO);
1650 	for (i = 0; i < count; i++)
1651 		if (vectors[i] != 0)
1652 			used[vectors[i] - 1] = 1;
1653 	for (i = 0; i < msix->msix_alloc - 1; i++)
1654 		if (used[i] == 0 && used[i + 1] == 1) {
1655 			free(used, M_DEVBUF);
1656 			return (EINVAL);
1657 		}
1658 	if (used[0] != 1) {
1659 		free(used, M_DEVBUF);
1660 		return (EINVAL);
1661 	}
1662 
1663 	/* Make sure none of the resources are allocated. */
1664 	for (i = 0; i < msix->msix_table_len; i++) {
1665 		if (msix->msix_table[i].mte_vector == 0)
1666 			continue;
1667 		if (msix->msix_table[i].mte_handlers > 0)
1668 			return (EBUSY);
1669 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1670 		KASSERT(rle != NULL, ("missing resource"));
1671 		if (rle->res != NULL)
1672 			return (EBUSY);
1673 	}
1674 
1675 	/* Free the existing resource list entries. */
1676 	for (i = 0; i < msix->msix_table_len; i++) {
1677 		if (msix->msix_table[i].mte_vector == 0)
1678 			continue;
1679 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1680 	}
1681 
1682 	/*
1683 	 * Build the new virtual table keeping track of which vectors are
1684 	 * used.
1685 	 */
1686 	free(msix->msix_table, M_DEVBUF);
1687 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1688 	    M_DEVBUF, M_WAITOK | M_ZERO);
1689 	for (i = 0; i < count; i++)
1690 		msix->msix_table[i].mte_vector = vectors[i];
1691 	msix->msix_table_len = count;
1692 
1693 	/* Free any unused IRQs and resize the vectors array if necessary. */
1694 	j = msix->msix_alloc - 1;
1695 	if (used[j] == 0) {
1696 		struct msix_vector *vec;
1697 
1698 		while (used[j] == 0) {
1699 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1700 			    msix->msix_vectors[j].mv_irq);
1701 			j--;
1702 		}
1703 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1704 		    M_WAITOK);
1705 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1706 		    (j + 1));
1707 		free(msix->msix_vectors, M_DEVBUF);
1708 		msix->msix_vectors = vec;
1709 		msix->msix_alloc = j + 1;
1710 	}
1711 	free(used, M_DEVBUF);
1712 
1713 	/* Map the IRQs onto the rids. */
1714 	for (i = 0; i < count; i++) {
1715 		if (vectors[i] == 0)
1716 			continue;
1717 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1718 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1719 		    irq, 1);
1720 	}
1721 
1722 	if (bootverbose) {
1723 		device_printf(child, "Remapped MSI-X IRQs as: ");
1724 		for (i = 0; i < count; i++) {
1725 			if (i != 0)
1726 				printf(", ");
1727 			if (vectors[i] == 0)
1728 				printf("---");
1729 			else
1730 				printf("%d",
1731 				    msix->msix_vectors[vectors[i]].mv_irq);
1732 		}
1733 		printf("\n");
1734 	}
1735 
1736 	return (0);
1737 }
1738 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  A message
	 * with an active handler or an outstanding SYS_RES_IRQ
	 * allocation cannot be torn down safely.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1785 
1786 /*
1787  * Return the max supported MSI-X messages this device supports.
1788  * Basically, assuming the MD code can alloc messages, this function
1789  * should return the maximum value that pci_alloc_msix() can return.
1790  * Thus, it is subject to the tunables, etc.
1791  */
1792 int
1793 pci_msix_count_method(device_t dev, device_t child)
1794 {
1795 	struct pci_devinfo *dinfo = device_get_ivars(child);
1796 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1797 
1798 	if (pci_do_msix && msix->msix_location != 0)
1799 		return (msix->msix_msgnum);
1800 	return (0);
1801 }
1802 
1803 /*
1804  * HyperTransport MSI mapping control
1805  */
1806 void
1807 pci_ht_map_msi(device_t dev, uint64_t addr)
1808 {
1809 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1810 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1811 
1812 	if (!ht->ht_msimap)
1813 		return;
1814 
1815 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1816 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1817 		/* Enable MSI -> HT mapping. */
1818 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1819 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1820 		    ht->ht_msictrl, 2);
1821 	}
1822 
1823 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1824 		/* Disable MSI -> HT mapping. */
1825 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1826 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1827 		    ht->ht_msictrl, 2);
1828 	}
1829 }
1830 
1831 int
1832 pci_get_max_read_req(device_t dev)
1833 {
1834 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1835 	int cap;
1836 	uint16_t val;
1837 
1838 	cap = dinfo->cfg.pcie.pcie_location;
1839 	if (cap == 0)
1840 		return (0);
1841 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1842 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1843 	val >>= 12;
1844 	return (1 << (val + 7));
1845 }
1846 
1847 int
1848 pci_set_max_read_req(device_t dev, int size)
1849 {
1850 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1851 	int cap;
1852 	uint16_t val;
1853 
1854 	cap = dinfo->cfg.pcie.pcie_location;
1855 	if (cap == 0)
1856 		return (0);
1857 	if (size < 128)
1858 		size = 128;
1859 	if (size > 4096)
1860 		size = 4096;
1861 	size = (1 << (fls(size) - 1));
1862 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1863 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1864 	val |= (fls(size) - 8) << 12;
1865 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1866 	return (size);
1867 }
1868 
1869 /*
1870  * Support for MSI message signalled interrupts.
1871  */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/*
	 * The location of the data register depends on whether the
	 * function implements the 64-bit address capability.
	 */
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
1899 
1900 void
1901 pci_disable_msi_method(device_t dev, device_t child)
1902 {
1903 	struct pci_devinfo *dinfo = device_get_ivars(child);
1904 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1905 
1906 	/* Disable MSI -> HT mapping. */
1907 	pci_ht_map_msi(child, 0);
1908 
1909 	/* Disable MSI in the control register. */
1910 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1911 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1912 	    msi->msi_ctrl, 2);
1913 }
1914 
1915 /*
1916  * Restore MSI registers during resume.  If MSI is enabled then
1917  * restore the data and address registers in addition to the control
1918  * register.
1919  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Restore the saved address/data pair. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* The data register moves for 64-bit capable functions. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the control register whether or not MSI is enabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1945 
1946 static int
1947 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1948 {
1949 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1950 	pcicfgregs *cfg = &dinfo->cfg;
1951 	struct resource_list_entry *rle;
1952 	struct msix_table_entry *mte;
1953 	struct msix_vector *mv;
1954 	uint64_t addr;
1955 	uint32_t data;
1956 	int error, i, j;
1957 
1958 	/*
1959 	 * Handle MSI first.  We try to find this IRQ among our list
1960 	 * of MSI IRQs.  If we find it, we request updated address and
1961 	 * data registers and apply the results.
1962 	 */
1963 	if (cfg->msi.msi_alloc > 0) {
1964 
1965 		/* If we don't have any active handlers, nothing to do. */
1966 		if (cfg->msi.msi_handlers == 0)
1967 			return (0);
1968 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1969 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1970 			    i + 1);
1971 			if (rle->start == irq) {
1972 				error = PCIB_MAP_MSI(device_get_parent(bus),
1973 				    dev, irq, &addr, &data);
1974 				if (error)
1975 					return (error);
1976 				pci_disable_msi(dev);
1977 				dinfo->cfg.msi.msi_addr = addr;
1978 				dinfo->cfg.msi.msi_data = data;
1979 				pci_enable_msi(dev, addr, data);
1980 				return (0);
1981 			}
1982 		}
1983 		return (ENOENT);
1984 	}
1985 
1986 	/*
1987 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1988 	 * we request the updated mapping info.  If that works, we go
1989 	 * through all the slots that use this IRQ and update them.
1990 	 */
1991 	if (cfg->msix.msix_alloc > 0) {
1992 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1993 			mv = &cfg->msix.msix_vectors[i];
1994 			if (mv->mv_irq == irq) {
1995 				error = PCIB_MAP_MSI(device_get_parent(bus),
1996 				    dev, irq, &addr, &data);
1997 				if (error)
1998 					return (error);
1999 				mv->mv_address = addr;
2000 				mv->mv_data = data;
2001 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2002 					mte = &cfg->msix.msix_table[j];
2003 					if (mte->mte_vector != i + 1)
2004 						continue;
2005 					if (mte->mte_handlers == 0)
2006 						continue;
2007 					pci_mask_msix(dev, j);
2008 					pci_enable_msix(dev, j, addr, data);
2009 					pci_unmask_msix(dev, j);
2010 				}
2011 			}
2012 		}
2013 		return (ENOENT);
2014 	}
2015 
2016 	return (ENOENT);
2017 }
2018 
2019 /*
2020  * Returns true if the specified device is blacklisted because MSI
2021  * doesn't work.
2022  */
2023 int
2024 pci_msi_device_blacklisted(device_t dev)
2025 {
2026 
2027 	if (!pci_honor_msi_blacklist)
2028 		return (0);
2029 
2030 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2031 }
2032 
2033 /*
2034  * Determine if MSI is blacklisted globally on this system.  Currently,
2035  * we just check for blacklisted chipsets as represented by the
2036  * host-PCI bridge at device 0:0:0.  In the future, it may become
2037  * necessary to check other system attributes, such as the kenv values
2038  * that give the motherboard manufacturer and model number.
2039  */
2040 static int
2041 pci_msi_blacklisted(void)
2042 {
2043 	device_t dev;
2044 
2045 	if (!pci_honor_msi_blacklist)
2046 		return (0);
2047 
2048 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2049 	if (!(pcie_chipset || pcix_chipset)) {
2050 		if (vm_guest != VM_GUEST_NO) {
2051 			/*
2052 			 * Whitelist older chipsets in virtual
2053 			 * machines known to support MSI.
2054 			 */
2055 			dev = pci_find_bsf(0, 0, 0);
2056 			if (dev != NULL)
2057 				return (!pci_has_quirk(pci_get_devid(dev),
2058 					PCI_QUIRK_ENABLE_MSI_VM));
2059 		}
2060 		return (1);
2061 	}
2062 
2063 	dev = pci_find_bsf(0, 0, 0);
2064 	if (dev != NULL)
2065 		return (pci_msi_device_blacklisted(dev));
2066 	return (0);
2067 }
2068 
2069 /*
2070  * Returns true if the specified device is blacklisted because MSI-X
2071  * doesn't work.  Note that this assumes that if MSI doesn't work,
2072  * MSI-X doesn't either.
2073  */
2074 int
2075 pci_msix_device_blacklisted(device_t dev)
2076 {
2077 
2078 	if (!pci_honor_msi_blacklist)
2079 		return (0);
2080 
2081 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2082 		return (1);
2083 
2084 	return (pci_msi_device_blacklisted(dev));
2085 }
2086 
2087 /*
2088  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2089  * is blacklisted, assume that MSI-X is as well.  Check for additional
2090  * chipsets where MSI works but MSI-X does not.
2091  */
2092 static int
2093 pci_msix_blacklisted(void)
2094 {
2095 	device_t dev;
2096 
2097 	if (!pci_honor_msi_blacklist)
2098 		return (0);
2099 
2100 	dev = pci_find_bsf(0, 0, 0);
2101 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2102 	    PCI_QUIRK_DISABLE_MSIX))
2103 		return (1);
2104 
2105 	return (pci_msi_blacklisted());
2106 }
2107 
2108 /*
2109  * Attempt to allocate *count MSI messages.  The actual number allocated is
2110  * returned in *count.  After this function returns, each message will be
2111  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2112  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Keep halving the request until the parent can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* The MME field encodes log2 of the number of enabled messages. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2231 
2232 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Snapshot the IRQ numbers before deleting the resource entries. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2280 
2281 /*
2282  * Return the max supported MSI messages this device supports.
2283  * Basically, assuming the MD code can alloc messages, this function
2284  * should return the maximum value that pci_alloc_msi() can return.
2285  * Thus, it is subject to the tunables, etc.
2286  */
2287 int
2288 pci_msi_count_method(device_t dev, device_t child)
2289 {
2290 	struct pci_devinfo *dinfo = device_get_ivars(child);
2291 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2292 
2293 	if (pci_do_msi && msi->msi_location != 0)
2294 		return (msi->msi_msgnum);
2295 	return (0);
2296 }
2297 
2298 /* free pcicfgregs structure and all depending data structures */
2299 
2300 int
2301 pci_freecfg(struct pci_devinfo *dinfo)
2302 {
2303 	struct devlist *devlist_head;
2304 	struct pci_map *pm, *next;
2305 	int i;
2306 
2307 	devlist_head = &pci_devq;
2308 
2309 	if (dinfo->cfg.vpd.vpd_reg) {
2310 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2311 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2312 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2313 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2314 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2315 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2316 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2317 	}
2318 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2319 		free(pm, M_DEVBUF);
2320 	}
2321 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2322 	free(dinfo, M_DEVBUF);
2323 
2324 	/* increment the generation count */
2325 	pci_generation++;
2326 
2327 	/* we're losing one device */
2328 	pci_numdevs--;
2329 	return (0);
2330 }
2331 
2332 /*
2333  * PCI power manangement
2334  */
2335 int
2336 pci_set_powerstate_method(device_t dev, device_t child, int state)
2337 {
2338 	struct pci_devinfo *dinfo = device_get_ivars(child);
2339 	pcicfgregs *cfg = &dinfo->cfg;
2340 	uint16_t status;
2341 	int result, oldstate, highest, delay;
2342 
2343 	if (cfg->pp.pp_cap == 0)
2344 		return (EOPNOTSUPP);
2345 
2346 	/*
2347 	 * Optimize a no state change request away.  While it would be OK to
2348 	 * write to the hardware in theory, some devices have shown odd
2349 	 * behavior when going from D3 -> D3.
2350 	 */
2351 	oldstate = pci_get_powerstate(child);
2352 	if (oldstate == state)
2353 		return (0);
2354 
2355 	/*
2356 	 * The PCI power management specification states that after a state
2357 	 * transition between PCI power states, system software must
2358 	 * guarantee a minimal delay before the function accesses the device.
2359 	 * Compute the worst case delay that we need to guarantee before we
2360 	 * access the device.  Many devices will be responsive much more
2361 	 * quickly than this delay, but there are some that don't respond
2362 	 * instantly to state changes.  Transitions to/from D3 state require
2363 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2364 	 * is done below with DELAY rather than a sleeper function because
2365 	 * this function can be called from contexts where we cannot sleep.
2366 	 */
2367 	highest = (oldstate > state) ? oldstate : state;
2368 	if (highest == PCI_POWERSTATE_D3)
2369 	    delay = 10000;
2370 	else if (highest == PCI_POWERSTATE_D2)
2371 	    delay = 200;
2372 	else
2373 	    delay = 0;
2374 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2375 	    & ~PCIM_PSTAT_DMASK;
2376 	result = 0;
2377 	switch (state) {
2378 	case PCI_POWERSTATE_D0:
2379 		status |= PCIM_PSTAT_D0;
2380 		break;
2381 	case PCI_POWERSTATE_D1:
2382 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2383 			return (EOPNOTSUPP);
2384 		status |= PCIM_PSTAT_D1;
2385 		break;
2386 	case PCI_POWERSTATE_D2:
2387 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2388 			return (EOPNOTSUPP);
2389 		status |= PCIM_PSTAT_D2;
2390 		break;
2391 	case PCI_POWERSTATE_D3:
2392 		status |= PCIM_PSTAT_D3;
2393 		break;
2394 	default:
2395 		return (EINVAL);
2396 	}
2397 
2398 	if (bootverbose)
2399 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2400 		    state);
2401 
2402 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2403 	if (delay)
2404 		DELAY(delay);
2405 	return (0);
2406 }
2407 
2408 int
2409 pci_get_powerstate_method(device_t dev, device_t child)
2410 {
2411 	struct pci_devinfo *dinfo = device_get_ivars(child);
2412 	pcicfgregs *cfg = &dinfo->cfg;
2413 	uint16_t status;
2414 	int result;
2415 
2416 	if (cfg->pp.pp_cap != 0) {
2417 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2418 		switch (status & PCIM_PSTAT_DMASK) {
2419 		case PCIM_PSTAT_D0:
2420 			result = PCI_POWERSTATE_D0;
2421 			break;
2422 		case PCIM_PSTAT_D1:
2423 			result = PCI_POWERSTATE_D1;
2424 			break;
2425 		case PCIM_PSTAT_D2:
2426 			result = PCI_POWERSTATE_D2;
2427 			break;
2428 		case PCIM_PSTAT_D3:
2429 			result = PCI_POWERSTATE_D3;
2430 			break;
2431 		default:
2432 			result = PCI_POWERSTATE_UNKNOWN;
2433 			break;
2434 		}
2435 	} else {
2436 		/* No support, device is always at D0 */
2437 		result = PCI_POWERSTATE_D0;
2438 	}
2439 	return (result);
2440 }
2441 
2442 /*
2443  * Some convenience functions for PCI device drivers.
2444  */
2445 
2446 static __inline void
2447 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2448 {
2449 	uint16_t	command;
2450 
2451 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2452 	command |= bit;
2453 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2454 }
2455 
2456 static __inline void
2457 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2458 {
2459 	uint16_t	command;
2460 
2461 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2462 	command &= ~bit;
2463 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2464 }
2465 
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Turn on the bus-master enable bit in the command register. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2472 
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Turn off the bus-master enable bit in the command register. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2479 
2480 int
2481 pci_enable_io_method(device_t dev, device_t child, int space)
2482 {
2483 	uint16_t bit;
2484 
2485 	switch(space) {
2486 	case SYS_RES_IOPORT:
2487 		bit = PCIM_CMD_PORTEN;
2488 		break;
2489 	case SYS_RES_MEMORY:
2490 		bit = PCIM_CMD_MEMEN;
2491 		break;
2492 	default:
2493 		return (EINVAL);
2494 	}
2495 	pci_set_command_bit(dev, child, bit);
2496 	return (0);
2497 }
2498 
2499 int
2500 pci_disable_io_method(device_t dev, device_t child, int space)
2501 {
2502 	uint16_t bit;
2503 
2504 	switch(space) {
2505 	case SYS_RES_IOPORT:
2506 		bit = PCIM_CMD_PORTEN;
2507 		break;
2508 	case SYS_RES_MEMORY:
2509 		bit = PCIM_CMD_MEMEN;
2510 		break;
2511 	default:
2512 		return (EINVAL);
2513 	}
2514 	pci_clear_command_bit(dev, child, bit);
2515 	return (0);
2516 }
2517 
2518 /*
2519  * New style pci driver.  Parent device is either a pci-host-bridge or a
2520  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2521  */
2522 
/*
 * Print the interesting details of a device's config header, including
 * its power management, MSI and MSI-X capabilities.  Only prints when
 * booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2579 
2580 static int
2581 pci_porten(device_t dev)
2582 {
2583 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2584 }
2585 
2586 static int
2587 pci_memen(device_t dev)
2588 {
2589 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2590 }
2591 
/*
 * Read a BAR's current value into *mapp and its sizing probe value
 * (obtained by writing all 1's) into *testvalp, temporarily disabling
 * decoding while the BAR holds the probe pattern.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2655 
/*
 * Program a BAR with 'base' (both words for 64-bit BARs) and cache the
 * value the device actually latched in pm->pm_value.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the device accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2676 
2677 struct pci_map *
2678 pci_find_bar(device_t dev, int reg)
2679 {
2680 	struct pci_devinfo *dinfo;
2681 	struct pci_map *pm;
2682 
2683 	dinfo = device_get_ivars(dev);
2684 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2685 		if (pm->pm_reg == reg)
2686 			return (pm);
2687 	}
2688 	return (NULL);
2689 }
2690 
2691 int
2692 pci_bar_enabled(device_t dev, struct pci_map *pm)
2693 {
2694 	struct pci_devinfo *dinfo;
2695 	uint16_t cmd;
2696 
2697 	dinfo = device_get_ivars(dev);
2698 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2699 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2700 		return (0);
2701 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2702 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2703 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2704 	else
2705 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2706 }
2707 
/*
 * Allocate a pci_map record describing BAR 'reg' and insert it into
 * the device's map list, which is kept sorted by register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2732 
/*
 * Write the cached BAR values back to the device, low word first and
 * then the high word for 64-bit BARs.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* The device ROM BAR is always a 32-bit memory BAR. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2752 
2753 /*
2754  * Add a resource based on a pci map register. Return 1 if the map
2755  * register is a 32bit map register or 2 if it is a 64bit register.
2756  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is the log2 of the BAR size in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2924 
2925 /*
2926  * For ATA devices we need to decide early what addressing mode to use.
2927  * Legacy demands that the primary and secondary ATA ports sits on the
2928  * same addresses that old ISA hardware did. This dictates that we use
2929  * those addresses and ignore the BAR's if we cannot set PCI native
2930  * addressing mode.
2931  */
2932 static void
2933 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2934     uint32_t prefetchmask)
2935 {
2936 	struct resource *r;
2937 	int rid, type, progif;
2938 #if 0
2939 	/* if this device supports PCI native addressing use it */
2940 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2941 	if ((progif & 0x8a) == 0x8a) {
2942 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2943 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2944 			printf("Trying ATA native PCI addressing mode\n");
2945 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2946 		}
2947 	}
2948 #endif
2949 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2950 	type = SYS_RES_IOPORT;
2951 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2952 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2953 		    prefetchmask & (1 << 0));
2954 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2955 		    prefetchmask & (1 << 1));
2956 	} else {
2957 		rid = PCIR_BAR(0);
2958 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2959 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2960 		    0x1f7, 8, 0);
2961 		rid = PCIR_BAR(1);
2962 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2963 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2964 		    0x3f6, 1, 0);
2965 	}
2966 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2967 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2968 		    prefetchmask & (1 << 2));
2969 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2970 		    prefetchmask & (1 << 3));
2971 	} else {
2972 		rid = PCIR_BAR(2);
2973 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2974 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2975 		    0x177, 8, 0);
2976 		rid = PCIR_BAR(3);
2977 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2978 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2979 		    0x376, 1, 0);
2980 	}
2981 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2982 	    prefetchmask & (1 << 4));
2983 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2984 	    prefetchmask & (1 << 5));
2985 }
2986 
2987 static void
2988 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2989 {
2990 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2991 	pcicfgregs *cfg = &dinfo->cfg;
2992 	char tunable_name[64];
2993 	int irq;
2994 
2995 	/* Has to have an intpin to have an interrupt. */
2996 	if (cfg->intpin == 0)
2997 		return;
2998 
2999 	/* Let the user override the IRQ with a tunable. */
3000 	irq = PCI_INVALID_IRQ;
3001 	snprintf(tunable_name, sizeof(tunable_name),
3002 	    "hw.pci%d.%d.%d.INT%c.irq",
3003 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3004 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3005 		irq = PCI_INVALID_IRQ;
3006 
3007 	/*
3008 	 * If we didn't get an IRQ via the tunable, then we either use the
3009 	 * IRQ value in the intline register or we ask the bus to route an
3010 	 * interrupt for us.  If force_route is true, then we only use the
3011 	 * value in the intline register if the bus was unable to assign an
3012 	 * IRQ.
3013 	 */
3014 	if (!PCI_INTERRUPT_VALID(irq)) {
3015 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3016 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3017 		if (!PCI_INTERRUPT_VALID(irq))
3018 			irq = cfg->intline;
3019 	}
3020 
3021 	/* If after all that we don't have an IRQ, just bail. */
3022 	if (!PCI_INTERRUPT_VALID(irq))
3023 		return;
3024 
3025 	/* Update the config register if it changed. */
3026 	if (irq != cfg->intline) {
3027 		cfg->intline = irq;
3028 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3029 	}
3030 
3031 	/* Add this IRQ as rid 0 interrupt resource. */
3032 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3033 }
3034 
3035 /* Perform early OHCI takeover from SMM. */
3036 static void
3037 ohci_early_takeover(device_t self)
3038 {
3039 	struct resource *res;
3040 	uint32_t ctl;
3041 	int rid;
3042 	int i;
3043 
3044 	rid = PCIR_BAR(0);
3045 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3046 	if (res == NULL)
3047 		return;
3048 
3049 	ctl = bus_read_4(res, OHCI_CONTROL);
3050 	if (ctl & OHCI_IR) {
3051 		if (bootverbose)
3052 			printf("ohci early: "
3053 			    "SMM active, request owner change\n");
3054 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3055 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3056 			DELAY(1000);
3057 			ctl = bus_read_4(res, OHCI_CONTROL);
3058 		}
3059 		if (ctl & OHCI_IR) {
3060 			if (bootverbose)
3061 				printf("ohci early: "
3062 				    "SMM does not respond, resetting\n");
3063 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3064 		}
3065 		/* Disable interrupts */
3066 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3067 	}
3068 
3069 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3070 }
3071 
3072 /* Perform early UHCI takeover from SMM. */
3073 static void
3074 uhci_early_takeover(device_t self)
3075 {
3076 	struct resource *res;
3077 	int rid;
3078 
3079 	/*
3080 	 * Set the PIRQD enable bit and switch off all the others. We don't
3081 	 * want legacy support to interfere with us XXX Does this also mean
3082 	 * that the BIOS won't touch the keyboard anymore if it is connected
3083 	 * to the ports of the root hub?
3084 	 */
3085 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3086 
3087 	/* Disable interrupts */
3088 	rid = PCI_UHCI_BASE_REG;
3089 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3090 	if (res != NULL) {
3091 		bus_write_2(res, UHCI_INTR, 0);
3092 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3093 	}
3094 }
3095 
3096 /* Perform early EHCI takeover from SMM. */
3097 static void
3098 ehci_early_takeover(device_t self)
3099 {
3100 	struct resource *res;
3101 	uint32_t cparams;
3102 	uint32_t eec;
3103 	uint8_t eecp;
3104 	uint8_t bios_sem;
3105 	uint8_t offs;
3106 	int rid;
3107 	int i;
3108 
3109 	rid = PCIR_BAR(0);
3110 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3111 	if (res == NULL)
3112 		return;
3113 
3114 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3115 
3116 	/* Synchronise with the BIOS if it owns the controller. */
3117 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3118 	    eecp = EHCI_EECP_NEXT(eec)) {
3119 		eec = pci_read_config(self, eecp, 4);
3120 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3121 			continue;
3122 		}
3123 		bios_sem = pci_read_config(self, eecp +
3124 		    EHCI_LEGSUP_BIOS_SEM, 1);
3125 		if (bios_sem == 0) {
3126 			continue;
3127 		}
3128 		if (bootverbose)
3129 			printf("ehci early: "
3130 			    "SMM active, request owner change\n");
3131 
3132 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3133 
3134 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3135 			DELAY(1000);
3136 			bios_sem = pci_read_config(self, eecp +
3137 			    EHCI_LEGSUP_BIOS_SEM, 1);
3138 		}
3139 
3140 		if (bios_sem != 0) {
3141 			if (bootverbose)
3142 				printf("ehci early: "
3143 				    "SMM does not respond\n");
3144 		}
3145 		/* Disable interrupts */
3146 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3147 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3148 	}
3149 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3150 }
3151 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* No capability read yet; -1 keeps the first loop test true. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		/* Only the USB legacy support capability is of interest. */
		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* Wait a maximum of 5 seconds for the BIOS to release. */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* NOTE(review): status read-back presumably flushes the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3213 
3214 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range decoded by a PCI-PCI or
 * CardBus bridge from our parent, applying device-specific quirks
 * first.  On failure (or when pci_clear_buses is set) the secbus and
 * subbus registers are cleared so the range is renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Locate the secondary/subordinate registers by header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the range from register 0x41 when it is valid. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		/* Placeholder entry; the reserve below pins the range. */
		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	/* Invalidate the range so the bus gets renumbered later. */
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3319 
/*
 * Allocate the secondary bus number range (rid 0) for a bridge child,
 * lazily reserving the range and programming the secbus/subbus
 * registers if the range was not reserved earlier.  Returns NULL for
 * non-bridge children or any rid other than 0.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Locate the secondary/subordinate registers by header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Only rid 0 is a valid bus number resource. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge with the range we just reserved. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3370 #endif
3371 
/*
 * Populate the resource list of a newly discovered PCI device: decode
 * its BARs (with quirk handling), assign its INTx interrupt, perform
 * early USB controller takeover from the BIOS/SMM, and reserve bridge
 * secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * Advance by the number of map registers the BAR
			 * consumed as returned by pci_add_map().
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Hand off USB host controllers from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3453 
3454 static struct pci_devinfo *
3455 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3456     int slot, int func, size_t dinfo_size)
3457 {
3458 	struct pci_devinfo *dinfo;
3459 
3460 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3461 	if (dinfo != NULL)
3462 		pci_add_child(dev, dinfo);
3463 
3464 	return (dinfo);
3465 }
3466 
/*
 * Scan every slot/function of the given bus and add a child device for
 * each function found.  dinfo_size lets subclasses embed pci_devinfo
 * inside a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func only applies to slot 0; later slots start at 0. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots whose header type register reads as invalid. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions above 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3513 
/*
 * Create a new-bus child for the PCI function described by dinfo:
 * attach ivars, initialize its resource list, announce it, and decode
 * its BAR/IRQ resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot config space, then re-apply it before touching BARs. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3526 
/*
 * Default "child added" bus method: intentionally a no-op.  Subclasses
 * of the PCI bus driver may provide their own implementation to be
 * notified of newly added children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3532 
/*
 * Device probe method for the PCI bus driver itself.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3542 
/*
 * Attach work shared by the PCI bus driver and its subclasses: reserve
 * our own bus number from the parent bridge (when PCI_RES_BUS is
 * available) and establish the DMA tag handed out to children.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	rid = 0;
	/* Claim our own bus number from the parent bridge. */
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Only create a boundary-constrained tag when our grandparent is
	 * not itself a PCI bus; busses below a PCI-PCI bridge fall back
	 * to the parent's tag.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3589 
3590 static int
3591 pci_attach(device_t dev)
3592 {
3593 	int busno, domain, error;
3594 
3595 	error = pci_attach_common(dev);
3596 	if (error)
3597 		return (error);
3598 
3599 	/*
3600 	 * Since there can be multiple independantly numbered PCI
3601 	 * busses on systems with multiple PCI domains, we can't use
3602 	 * the unit number to decide which bus we are probing. We ask
3603 	 * the parent pcib what our domain and bus numbers are.
3604 	 */
3605 	domain = pcib_get_domain(dev);
3606 	busno = pcib_get_bus(dev);
3607 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3608 	return (bus_generic_attach(dev));
3609 }
3610 
3611 #ifdef PCI_RES_BUS
3612 static int
3613 pci_detach(device_t dev)
3614 {
3615 	struct pci_softc *sc;
3616 	int error;
3617 
3618 	error = bus_generic_detach(dev);
3619 	if (error)
3620 		return (error);
3621 	sc = device_get_softc(dev);
3622 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3623 }
3624 #endif
3625 
3626 static void
3627 pci_set_power_child(device_t dev, device_t child, int state)
3628 {
3629 	struct pci_devinfo *dinfo;
3630 	device_t pcib;
3631 	int dstate;
3632 
3633 	/*
3634 	 * Set the device to the given state.  If the firmware suggests
3635 	 * a different power state, use it instead.  If power management
3636 	 * is not present, the firmware is responsible for managing
3637 	 * device power.  Skip children who aren't attached since they
3638 	 * are handled separately.
3639 	 */
3640 	pcib = device_get_parent(dev);
3641 	dinfo = device_get_ivars(child);
3642 	dstate = state;
3643 	if (device_is_attached(child) &&
3644 	    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3645 		pci_set_powerstate(child, dstate);
3646 }
3647 
3648 int
3649 pci_suspend_child(device_t dev, device_t child)
3650 {
3651 	struct pci_devinfo *dinfo;
3652 	int error;
3653 
3654 	dinfo = device_get_ivars(child);
3655 
3656 	/*
3657 	 * Save the PCI configuration space for the child and set the
3658 	 * device in the appropriate power state for this sleep state.
3659 	 */
3660 	pci_cfg_save(child, dinfo, 0);
3661 
3662 	/* Suspend devices before potentially powering them down. */
3663 	error = bus_generic_suspend_child(dev, child);
3664 
3665 	if (error)
3666 		return (error);
3667 
3668 	if (pci_do_power_suspend)
3669 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
3670 
3671 	return (0);
3672 }
3673 
3674 int
3675 pci_resume_child(device_t dev, device_t child)
3676 {
3677 	struct pci_devinfo *dinfo;
3678 
3679 	if (pci_do_power_resume)
3680 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
3681 
3682 	dinfo = device_get_ivars(child);
3683 	pci_cfg_restore(child, dinfo);
3684 	if (!device_is_attached(child))
3685 		pci_cfg_save(child, dinfo, 1);
3686 
3687 	bus_generic_resume_child(dev, child);
3688 
3689 	return (0);
3690 }
3691 
3692 int
3693 pci_resume(device_t dev)
3694 {
3695 	device_t child, *devlist;
3696 	int error, i, numdevs;
3697 
3698 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3699 		return (error);
3700 
3701 	/*
3702 	 * Resume critical devices first, then everything else later.
3703 	 */
3704 	for (i = 0; i < numdevs; i++) {
3705 		child = devlist[i];
3706 		switch (pci_get_class(child)) {
3707 		case PCIC_DISPLAY:
3708 		case PCIC_MEMORY:
3709 		case PCIC_BRIDGE:
3710 		case PCIC_BASEPERIPH:
3711 			BUS_RESUME_CHILD(dev, child);
3712 			break;
3713 		}
3714 	}
3715 	for (i = 0; i < numdevs; i++) {
3716 		child = devlist[i];
3717 		switch (pci_get_class(child)) {
3718 		case PCIC_DISPLAY:
3719 		case PCIC_MEMORY:
3720 		case PCIC_BRIDGE:
3721 		case PCIC_BASEPERIPH:
3722 			break;
3723 		default:
3724 			BUS_RESUME_CHILD(dev, child);
3725 		}
3726 	}
3727 	free(devlist, M_TEMP);
3728 	return (0);
3729 }
3730 
3731 static void
3732 pci_load_vendor_data(void)
3733 {
3734 	caddr_t data;
3735 	void *ptr;
3736 	size_t sz;
3737 
3738 	data = preload_search_by_type("pci_vendor_data");
3739 	if (data != NULL) {
3740 		ptr = preload_fetch_addr(data);
3741 		sz = preload_fetch_size(data);
3742 		if (ptr != NULL && sz != 0) {
3743 			pci_vendordata = ptr;
3744 			pci_vendordata_size = sz;
3745 			/* terminate the database */
3746 			pci_vendordata[pci_vendordata_size] = '\n';
3747 		}
3748 	}
3749 }
3750 
3751 void
3752 pci_driver_added(device_t dev, driver_t *driver)
3753 {
3754 	int numdevs;
3755 	device_t *devlist;
3756 	device_t child;
3757 	struct pci_devinfo *dinfo;
3758 	int i;
3759 
3760 	if (bootverbose)
3761 		device_printf(dev, "driver added\n");
3762 	DEVICE_IDENTIFY(driver, dev);
3763 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3764 		return;
3765 	for (i = 0; i < numdevs; i++) {
3766 		child = devlist[i];
3767 		if (device_get_state(child) != DS_NOTPRESENT)
3768 			continue;
3769 		dinfo = device_get_ivars(child);
3770 		pci_print_verbose(dinfo);
3771 		if (bootverbose)
3772 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3773 		pci_cfg_restore(child, dinfo);
3774 		if (device_probe_and_attach(child) != 0)
3775 			pci_child_detached(dev, child);
3776 	}
3777 	free(devlist, M_TEMP);
3778 }
3779 
/*
 * Bus method to set up an interrupt handler for a child.  Beyond the
 * generic setup this enables INTx for legacy (rid 0) interrupts, or
 * lazily maps and enables the MSI/MSI-X vector for message-signalled
 * interrupts and masks INTx.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI vector lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3871 
3872 int
3873 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3874     void *cookie)
3875 {
3876 	struct msix_table_entry *mte;
3877 	struct resource_list_entry *rle;
3878 	struct pci_devinfo *dinfo;
3879 	int error, rid;
3880 
3881 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3882 		return (EINVAL);
3883 
3884 	/* If this isn't a direct child, just bail out */
3885 	if (device_get_parent(child) != dev)
3886 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3887 
3888 	rid = rman_get_rid(irq);
3889 	if (rid == 0) {
3890 		/* Mask INTx */
3891 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3892 	} else {
3893 		/*
3894 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3895 		 * decrement the appropriate handlers count and mask the
3896 		 * MSI-X message, or disable MSI messages if the count
3897 		 * drops to 0.
3898 		 */
3899 		dinfo = device_get_ivars(child);
3900 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3901 		if (rle->res != irq)
3902 			return (EINVAL);
3903 		if (dinfo->cfg.msi.msi_alloc > 0) {
3904 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3905 			    ("MSI-X index too high"));
3906 			if (dinfo->cfg.msi.msi_handlers == 0)
3907 				return (EINVAL);
3908 			dinfo->cfg.msi.msi_handlers--;
3909 			if (dinfo->cfg.msi.msi_handlers == 0)
3910 				pci_disable_msi(child);
3911 		} else {
3912 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3913 			    ("No MSI or MSI-X interrupts allocated"));
3914 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3915 			    ("MSI-X index too high"));
3916 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3917 			if (mte->mte_handlers == 0)
3918 				return (EINVAL);
3919 			mte->mte_handlers--;
3920 			if (mte->mte_handlers == 0)
3921 				pci_mask_msix(child, rid - 1);
3922 		}
3923 	}
3924 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3925 	if (rid > 0)
3926 		KASSERT(error == 0,
3927 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3928 	return (error);
3929 }
3930 
3931 int
3932 pci_print_child(device_t dev, device_t child)
3933 {
3934 	struct pci_devinfo *dinfo;
3935 	struct resource_list *rl;
3936 	int retval = 0;
3937 
3938 	dinfo = device_get_ivars(child);
3939 	rl = &dinfo->resources;
3940 
3941 	retval += bus_print_child_header(dev, child);
3942 
3943 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3944 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3945 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3946 	if (device_get_flags(dev))
3947 		retval += printf(" flags %#x", device_get_flags(dev));
3948 
3949 	retval += printf(" at device %d.%d", pci_get_slot(child),
3950 	    pci_get_function(child));
3951 
3952 	retval += bus_print_child_footer(dev, child);
3953 
3954 	return (retval);
3955 }
3956 
/*
 * Generic class/subclass descriptions for devices that no driver
 * claimed, searched by pci_probe_nomatch().  An entry with subclass
 * -1 is the catch-all description for its whole class.  Entries with
 * report == 0 are uninteresting/noisy and are only announced when
 * booting verbose; all others are always reported.  The table is
 * terminated by a NULL desc.
 */
static const struct
{
	int		class;		/* PCIC_* class code */
	int		subclass;	/* PCIS_* subclass code, or -1 for any */
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}	/* table terminator */
};
4051 
4052 void
4053 pci_probe_nomatch(device_t dev, device_t child)
4054 {
4055 	int i, report;
4056 	const char *cp, *scp;
4057 	char *device;
4058 
4059 	/*
4060 	 * Look for a listing for this device in a loaded device database.
4061 	 */
4062 	report = 1;
4063 	if ((device = pci_describe_device(child)) != NULL) {
4064 		device_printf(dev, "<%s>", device);
4065 		free(device, M_DEVBUF);
4066 	} else {
4067 		/*
4068 		 * Scan the class/subclass descriptions for a general
4069 		 * description.
4070 		 */
4071 		cp = "unknown";
4072 		scp = NULL;
4073 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4074 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4075 				if (pci_nomatch_tab[i].subclass == -1) {
4076 					cp = pci_nomatch_tab[i].desc;
4077 					report = pci_nomatch_tab[i].report;
4078 				} else if (pci_nomatch_tab[i].subclass ==
4079 				    pci_get_subclass(child)) {
4080 					scp = pci_nomatch_tab[i].desc;
4081 					report = pci_nomatch_tab[i].report;
4082 				}
4083 			}
4084 		}
4085 		if (report || bootverbose) {
4086 			device_printf(dev, "<%s%s%s>",
4087 			    cp ? cp : "",
4088 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4089 			    scp ? scp : "");
4090 		}
4091 	}
4092 	if (report || bootverbose) {
4093 		printf(" at device %d.%d (no driver attached)\n",
4094 		    pci_get_slot(child), pci_get_function(child));
4095 	}
4096 	pci_cfg_save(child, device_get_ivars(child), 1);
4097 }
4098 
/*
 * Bus callback invoked after one of our children has detached.
 * Reclaims any resources the driver leaked, complaining about each
 * kind, and then saves the device's config state so it can be
 * restored if the device is re-attached later.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	/* Power the device down and snapshot its config registers. */
	pci_cfg_save(child, dinfo, 1);
}
4130 
4131 /*
4132  * Parse the PCI device database, if loaded, and return a pointer to a
4133  * description of the device.
4134  *
4135  * The database is flat text formatted as follows:
4136  *
4137  * Any line not in a valid format is ignored.
4138  * Lines are terminated with newline '\n' characters.
4139  *
4140  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4141  * the vendor name.
4142  *
4143  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4144  * - devices cannot be listed without a corresponding VENDOR line.
4145  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4146  * another TAB, then the device name.
4147  */
4148 
4149 /*
4150  * Assuming (ptr) points to the beginning of a line in the database,
4151  * return the vendor or device and description of the next entry.
4152  * The value of (vendor) or (device) inappropriate for the entry type
4153  * is set to -1.  Returns nonzero at the end of the database.
4154  *
 * Note that this is not fully robust in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialise it.
4158  */
4159 static int
4160 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4161 {
4162 	char	*cp = *ptr;
4163 	int	left;
4164 
4165 	*device = -1;
4166 	*vendor = -1;
4167 	**desc = '\0';
4168 	for (;;) {
4169 		left = pci_vendordata_size - (cp - pci_vendordata);
4170 		if (left <= 0) {
4171 			*ptr = cp;
4172 			return(1);
4173 		}
4174 
4175 		/* vendor entry? */
4176 		if (*cp != '\t' &&
4177 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4178 			break;
4179 		/* device entry? */
4180 		if (*cp == '\t' &&
4181 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4182 			break;
4183 
4184 		/* skip to next line */
4185 		while (*cp != '\n' && left > 0) {
4186 			cp++;
4187 			left--;
4188 		}
4189 		if (*cp == '\n') {
4190 			cp++;
4191 			left--;
4192 		}
4193 	}
4194 	/* skip to next line */
4195 	while (*cp != '\n' && left > 0) {
4196 		cp++;
4197 		left--;
4198 	}
4199 	if (*cp == '\n' && left > 0)
4200 		cp++;
4201 	*ptr = cp;
4202 	return(0);
4203 }
4204 
4205 static char *
4206 pci_describe_device(device_t dev)
4207 {
4208 	int	vendor, device;
4209 	char	*desc, *vp, *dp, *line;
4210 
4211 	desc = vp = dp = NULL;
4212 
4213 	/*
4214 	 * If we have no vendor data, we can't do anything.
4215 	 */
4216 	if (pci_vendordata == NULL)
4217 		goto out;
4218 
4219 	/*
4220 	 * Scan the vendor data looking for this device
4221 	 */
4222 	line = pci_vendordata;
4223 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4224 		goto out;
4225 	for (;;) {
4226 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4227 			goto out;
4228 		if (vendor == pci_get_vendor(dev))
4229 			break;
4230 	}
4231 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4232 		goto out;
4233 	for (;;) {
4234 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4235 			*dp = 0;
4236 			break;
4237 		}
4238 		if (vendor != -1) {
4239 			*dp = 0;
4240 			break;
4241 		}
4242 		if (device == pci_get_device(dev))
4243 			break;
4244 	}
4245 	if (dp[0] == '\0')
4246 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4247 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4248 	    NULL)
4249 		sprintf(desc, "%s, %s", vp, dp);
4250 out:
4251 	if (vp != NULL)
4252 		free(vp, M_DEVBUF);
4253 	if (dp != NULL)
4254 		free(dp, M_DEVBUF);
4255 	return(desc);
4256 }
4257 
/*
 * Read one of the standard PCI instance variables for a child device.
 * All values come from the cached copy of the device's config header
 * (dinfo->cfg), not from a live config-space read.  Unknown ivars
 * return ENOENT.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Device ID in the high 16 bits, vendor ID in the low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4340 
4341 int
4342 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4343 {
4344 	struct pci_devinfo *dinfo;
4345 
4346 	dinfo = device_get_ivars(child);
4347 
4348 	switch (which) {
4349 	case PCI_IVAR_INTPIN:
4350 		dinfo->cfg.intpin = value;
4351 		return (0);
4352 	case PCI_IVAR_ETHADDR:
4353 	case PCI_IVAR_SUBVENDOR:
4354 	case PCI_IVAR_SUBDEVICE:
4355 	case PCI_IVAR_VENDOR:
4356 	case PCI_IVAR_DEVICE:
4357 	case PCI_IVAR_DEVID:
4358 	case PCI_IVAR_CLASS:
4359 	case PCI_IVAR_SUBCLASS:
4360 	case PCI_IVAR_PROGIF:
4361 	case PCI_IVAR_REVID:
4362 	case PCI_IVAR_IRQ:
4363 	case PCI_IVAR_DOMAIN:
4364 	case PCI_IVAR_BUS:
4365 	case PCI_IVAR_SLOT:
4366 	case PCI_IVAR_FUNCTION:
4367 		return (EINVAL);	/* disallow for now */
4368 
4369 	default:
4370 		return (ENOENT);
4371 	}
4372 }
4373 
4374 #include "opt_ddb.h"
4375 #ifdef DDB
4376 #include <ddb/ddb.h>
4377 #include <sys/cons.h>
4378 
4379 /*
4380  * List resources based on pci map registers, used for within ddb
4381  */
4382 
/*
 * DDB "show pciregs" command: walk the global pci_devq device list
 * and print a pciconf(8)-style summary line for each PCI device,
 * stopping early if the debugger pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/* Devices with no attached driver print as "noneN". */
		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4422 #endif /* DDB */
4423 
/*
 * Lazily reserve a resource for a BAR that had no resource-list entry
 * at allocation time (either never sized, or its earlier allocation
 * failed).  Sizes the BAR from the hardware if needed, overrides the
 * caller's count and alignment to match the BAR's true size, reserves
 * a matching range from the parent, and programs the BAR with the
 * address obtained.  Returns the reserved (inactive) resource, or
 * NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address actually obtained. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4521 
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed straight to our parent.  For our own
 * children this performs lazy allocation: a first-use legacy
 * interrupt is routed here, and BARs without a resource-list entry
 * are reserved via pci_reserve_map() before the generic
 * resource-list allocation runs.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/* Not our immediate child: forward up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All cases fall through to the generic resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4600 
/*
 * Bus method: release a resource previously allocated for a child.
 * Requests from grandchildren, and bridge window registers (which are
 * not BARs), are passed up the tree; everything else goes through the
 * child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4635 
4636 int
4637 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4638     struct resource *r)
4639 {
4640 	struct pci_devinfo *dinfo;
4641 	int error;
4642 
4643 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4644 	if (error)
4645 		return (error);
4646 
4647 	/* Enable decoding in the command register when activating BARs. */
4648 	if (device_get_parent(child) == dev) {
4649 		/* Device ROMs need their decoding explicitly enabled. */
4650 		dinfo = device_get_ivars(child);
4651 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4652 			pci_write_bar(child, pci_find_bar(child, rid),
4653 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4654 		switch (type) {
4655 		case SYS_RES_IOPORT:
4656 		case SYS_RES_MEMORY:
4657 			error = PCI_ENABLE_IO(dev, child, type);
4658 			break;
4659 		}
4660 	}
4661 	return (error);
4662 }
4663 
4664 int
4665 pci_deactivate_resource(device_t dev, device_t child, int type,
4666     int rid, struct resource *r)
4667 {
4668 	struct pci_devinfo *dinfo;
4669 	int error;
4670 
4671 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4672 	if (error)
4673 		return (error);
4674 
4675 	/* Disable decoding for device ROMs. */
4676 	if (device_get_parent(child) == dev) {
4677 		dinfo = device_get_ivars(child);
4678 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4679 			pci_write_bar(child, pci_find_bar(child, rid),
4680 			    rman_get_start(r));
4681 	}
4682 	return (0);
4683 }
4684 
/*
 * Destroy a PCI child device entirely: detach its driver if attached,
 * disable its memory/port decoding, release every resource on its
 * resource list (forcibly releasing any still active), delete the
 * device_t, and free the cached config data.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				/* Driver failed to release this; complain
				 * and force the release so we can proceed. */
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4724 
4725 void
4726 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4727 {
4728 	struct pci_devinfo *dinfo;
4729 	struct resource_list *rl;
4730 	struct resource_list_entry *rle;
4731 
4732 	if (device_get_parent(child) != dev)
4733 		return;
4734 
4735 	dinfo = device_get_ivars(child);
4736 	rl = &dinfo->resources;
4737 	rle = resource_list_find(rl, type, rid);
4738 	if (rle == NULL)
4739 		return;
4740 
4741 	if (rle->res) {
4742 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4743 		    resource_list_busy(rl, type, rid)) {
4744 			device_printf(dev, "delete_resource: "
4745 			    "Resource still owned by child, oops. "
4746 			    "(type=%d, rid=%d, addr=%lx)\n",
4747 			    type, rid, rman_get_start(rle->res));
4748 			return;
4749 		}
4750 		resource_list_unreserve(rl, dev, child, type, rid);
4751 	}
4752 	resource_list_delete(rl, type, rid);
4753 }
4754 
4755 struct resource_list *
4756 pci_get_resource_list (device_t dev, device_t child)
4757 {
4758 	struct pci_devinfo *dinfo = device_get_ivars(child);
4759 
4760 	return (&dinfo->resources);
4761 }
4762 
4763 bus_dma_tag_t
4764 pci_get_dma_tag(device_t bus, device_t dev)
4765 {
4766 	struct pci_softc *sc = device_get_softc(bus);
4767 
4768 	return (sc->sc_dma_tag);
4769 }
4770 
4771 uint32_t
4772 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4773 {
4774 	struct pci_devinfo *dinfo = device_get_ivars(child);
4775 	pcicfgregs *cfg = &dinfo->cfg;
4776 
4777 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4778 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4779 }
4780 
4781 void
4782 pci_write_config_method(device_t dev, device_t child, int reg,
4783     uint32_t val, int width)
4784 {
4785 	struct pci_devinfo *dinfo = device_get_ivars(child);
4786 	pcicfgregs *cfg = &dinfo->cfg;
4787 
4788 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4789 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4790 }
4791 
4792 int
4793 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4794     size_t buflen)
4795 {
4796 
4797 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4798 	    pci_get_function(child));
4799 	return (0);
4800 }
4801 
4802 int
4803 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4804     size_t buflen)
4805 {
4806 	struct pci_devinfo *dinfo;
4807 	pcicfgregs *cfg;
4808 
4809 	dinfo = device_get_ivars(child);
4810 	cfg = &dinfo->cfg;
4811 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4812 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4813 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4814 	    cfg->progif);
4815 	return (0);
4816 }
4817 
4818 int
4819 pci_assign_interrupt_method(device_t dev, device_t child)
4820 {
4821 	struct pci_devinfo *dinfo = device_get_ivars(child);
4822 	pcicfgregs *cfg = &dinfo->cfg;
4823 
4824 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4825 	    cfg->intpin));
4826 }
4827 
4828 static int
4829 pci_modevent(module_t mod, int what, void *arg)
4830 {
4831 	static struct cdev *pci_cdev;
4832 
4833 	switch (what) {
4834 	case MOD_LOAD:
4835 		STAILQ_INIT(&pci_devq);
4836 		pci_generation = 0;
4837 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4838 		    "pci");
4839 		pci_load_vendor_data();
4840 		break;
4841 
4842 	case MOD_UNLOAD:
4843 		destroy_dev(pci_cdev);
4844 		break;
4845 	}
4846 
4847 	return (0);
4848 }
4849 
/*
 * Rewrite the PCI Express control registers saved by
 * pci_cfg_save_pcie() back into the device's PCIe capability.  Which
 * registers exist depends on the capability version and the port
 * type; the conditionals here mirror the save side exactly.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* Capability version selects which registers are implemented. */
	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist from capability v2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4885 
4886 static void
4887 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4888 {
4889 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4890 	    dinfo->cfg.pcix.pcix_command,  2);
4891 }
4892 
/*
 * Restore a device's config-space state (e.g. after a power-state
 * transition or suspend/resume): power up to D0, rewrite the BARs and
 * the writable type-0 header registers from the cached copies, then
 * restore PCIe/PCI-X capability state and re-enable MSI/MSI-X.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4942 
/*
 * Snapshot the PCI Express control registers into dinfo->cfg.pcie so
 * pci_cfg_restore_pcie() can rewrite them later.  Which registers are
 * read depends on the capability version and the port type; the
 * conditionals here mirror the restore side exactly.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	/* Capability version selects which registers are implemented. */
	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist from capability v2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4980 
4981 static void
4982 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4983 {
4984 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4985 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4986 }
4987 
/*
 * Snapshot the writable parts of a device's type 0 configuration header
 * (and its PCI-X / PCI Express capability registers, if present) into the
 * cached copy in 'dinfo' so pci_cfg_restore() can replay them later.  If
 * 'setstate' is non-zero, the device may additionally be powered down to
 * D3, subject to the pci_do_power_nodriver policy and the device's class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/* Save the extended capability registers as well, when present. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5073 
5074 /* Wrapper APIs suitable for device driver use. */
5075 void
5076 pci_save_state(device_t dev)
5077 {
5078 	struct pci_devinfo *dinfo;
5079 
5080 	dinfo = device_get_ivars(dev);
5081 	pci_cfg_save(dev, dinfo, 0);
5082 }
5083 
5084 void
5085 pci_restore_state(device_t dev)
5086 {
5087 	struct pci_devinfo *dinfo;
5088 
5089 	dinfo = device_get_ivars(dev);
5090 	pci_cfg_restore(dev, dinfo);
5091 }
5092 
5093 static uint16_t
5094 pci_get_rid_method(device_t dev, device_t child)
5095 {
5096 
5097 	return (PCIB_GET_RID(device_get_parent(dev), child));
5098 }
5099