xref: /freebsd/sys/dev/pci/pci.c (revision a3cbca537ef1d8ac03a693cd51d98fb8087acc8d)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
/*
 * True if config register 'reg' is the expansion-ROM BAR for this header
 * type (type-0 devices use PCIR_BIOS, type-1 bridges use PCIR_BIOS_1).
 * 'reg' is now parenthesized: an unparenthesized expansion mis-parses
 * expression arguments such as (x | y) because '==' binds tighter than
 * the bitwise operators.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
76 
77 static int		pci_has_quirk(uint32_t devid, int quirk);
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_disable_msi(device_t dev);
114 static void		pci_enable_msi(device_t dev, uint64_t address,
115 			    uint16_t data);
116 static void		pci_enable_msix(device_t dev, u_int index,
117 			    uint64_t address, uint32_t data);
118 static void		pci_mask_msix(device_t dev, u_int index);
119 static void		pci_unmask_msix(device_t dev, u_int index);
120 static int		pci_msi_blacklisted(void);
121 static int		pci_msix_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
127 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128 
/*
 * Method table for the "pci" bus driver: device lifecycle hooks, the
 * generic bus resource/interrupt plumbing, and the PCI-specific kobj
 * interface (config space access, power states, capabilities, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	/* Only needed when the driver manages bus-number resources. */
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),

	DEVMETHOD_END
};
191 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* Attach the "pci" driver to every pcib (PCI-PCI/host bridge) instance. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor description database (presumably filled by pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
200 
/* One entry in the per-device quirk table consulted by pci_has_quirk(). */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;		/* type-specific (e.g. register offset) */
	int	arg2;
};
212 
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* terminator: devid 0 ends the table (see pci_has_quirk()) */
};
270 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions and its bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever the device list changes */
uint32_t pci_numdevs = 0;
/* Set while parsing capabilities in pci_read_cap(). */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");
296 
297 static int pci_do_power_nodriver = 0;
298 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
299     &pci_do_power_nodriver, 0,
300   "Place a function into D3 state when no driver attaches to it.  0 means\n\
301 disable.  1 means conservatively place devices into D3 state.  2 means\n\
302 agressively place devices into D3 state.  3 means put absolutely everything\n\
303 in D3 state.");
304 
/* Tunables/sysctls controlling power transitions, MSI, and USB takeover. */
int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* Early USB takeover defaults on only where BIOS USB emulation is common. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
350 
351 static int
352 pci_has_quirk(uint32_t devid, int quirk)
353 {
354 	const struct pci_quirk *q;
355 
356 	for (q = &pci_quirks[0]; q->devid; q++) {
357 		if (q->devid == devid && q->type == quirk)
358 			return (1);
359 	}
360 	return (0);
361 }
362 
/*
 * Find a device_t by bus/slot/function in domain 0: a thin wrapper
 * around pci_find_dbsf() with the domain hardwired to 0.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
371 
372 /* Find a device_t by domain/bus/slot/function */
373 
374 device_t
375 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
376 {
377 	struct pci_devinfo *dinfo;
378 
379 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
380 		if ((dinfo->cfg.domain == domain) &&
381 		    (dinfo->cfg.bus == bus) &&
382 		    (dinfo->cfg.slot == slot) &&
383 		    (dinfo->cfg.func == func)) {
384 			return (dinfo->cfg.dev);
385 		}
386 	}
387 
388 	return (NULL);
389 }
390 
391 /* Find a device_t by vendor/device ID */
392 
393 device_t
394 pci_find_device(uint16_t vendor, uint16_t device)
395 {
396 	struct pci_devinfo *dinfo;
397 
398 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
399 		if ((dinfo->cfg.vendor == vendor) &&
400 		    (dinfo->cfg.device == device)) {
401 			return (dinfo->cfg.dev);
402 		}
403 	}
404 
405 	return (NULL);
406 }
407 
408 device_t
409 pci_find_class(uint8_t class, uint8_t subclass)
410 {
411 	struct pci_devinfo *dinfo;
412 
413 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
414 		if (dinfo->cfg.baseclass == class &&
415 		    dinfo->cfg.subclass == subclass) {
416 			return (dinfo->cfg.dev);
417 		}
418 	}
419 
420 	return (NULL);
421 }
422 
423 static int
424 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
425 {
426 	va_list ap;
427 	int retval;
428 
429 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
430 	    cfg->func);
431 	va_start(ap, fmt);
432 	retval += vprintf(fmt, ap);
433 	va_end(ap);
434 	return (retval);
435 }
436 
437 /* return base address of memory or port map */
438 
439 static pci_addr_t
440 pci_mapbase(uint64_t mapreg)
441 {
442 
443 	if (PCI_BAR_MEM(mapreg))
444 		return (mapreg & PCIM_BAR_MEM_BASE);
445 	else
446 		return (mapreg & PCIM_BAR_IO_BASE);
447 }
448 
449 /* return map type of memory or port map */
450 
451 static const char *
452 pci_maptype(uint64_t mapreg)
453 {
454 
455 	if (PCI_BAR_IO(mapreg))
456 		return ("I/O Port");
457 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
458 		return ("Prefetchable Memory");
459 	return ("Memory");
460 }
461 
462 /* return log2 of map size decoded for memory or port map */
463 
464 static int
465 pci_mapsize(uint64_t testval)
466 {
467 	int ln2size;
468 
469 	testval = pci_mapbase(testval);
470 	ln2size = 0;
471 	if (testval != 0) {
472 		while ((testval & 1) == 0)
473 		{
474 			ln2size++;
475 			testval >>= 1;
476 		}
477 	}
478 	return (ln2size);
479 }
480 
481 /* return base address of device ROM */
482 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask the expansion-ROM BAR down to its address bits. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
489 
/* return log2 of map size decoded for device ROM */
491 
492 static int
493 pci_romsize(uint64_t testval)
494 {
495 	int ln2size;
496 
497 	testval = pci_rombase(testval);
498 	ln2size = 0;
499 	if (testval != 0) {
500 		while ((testval & 1) == 0)
501 		{
502 			ln2size++;
503 			testval >>= 1;
504 		}
505 	}
506 	return (ln2size);
507 }
508 
509 /* return log2 of address range supported by map register */
510 
511 static int
512 pci_maprange(uint64_t mapreg)
513 {
514 	int ln2range = 0;
515 
516 	if (PCI_BAR_IO(mapreg))
517 		ln2range = 32;
518 	else
519 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
520 		case PCIM_BAR_MEM_32:
521 			ln2range = 32;
522 			break;
523 		case PCIM_BAR_MEM_1MB:
524 			ln2range = 20;
525 			break;
526 		case PCIM_BAR_MEM_64:
527 			ln2range = 64;
528 			break;
529 		}
530 	return (ln2range);
531 }
532 
533 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
534 
535 static void
536 pci_fixancient(pcicfgregs *cfg)
537 {
538 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
539 		return;
540 
541 	/* PCI to PCI bridges use header type 1 */
542 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
543 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
544 }
545 
546 /* extract header type specific config data */
547 
/*
 * Read the header-type specific registers: subvendor/subdevice IDs
 * (whose offsets differ per header type) and the BAR count.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type-1 headers: only the BAR count is recorded here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
569 
570 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of the function at domain 'd', bus 'b',
 * slot 's', function 'f'.  Allocates a pci_devinfo of 'size' bytes
 * (callers pass the devinfo size; presumably >= sizeof(struct
 * pci_devinfo) so bus drivers can append state — TODO confirm), fills
 * both the raw pcicfgregs and the pci_conf snapshot, links the entry
 * onto the global pci_devq list, and returns it.  Returns NULL when no
 * device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means nothing answered. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the pci_conf snapshot. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
645 
/*
 * Walk the device's classic capability list and record the location
 * and key registers of each capability of interest (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express)
 * into 'cfg'.  Also sets the pcix_chipset/pcie_chipset globals.
 *
 * NB: REG and WREG are intentionally left defined at the end of this
 * function; the VPD helpers below reuse them.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability-list head register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets must stay within config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Supported message count is a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
806 
807 /*
808  * PCI Vital Product Data
809  */
810 
811 #define	PCI_VPD_TIMEOUT		1000000
812 
813 static int
814 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
815 {
816 	int count = PCI_VPD_TIMEOUT;
817 
818 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
819 
820 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
821 
822 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
823 		if (--count < 0)
824 			return (ENXIO);
825 		DELAY(1);	/* limit looping */
826 	}
827 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
828 
829 	return (0);
830 }
831 
#if 0
/*
 * Write one 32-bit word of VPD data at byte offset 'reg': the hardware
 * clears bit 15 of the VPD address register when the write completes.
 * Returns 0 on success or ENXIO on timeout.  Currently compiled out;
 * kept in sync with pci_read_vpd_reg() (same "must be" message fix).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
851 
852 #undef PCI_VPD_TIMEOUT
853 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit VPD word */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running byte sum for the RV check */
};
862 
863 static int
864 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
865 {
866 	uint32_t reg;
867 	uint8_t byte;
868 
869 	if (vrs->bytesinval == 0) {
870 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
871 			return (ENXIO);
872 		vrs->val = le32toh(reg);
873 		vrs->off += 4;
874 		byte = vrs->val & 0xff;
875 		vrs->bytesinval = 3;
876 	} else {
877 		vrs->val = vrs->val >> 8;
878 		byte = vrs->val & 0xff;
879 		vrs->bytesinval--;
880 	}
881 
882 	vrs->cksum += byte;
883 	*data = byte;
884 	return (0);
885 }
886 
887 static void
888 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
889 {
890 	struct vpd_readstate vrs;
891 	int state;
892 	int name;
893 	int remain;
894 	int i;
895 	int alloc, off;		/* alloc/off for RO/W arrays */
896 	int cksumvalid;
897 	int dflen;
898 	uint8_t byte;
899 	uint8_t byte2;
900 
901 	/* init vpd reader */
902 	vrs.bytesinval = 0;
903 	vrs.off = 0;
904 	vrs.pcib = pcib;
905 	vrs.cfg = cfg;
906 	vrs.cksum = 0;
907 
908 	state = 0;
909 	name = remain = i = 0;	/* shut up stupid gcc */
910 	alloc = off = 0;	/* shut up stupid gcc */
911 	dflen = 0;		/* shut up stupid gcc */
912 	cksumvalid = -1;
913 	while (state >= 0) {
914 		if (vpd_nextbyte(&vrs, &byte)) {
915 			state = -2;
916 			break;
917 		}
918 #if 0
919 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
920 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
921 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
922 #endif
923 		switch (state) {
924 		case 0:		/* item name */
925 			if (byte & 0x80) {
926 				if (vpd_nextbyte(&vrs, &byte2)) {
927 					state = -2;
928 					break;
929 				}
930 				remain = byte2;
931 				if (vpd_nextbyte(&vrs, &byte2)) {
932 					state = -2;
933 					break;
934 				}
935 				remain |= byte2 << 8;
936 				if (remain > (0x7f*4 - vrs.off)) {
937 					state = -1;
938 					pci_printf(cfg,
939 					    "invalid VPD data, remain %#x\n",
940 					    remain);
941 				}
942 				name = byte & 0x7f;
943 			} else {
944 				remain = byte & 0x7;
945 				name = (byte >> 3) & 0xf;
946 			}
947 			switch (name) {
948 			case 0x2:	/* String */
949 				cfg->vpd.vpd_ident = malloc(remain + 1,
950 				    M_DEVBUF, M_WAITOK);
951 				i = 0;
952 				state = 1;
953 				break;
954 			case 0xf:	/* End */
955 				state = -1;
956 				break;
957 			case 0x10:	/* VPD-R */
958 				alloc = 8;
959 				off = 0;
960 				cfg->vpd.vpd_ros = malloc(alloc *
961 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
962 				    M_WAITOK | M_ZERO);
963 				state = 2;
964 				break;
965 			case 0x11:	/* VPD-W */
966 				alloc = 8;
967 				off = 0;
968 				cfg->vpd.vpd_w = malloc(alloc *
969 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
970 				    M_WAITOK | M_ZERO);
971 				state = 5;
972 				break;
973 			default:	/* Invalid data, abort */
974 				state = -1;
975 				break;
976 			}
977 			break;
978 
979 		case 1:	/* Identifier String */
980 			cfg->vpd.vpd_ident[i++] = byte;
981 			remain--;
982 			if (remain == 0)  {
983 				cfg->vpd.vpd_ident[i] = '\0';
984 				state = 0;
985 			}
986 			break;
987 
988 		case 2:	/* VPD-R Keyword Header */
989 			if (off == alloc) {
990 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
991 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
992 				    M_DEVBUF, M_WAITOK | M_ZERO);
993 			}
994 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
995 			if (vpd_nextbyte(&vrs, &byte2)) {
996 				state = -2;
997 				break;
998 			}
999 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1000 			if (vpd_nextbyte(&vrs, &byte2)) {
1001 				state = -2;
1002 				break;
1003 			}
1004 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1005 			if (dflen == 0 &&
1006 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1007 			    2) == 0) {
1008 				/*
1009 				 * if this happens, we can't trust the rest
1010 				 * of the VPD.
1011 				 */
1012 				pci_printf(cfg, "bad keyword length: %d\n",
1013 				    dflen);
1014 				cksumvalid = 0;
1015 				state = -1;
1016 				break;
1017 			} else if (dflen == 0) {
1018 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1019 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1020 				    M_DEVBUF, M_WAITOK);
1021 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1022 			} else
1023 				cfg->vpd.vpd_ros[off].value = malloc(
1024 				    (dflen + 1) *
1025 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1026 				    M_DEVBUF, M_WAITOK);
1027 			remain -= 3;
1028 			i = 0;
1029 			/* keep in sync w/ state 3's transistions */
1030 			if (dflen == 0 && remain == 0)
1031 				state = 0;
1032 			else if (dflen == 0)
1033 				state = 2;
1034 			else
1035 				state = 3;
1036 			break;
1037 
1038 		case 3:	/* VPD-R Keyword Value */
1039 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1040 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1041 			    "RV", 2) == 0 && cksumvalid == -1) {
1042 				if (vrs.cksum == 0)
1043 					cksumvalid = 1;
1044 				else {
1045 					if (bootverbose)
1046 						pci_printf(cfg,
1047 					    "bad VPD cksum, remain %hhu\n",
1048 						    vrs.cksum);
1049 					cksumvalid = 0;
1050 					state = -1;
1051 					break;
1052 				}
1053 			}
1054 			dflen--;
1055 			remain--;
1056 			/* keep in sync w/ state 2's transistions */
1057 			if (dflen == 0)
1058 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1059 			if (dflen == 0 && remain == 0) {
1060 				cfg->vpd.vpd_rocnt = off;
1061 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1062 				    off * sizeof(*cfg->vpd.vpd_ros),
1063 				    M_DEVBUF, M_WAITOK | M_ZERO);
1064 				state = 0;
1065 			} else if (dflen == 0)
1066 				state = 2;
1067 			break;
1068 
1069 		case 4:
1070 			remain--;
1071 			if (remain == 0)
1072 				state = 0;
1073 			break;
1074 
1075 		case 5:	/* VPD-W Keyword Header */
1076 			if (off == alloc) {
1077 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1078 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1079 				    M_DEVBUF, M_WAITOK | M_ZERO);
1080 			}
1081 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1082 			if (vpd_nextbyte(&vrs, &byte2)) {
1083 				state = -2;
1084 				break;
1085 			}
1086 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1087 			if (vpd_nextbyte(&vrs, &byte2)) {
1088 				state = -2;
1089 				break;
1090 			}
1091 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1092 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1093 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1094 			    sizeof(*cfg->vpd.vpd_w[off].value),
1095 			    M_DEVBUF, M_WAITOK);
1096 			remain -= 3;
1097 			i = 0;
1098 			/* keep in sync w/ state 6's transistions */
1099 			if (dflen == 0 && remain == 0)
1100 				state = 0;
1101 			else if (dflen == 0)
1102 				state = 5;
1103 			else
1104 				state = 6;
1105 			break;
1106 
1107 		case 6:	/* VPD-W Keyword Value */
1108 			cfg->vpd.vpd_w[off].value[i++] = byte;
1109 			dflen--;
1110 			remain--;
1111 			/* keep in sync w/ state 5's transistions */
1112 			if (dflen == 0)
1113 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1114 			if (dflen == 0 && remain == 0) {
1115 				cfg->vpd.vpd_wcnt = off;
1116 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1117 				    off * sizeof(*cfg->vpd.vpd_w),
1118 				    M_DEVBUF, M_WAITOK | M_ZERO);
1119 				state = 0;
1120 			} else if (dflen == 0)
1121 				state = 5;
1122 			break;
1123 
1124 		default:
1125 			pci_printf(cfg, "invalid state: %d\n", state);
1126 			state = -1;
1127 			break;
1128 		}
1129 	}
1130 
1131 	if (cksumvalid == 0 || state < -1) {
1132 		/* read-only data bad, clean up */
1133 		if (cfg->vpd.vpd_ros != NULL) {
1134 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1135 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1136 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1137 			cfg->vpd.vpd_ros = NULL;
1138 		}
1139 	}
1140 	if (state < -1) {
1141 		/* I/O error, clean up */
1142 		pci_printf(cfg, "failed to read VPD data.\n");
1143 		if (cfg->vpd.vpd_ident != NULL) {
1144 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1145 			cfg->vpd.vpd_ident = NULL;
1146 		}
1147 		if (cfg->vpd.vpd_w != NULL) {
1148 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1149 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1150 			free(cfg->vpd.vpd_w, M_DEVBUF);
1151 			cfg->vpd.vpd_w = NULL;
1152 		}
1153 	}
1154 	cfg->vpd.vpd_cached = 1;
1155 #undef REG
1156 #undef WREG
1157 }
1158 
1159 int
1160 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1161 {
1162 	struct pci_devinfo *dinfo = device_get_ivars(child);
1163 	pcicfgregs *cfg = &dinfo->cfg;
1164 
1165 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1166 		pci_read_vpd(device_get_parent(dev), cfg);
1167 
1168 	*identptr = cfg->vpd.vpd_ident;
1169 
1170 	if (*identptr == NULL)
1171 		return (ENXIO);
1172 
1173 	return (0);
1174 }
1175 
1176 int
1177 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1178 	const char **vptr)
1179 {
1180 	struct pci_devinfo *dinfo = device_get_ivars(child);
1181 	pcicfgregs *cfg = &dinfo->cfg;
1182 	int i;
1183 
1184 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1185 		pci_read_vpd(device_get_parent(dev), cfg);
1186 
1187 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1188 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1189 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1190 			*vptr = cfg->vpd.vpd_ros[i].value;
1191 			return (0);
1192 		}
1193 
1194 	*vptr = NULL;
1195 	return (ENXIO);
1196 }
1197 
1198 struct pcicfg_vpd *
1199 pci_fetch_vpd_list(device_t dev)
1200 {
1201 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1202 	pcicfgregs *cfg = &dinfo->cfg;
1203 
1204 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1205 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1206 	return (&cfg->vpd);
1207 }
1208 
1209 /*
1210  * Find the requested HyperTransport capability and return the offset
1211  * in configuration space via the pointer provided.  The function
1212  * returns 0 on success and an error code otherwise.
1213  */
1214 int
1215 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1216 {
1217 	int ptr, error;
1218 	uint16_t val;
1219 
1220 	error = pci_find_cap(child, PCIY_HT, &ptr);
1221 	if (error)
1222 		return (error);
1223 
1224 	/*
1225 	 * Traverse the capabilities list checking each HT capability
1226 	 * to see if it matches the requested HT capability.
1227 	 */
1228 	while (ptr != 0) {
1229 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1230 		if (capability == PCIM_HTCAP_SLAVE ||
1231 		    capability == PCIM_HTCAP_HOST)
1232 			val &= 0xe000;
1233 		else
1234 			val &= PCIM_HTCMD_CAP_MASK;
1235 		if (val == capability) {
1236 			if (capreg != NULL)
1237 				*capreg = ptr;
1238 			return (0);
1239 		}
1240 
1241 		/* Skip to the next HT capability. */
1242 		while (ptr != 0) {
1243 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1244 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1245 			    PCIY_HT)
1246 				break;
1247 		}
1248 	}
1249 	return (ENOENT);
1250 }
1251 
1252 /*
1253  * Find the requested capability and return the offset in
1254  * configuration space via the pointer provided.  The function returns
1255  * 0 on success and an error code otherwise.
1256  */
1257 int
1258 pci_find_cap_method(device_t dev, device_t child, int capability,
1259     int *capreg)
1260 {
1261 	struct pci_devinfo *dinfo = device_get_ivars(child);
1262 	pcicfgregs *cfg = &dinfo->cfg;
1263 	u_int32_t status;
1264 	u_int8_t ptr;
1265 
1266 	/*
1267 	 * Check the CAP_LIST bit of the PCI status register first.
1268 	 */
1269 	status = pci_read_config(child, PCIR_STATUS, 2);
1270 	if (!(status & PCIM_STATUS_CAPPRESENT))
1271 		return (ENXIO);
1272 
1273 	/*
1274 	 * Determine the start pointer of the capabilities list.
1275 	 */
1276 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1277 	case PCIM_HDRTYPE_NORMAL:
1278 	case PCIM_HDRTYPE_BRIDGE:
1279 		ptr = PCIR_CAP_PTR;
1280 		break;
1281 	case PCIM_HDRTYPE_CARDBUS:
1282 		ptr = PCIR_CAP_PTR_2;
1283 		break;
1284 	default:
1285 		/* XXX: panic? */
1286 		return (ENXIO);		/* no extended capabilities support */
1287 	}
1288 	ptr = pci_read_config(child, ptr, 1);
1289 
1290 	/*
1291 	 * Traverse the capabilities list.
1292 	 */
1293 	while (ptr != 0) {
1294 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1295 			if (capreg != NULL)
1296 				*capreg = ptr;
1297 			return (0);
1298 		}
1299 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1300 	}
1301 
1302 	return (ENOENT);
1303 }
1304 
1305 /*
1306  * Find the requested extended capability and return the offset in
1307  * configuration space via the pointer provided.  The function returns
1308  * 0 on success and an error code otherwise.
1309  */
1310 int
1311 pci_find_extcap_method(device_t dev, device_t child, int capability,
1312     int *capreg)
1313 {
1314 	struct pci_devinfo *dinfo = device_get_ivars(child);
1315 	pcicfgregs *cfg = &dinfo->cfg;
1316 	uint32_t ecap;
1317 	uint16_t ptr;
1318 
1319 	/* Only supported for PCI-express devices. */
1320 	if (cfg->pcie.pcie_location == 0)
1321 		return (ENXIO);
1322 
1323 	ptr = PCIR_EXTCAP;
1324 	ecap = pci_read_config(child, ptr, 4);
1325 	if (ecap == 0xffffffff || ecap == 0)
1326 		return (ENOENT);
1327 	for (;;) {
1328 		if (PCI_EXTCAP_ID(ecap) == capability) {
1329 			if (capreg != NULL)
1330 				*capreg = ptr;
1331 			return (0);
1332 		}
1333 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1334 		if (ptr == 0)
1335 			break;
1336 		ecap = pci_read_config(child, ptr, 4);
1337 	}
1338 
1339 	return (ENOENT);
1340 }
1341 
1342 /*
1343  * Support for MSI-X message interrupts.
1344  */
1345 void
1346 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1347 {
1348 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1349 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1350 	uint32_t offset;
1351 
1352 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1353 	offset = msix->msix_table_offset + index * 16;
1354 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1355 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1356 	bus_write_4(msix->msix_table_res, offset + 8, data);
1357 
1358 	/* Enable MSI -> HT mapping. */
1359 	pci_ht_map_msi(dev, address);
1360 }
1361 
1362 void
1363 pci_mask_msix(device_t dev, u_int index)
1364 {
1365 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1366 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1367 	uint32_t offset, val;
1368 
1369 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1370 	offset = msix->msix_table_offset + index * 16 + 12;
1371 	val = bus_read_4(msix->msix_table_res, offset);
1372 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1373 		val |= PCIM_MSIX_VCTRL_MASK;
1374 		bus_write_4(msix->msix_table_res, offset, val);
1375 	}
1376 }
1377 
1378 void
1379 pci_unmask_msix(device_t dev, u_int index)
1380 {
1381 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1382 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1383 	uint32_t offset, val;
1384 
1385 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1386 	offset = msix->msix_table_offset + index * 16 + 12;
1387 	val = bus_read_4(msix->msix_table_res, offset);
1388 	if (val & PCIM_MSIX_VCTRL_MASK) {
1389 		val &= ~PCIM_MSIX_VCTRL_MASK;
1390 		bus_write_4(msix->msix_table_res, offset, val);
1391 	}
1392 }
1393 
1394 int
1395 pci_pending_msix(device_t dev, u_int index)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	uint32_t offset, bit;
1400 
1401 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1402 	offset = msix->msix_pba_offset + (index / 32) * 4;
1403 	bit = 1 << index % 32;
1404 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1405 }
1406 
1407 /*
1408  * Restore MSI-X registers and table during resume.  If MSI-X is
1409  * enabled then walk the virtual table to restore the actual MSI-X
1410  * table.
1411  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved MSI-X control register last. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1439 
1440 /*
1441  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1442  * returned in *count.  After this function returns, each message will be
1443  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1444  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The MSI-X table
	 * and PBA may live in the same BAR or in two different ones.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the BARs match, 'rle' still points at the table's entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages were successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 means "no vector assigned". */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1579 
1580 /*
1581  * By default, pci_alloc_msix() will assign the allocated IRQ
1582  * resources consecutively to the first N messages in the MSI-X table.
1583  * However, device drivers may want to use different layouts if they
1584  * either receive fewer messages than they asked for, or they wish to
1585  * populate the MSI-X table sparsely.  This method allows the driver
1586  * to specify what layout it wants.  It must be called after a
1587  * successful pci_alloc_msix() but before any of the associated
1588  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1589  *
1590  * The 'vectors' array contains 'count' message vectors.  The array
1591  * maps directly to the MSI-X table in that index 0 in the array
1592  * specifies the vector for the first message in the MSI-X table, etc.
1593  * The vector value in each array index can either be 0 to indicate
1594  * that no vector should be assigned to a message slot, or it can be a
1595  * number from 1 to N (where N is the count returned from a
1596  * succcessful call to pci_alloc_msix()) to indicate which message
1597  * vector (IRQ) to be used for the corresponding message.
1598  *
1599  * On successful return, each message with a non-zero vector will have
1600  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1601  * 1.  Additionally, if any of the IRQs allocated via the previous
1602  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1603  * will be freed back to the system automatically.
1604  *
1605  * For example, suppose a driver has a MSI-X table with 6 messages and
1606  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1607  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1608  * C.  After the call to pci_alloc_msix(), the device will be setup to
1609  * have an MSI-X table of ABC--- (where - means no vector assigned).
1610  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1611  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1612  * be freed back to the system.  This device will also have valid
1613  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1614  *
1615  * In any case, the SYS_RES_IRQ rid X will always map to the message
1616  * at MSI-X table index X - 1 and will only be valid if a vector is
1617  * assigned to that table entry.
1618  */
1619 int
1620 pci_remap_msix_method(device_t dev, device_t child, int count,
1621     const u_int *vectors)
1622 {
1623 	struct pci_devinfo *dinfo = device_get_ivars(child);
1624 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1625 	struct resource_list_entry *rle;
1626 	int i, irq, j, *used;
1627 
1628 	/*
1629 	 * Have to have at least one message in the table but the
1630 	 * table can't be bigger than the actual MSI-X table in the
1631 	 * device.
1632 	 */
1633 	if (count == 0 || count > msix->msix_msgnum)
1634 		return (EINVAL);
1635 
1636 	/* Sanity check the vectors. */
1637 	for (i = 0; i < count; i++)
1638 		if (vectors[i] > msix->msix_alloc)
1639 			return (EINVAL);
1640 
1641 	/*
1642 	 * Make sure there aren't any holes in the vectors to be used.
1643 	 * It's a big pain to support it, and it doesn't really make
1644 	 * sense anyway.  Also, at least one vector must be used.
1645 	 */
1646 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1647 	    M_ZERO);
1648 	for (i = 0; i < count; i++)
1649 		if (vectors[i] != 0)
1650 			used[vectors[i] - 1] = 1;
1651 	for (i = 0; i < msix->msix_alloc - 1; i++)
1652 		if (used[i] == 0 && used[i + 1] == 1) {
1653 			free(used, M_DEVBUF);
1654 			return (EINVAL);
1655 		}
1656 	if (used[0] != 1) {
1657 		free(used, M_DEVBUF);
1658 		return (EINVAL);
1659 	}
1660 
1661 	/* Make sure none of the resources are allocated. */
1662 	for (i = 0; i < msix->msix_table_len; i++) {
1663 		if (msix->msix_table[i].mte_vector == 0)
1664 			continue;
1665 		if (msix->msix_table[i].mte_handlers > 0)
1666 			return (EBUSY);
1667 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1668 		KASSERT(rle != NULL, ("missing resource"));
1669 		if (rle->res != NULL)
1670 			return (EBUSY);
1671 	}
1672 
1673 	/* Free the existing resource list entries. */
1674 	for (i = 0; i < msix->msix_table_len; i++) {
1675 		if (msix->msix_table[i].mte_vector == 0)
1676 			continue;
1677 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1678 	}
1679 
1680 	/*
1681 	 * Build the new virtual table keeping track of which vectors are
1682 	 * used.
1683 	 */
1684 	free(msix->msix_table, M_DEVBUF);
1685 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1686 	    M_DEVBUF, M_WAITOK | M_ZERO);
1687 	for (i = 0; i < count; i++)
1688 		msix->msix_table[i].mte_vector = vectors[i];
1689 	msix->msix_table_len = count;
1690 
1691 	/* Free any unused IRQs and resize the vectors array if necessary. */
1692 	j = msix->msix_alloc - 1;
1693 	if (used[j] == 0) {
1694 		struct msix_vector *vec;
1695 
1696 		while (used[j] == 0) {
1697 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1698 			    msix->msix_vectors[j].mv_irq);
1699 			j--;
1700 		}
1701 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1702 		    M_WAITOK);
1703 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1704 		    (j + 1));
1705 		free(msix->msix_vectors, M_DEVBUF);
1706 		msix->msix_vectors = vec;
1707 		msix->msix_alloc = j + 1;
1708 	}
1709 	free(used, M_DEVBUF);
1710 
1711 	/* Map the IRQs onto the rids. */
1712 	for (i = 0; i < count; i++) {
1713 		if (vectors[i] == 0)
1714 			continue;
1715 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1716 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1717 		    irq, 1);
1718 	}
1719 
1720 	if (bootverbose) {
1721 		device_printf(child, "Remapped MSI-X IRQs as: ");
1722 		for (i = 0; i < count; i++) {
1723 			if (i != 0)
1724 				printf(", ");
1725 			if (vectors[i] == 0)
1726 				printf("---");
1727 			else
1728 				printf("%d",
1729 				    msix->msix_vectors[vectors[i]].mv_irq);
1730 		}
1731 		printf("\n");
1732 	}
1733 
1734 	return (0);
1735 }
1736 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  These checks
	 * run before any state is torn down, so an EBUSY return leaves
	 * the MSI-X configuration fully intact.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1783 
1784 /*
1785  * Return the max supported MSI-X messages this device supports.
1786  * Basically, assuming the MD code can alloc messages, this function
1787  * should return the maximum value that pci_alloc_msix() can return.
1788  * Thus, it is subject to the tunables, etc.
1789  */
1790 int
1791 pci_msix_count_method(device_t dev, device_t child)
1792 {
1793 	struct pci_devinfo *dinfo = device_get_ivars(child);
1794 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1795 
1796 	if (pci_do_msix && msix->msix_location != 0)
1797 		return (msix->msix_msgnum);
1798 	return (0);
1799 }
1800 
1801 /*
1802  * HyperTransport MSI mapping control
1803  */
1804 void
1805 pci_ht_map_msi(device_t dev, uint64_t addr)
1806 {
1807 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1808 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1809 
1810 	if (!ht->ht_msimap)
1811 		return;
1812 
1813 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1814 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1815 		/* Enable MSI -> HT mapping. */
1816 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1817 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1818 		    ht->ht_msictrl, 2);
1819 	}
1820 
1821 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1822 		/* Disable MSI -> HT mapping. */
1823 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1824 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1825 		    ht->ht_msictrl, 2);
1826 	}
1827 }
1828 
1829 int
1830 pci_get_max_read_req(device_t dev)
1831 {
1832 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1833 	int cap;
1834 	uint16_t val;
1835 
1836 	cap = dinfo->cfg.pcie.pcie_location;
1837 	if (cap == 0)
1838 		return (0);
1839 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1840 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1841 	val >>= 12;
1842 	return (1 << (val + 7));
1843 }
1844 
1845 int
1846 pci_set_max_read_req(device_t dev, int size)
1847 {
1848 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1849 	int cap;
1850 	uint16_t val;
1851 
1852 	cap = dinfo->cfg.pcie.pcie_location;
1853 	if (cap == 0)
1854 		return (0);
1855 	if (size < 128)
1856 		size = 128;
1857 	if (size > 4096)
1858 		size = 4096;
1859 	size = (1 << (fls(size) - 1));
1860 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1861 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1862 	val |= (fls(size) - 8) << 12;
1863 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1864 	return (size);
1865 }
1866 
1867 /*
1868  * Support for MSI message signalled interrupts.
1869  */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/*
	 * Write data and address values.  The address/data registers
	 * must be programmed before the enable bit is set below; the
	 * data register's offset differs for 64-bit capable devices.
	 */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1896 
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping (a zero address requests disable). */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1911 
1912 /*
1913  * Restore MSI registers during resume.  If MSI is enabled then
1914  * restore the data and address registers in addition to the control
1915  * register.
1916  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/*
	 * Only rewrite address/data if MSI was enabled before suspend;
	 * they must be restored before the control register re-enables
	 * message delivery below.
	 */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control register unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1942 
1943 static int
1944 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1945 {
1946 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1947 	pcicfgregs *cfg = &dinfo->cfg;
1948 	struct resource_list_entry *rle;
1949 	struct msix_table_entry *mte;
1950 	struct msix_vector *mv;
1951 	uint64_t addr;
1952 	uint32_t data;
1953 	int error, i, j;
1954 
1955 	/*
1956 	 * Handle MSI first.  We try to find this IRQ among our list
1957 	 * of MSI IRQs.  If we find it, we request updated address and
1958 	 * data registers and apply the results.
1959 	 */
1960 	if (cfg->msi.msi_alloc > 0) {
1961 
1962 		/* If we don't have any active handlers, nothing to do. */
1963 		if (cfg->msi.msi_handlers == 0)
1964 			return (0);
1965 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1966 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1967 			    i + 1);
1968 			if (rle->start == irq) {
1969 				error = PCIB_MAP_MSI(device_get_parent(bus),
1970 				    dev, irq, &addr, &data);
1971 				if (error)
1972 					return (error);
1973 				pci_disable_msi(dev);
1974 				dinfo->cfg.msi.msi_addr = addr;
1975 				dinfo->cfg.msi.msi_data = data;
1976 				pci_enable_msi(dev, addr, data);
1977 				return (0);
1978 			}
1979 		}
1980 		return (ENOENT);
1981 	}
1982 
1983 	/*
1984 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1985 	 * we request the updated mapping info.  If that works, we go
1986 	 * through all the slots that use this IRQ and update them.
1987 	 */
1988 	if (cfg->msix.msix_alloc > 0) {
1989 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1990 			mv = &cfg->msix.msix_vectors[i];
1991 			if (mv->mv_irq == irq) {
1992 				error = PCIB_MAP_MSI(device_get_parent(bus),
1993 				    dev, irq, &addr, &data);
1994 				if (error)
1995 					return (error);
1996 				mv->mv_address = addr;
1997 				mv->mv_data = data;
1998 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1999 					mte = &cfg->msix.msix_table[j];
2000 					if (mte->mte_vector != i + 1)
2001 						continue;
2002 					if (mte->mte_handlers == 0)
2003 						continue;
2004 					pci_mask_msix(dev, j);
2005 					pci_enable_msix(dev, j, addr, data);
2006 					pci_unmask_msix(dev, j);
2007 				}
2008 			}
2009 		}
2010 		return (ENOENT);
2011 	}
2012 
2013 	return (ENOENT);
2014 }
2015 
2016 /*
2017  * Returns true if the specified device is blacklisted because MSI
2018  * doesn't work.
2019  */
2020 int
2021 pci_msi_device_blacklisted(device_t dev)
2022 {
2023 
2024 	if (!pci_honor_msi_blacklist)
2025 		return (0);
2026 
2027 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2028 }
2029 
2030 /*
2031  * Determine if MSI is blacklisted globally on this system.  Currently,
2032  * we just check for blacklisted chipsets as represented by the
2033  * host-PCI bridge at device 0:0:0.  In the future, it may become
2034  * necessary to check other system attributes, such as the kenv values
2035  * that give the motherboard manufacturer and model number.
2036  */
2037 static int
2038 pci_msi_blacklisted(void)
2039 {
2040 	device_t dev;
2041 
2042 	if (!pci_honor_msi_blacklist)
2043 		return (0);
2044 
2045 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2046 	if (!(pcie_chipset || pcix_chipset)) {
2047 		if (vm_guest != VM_GUEST_NO) {
2048 			/*
2049 			 * Whitelist older chipsets in virtual
2050 			 * machines known to support MSI.
2051 			 */
2052 			dev = pci_find_bsf(0, 0, 0);
2053 			if (dev != NULL)
2054 				return (!pci_has_quirk(pci_get_devid(dev),
2055 					PCI_QUIRK_ENABLE_MSI_VM));
2056 		}
2057 		return (1);
2058 	}
2059 
2060 	dev = pci_find_bsf(0, 0, 0);
2061 	if (dev != NULL)
2062 		return (pci_msi_device_blacklisted(dev));
2063 	return (0);
2064 }
2065 
2066 /*
2067  * Returns true if the specified device is blacklisted because MSI-X
2068  * doesn't work.  Note that this assumes that if MSI doesn't work,
2069  * MSI-X doesn't either.
2070  */
2071 int
2072 pci_msix_device_blacklisted(device_t dev)
2073 {
2074 
2075 	if (!pci_honor_msi_blacklist)
2076 		return (0);
2077 
2078 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2079 		return (1);
2080 
2081 	return (pci_msi_device_blacklisted(dev));
2082 }
2083 
2084 /*
2085  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2086  * is blacklisted, assume that MSI-X is as well.  Check for additional
2087  * chipsets where MSI works but MSI-X does not.
2088  */
2089 static int
2090 pci_msix_blacklisted(void)
2091 {
2092 	device_t dev;
2093 
2094 	if (!pci_honor_msi_blacklist)
2095 		return (0);
2096 
2097 	dev = pci_find_bsf(0, 0, 0);
2098 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2099 	    PCI_QUIRK_DISABLE_MSIX))
2100 		return (1);
2101 
2102 	return (pci_msi_blacklisted());
2103 }
2104 
2105 /*
2106  * Attempt to allocate *count MSI messages.  The actual number allocated is
2107  * returned in *count.  After this function returns, each message will be
2108  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2109  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request each time it fails until a single message fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* Encode log2(actual) into the Multiple Message Enable field. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2228 
2229 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other result (success or
	 * failure) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Collect the IRQ numbers while verifying nothing is in use. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2277 
2278 /*
2279  * Return the max supported MSI messages this device supports.
2280  * Basically, assuming the MD code can alloc messages, this function
2281  * should return the maximum value that pci_alloc_msi() can return.
2282  * Thus, it is subject to the tunables, etc.
2283  */
2284 int
2285 pci_msi_count_method(device_t dev, device_t child)
2286 {
2287 	struct pci_devinfo *dinfo = device_get_ivars(child);
2288 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2289 
2290 	if (pci_do_msi && msi->msi_location != 0)
2291 		return (msi->msi_msgnum);
2292 	return (0);
2293 }
2294 
/* Free the pcicfgregs structure and all dependent data structures. */
2296 
2297 int
2298 pci_freecfg(struct pci_devinfo *dinfo)
2299 {
2300 	struct devlist *devlist_head;
2301 	struct pci_map *pm, *next;
2302 	int i;
2303 
2304 	devlist_head = &pci_devq;
2305 
2306 	if (dinfo->cfg.vpd.vpd_reg) {
2307 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2308 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2309 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2310 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2311 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2312 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2313 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2314 	}
2315 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2316 		free(pm, M_DEVBUF);
2317 	}
2318 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2319 	free(dinfo, M_DEVBUF);
2320 
2321 	/* increment the generation count */
2322 	pci_generation++;
2323 
2324 	/* we're losing one device */
2325 	pci_numdevs--;
2326 	return (0);
2327 }
2328 
2329 /*
2330  * PCI power manangement
2331  */
2332 int
2333 pci_set_powerstate_method(device_t dev, device_t child, int state)
2334 {
2335 	struct pci_devinfo *dinfo = device_get_ivars(child);
2336 	pcicfgregs *cfg = &dinfo->cfg;
2337 	uint16_t status;
2338 	int result, oldstate, highest, delay;
2339 
2340 	if (cfg->pp.pp_cap == 0)
2341 		return (EOPNOTSUPP);
2342 
2343 	/*
2344 	 * Optimize a no state change request away.  While it would be OK to
2345 	 * write to the hardware in theory, some devices have shown odd
2346 	 * behavior when going from D3 -> D3.
2347 	 */
2348 	oldstate = pci_get_powerstate(child);
2349 	if (oldstate == state)
2350 		return (0);
2351 
2352 	/*
2353 	 * The PCI power management specification states that after a state
2354 	 * transition between PCI power states, system software must
2355 	 * guarantee a minimal delay before the function accesses the device.
2356 	 * Compute the worst case delay that we need to guarantee before we
2357 	 * access the device.  Many devices will be responsive much more
2358 	 * quickly than this delay, but there are some that don't respond
2359 	 * instantly to state changes.  Transitions to/from D3 state require
2360 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2361 	 * is done below with DELAY rather than a sleeper function because
2362 	 * this function can be called from contexts where we cannot sleep.
2363 	 */
2364 	highest = (oldstate > state) ? oldstate : state;
2365 	if (highest == PCI_POWERSTATE_D3)
2366 	    delay = 10000;
2367 	else if (highest == PCI_POWERSTATE_D2)
2368 	    delay = 200;
2369 	else
2370 	    delay = 0;
2371 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2372 	    & ~PCIM_PSTAT_DMASK;
2373 	result = 0;
2374 	switch (state) {
2375 	case PCI_POWERSTATE_D0:
2376 		status |= PCIM_PSTAT_D0;
2377 		break;
2378 	case PCI_POWERSTATE_D1:
2379 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2380 			return (EOPNOTSUPP);
2381 		status |= PCIM_PSTAT_D1;
2382 		break;
2383 	case PCI_POWERSTATE_D2:
2384 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2385 			return (EOPNOTSUPP);
2386 		status |= PCIM_PSTAT_D2;
2387 		break;
2388 	case PCI_POWERSTATE_D3:
2389 		status |= PCIM_PSTAT_D3;
2390 		break;
2391 	default:
2392 		return (EINVAL);
2393 	}
2394 
2395 	if (bootverbose)
2396 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2397 		    state);
2398 
2399 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2400 	if (delay)
2401 		DELAY(delay);
2402 	return (0);
2403 }
2404 
2405 int
2406 pci_get_powerstate_method(device_t dev, device_t child)
2407 {
2408 	struct pci_devinfo *dinfo = device_get_ivars(child);
2409 	pcicfgregs *cfg = &dinfo->cfg;
2410 	uint16_t status;
2411 	int result;
2412 
2413 	if (cfg->pp.pp_cap != 0) {
2414 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2415 		switch (status & PCIM_PSTAT_DMASK) {
2416 		case PCIM_PSTAT_D0:
2417 			result = PCI_POWERSTATE_D0;
2418 			break;
2419 		case PCIM_PSTAT_D1:
2420 			result = PCI_POWERSTATE_D1;
2421 			break;
2422 		case PCIM_PSTAT_D2:
2423 			result = PCI_POWERSTATE_D2;
2424 			break;
2425 		case PCIM_PSTAT_D3:
2426 			result = PCI_POWERSTATE_D3;
2427 			break;
2428 		default:
2429 			result = PCI_POWERSTATE_UNKNOWN;
2430 			break;
2431 		}
2432 	} else {
2433 		/* No support, device is always at D0 */
2434 		result = PCI_POWERSTATE_D0;
2435 	}
2436 	return (result);
2437 }
2438 
2439 /*
2440  * Some convenience functions for PCI device drivers.
2441  */
2442 
2443 static __inline void
2444 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2445 {
2446 	uint16_t	command;
2447 
2448 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2449 	command |= bit;
2450 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2451 }
2452 
2453 static __inline void
2454 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2455 {
2456 	uint16_t	command;
2457 
2458 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2459 	command &= ~bit;
2460 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2461 }
2462 
2463 int
2464 pci_enable_busmaster_method(device_t dev, device_t child)
2465 {
2466 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2467 	return (0);
2468 }
2469 
2470 int
2471 pci_disable_busmaster_method(device_t dev, device_t child)
2472 {
2473 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2474 	return (0);
2475 }
2476 
2477 int
2478 pci_enable_io_method(device_t dev, device_t child, int space)
2479 {
2480 	uint16_t bit;
2481 
2482 	switch(space) {
2483 	case SYS_RES_IOPORT:
2484 		bit = PCIM_CMD_PORTEN;
2485 		break;
2486 	case SYS_RES_MEMORY:
2487 		bit = PCIM_CMD_MEMEN;
2488 		break;
2489 	default:
2490 		return (EINVAL);
2491 	}
2492 	pci_set_command_bit(dev, child, bit);
2493 	return (0);
2494 }
2495 
2496 int
2497 pci_disable_io_method(device_t dev, device_t child, int space)
2498 {
2499 	uint16_t bit;
2500 
2501 	switch(space) {
2502 	case SYS_RES_IOPORT:
2503 		bit = PCIM_CMD_PORTEN;
2504 		break;
2505 	case SYS_RES_MEMORY:
2506 		bit = PCIM_CMD_MEMEN;
2507 		break;
2508 	default:
2509 		return (EINVAL);
2510 	}
2511 	pci_clear_command_bit(dev, child, bit);
2512 	return (0);
2513 }
2514 
2515 /*
2516  * New style pci driver.  Parent device is either a pci-host-bridge or a
2517  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2518  */
2519 
/*
 * Dump a device's basic config-space state (identification, location,
 * class, command/status, latency, interrupt routing, power management
 * and MSI/MSI-X capabilities) to the console when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Report supported and current power states. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2576 
2577 static int
2578 pci_porten(device_t dev)
2579 {
2580 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2581 }
2582 
2583 static int
2584 pci_memen(device_t dev)
2585 {
2586 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2587 }
2588 
/*
 * Read the current value of BAR 'reg' and probe its size by writing
 * all 1's, returning the original value in *mapp and the sizing probe
 * result in *testvalp.  Decoding is disabled around the probe and the
 * original BAR value is restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2652 
/*
 * Program BAR 'pm' with a new base address and re-read the value the
 * device actually latched into pm->pm_value.  The ROM BAR is always
 * 32-bit; other BARs may be 64-bit, in which case the upper dword is
 * written and read back as well.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Cache the value the device actually accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2673 
2674 struct pci_map *
2675 pci_find_bar(device_t dev, int reg)
2676 {
2677 	struct pci_devinfo *dinfo;
2678 	struct pci_map *pm;
2679 
2680 	dinfo = device_get_ivars(dev);
2681 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2682 		if (pm->pm_reg == reg)
2683 			return (pm);
2684 	}
2685 	return (NULL);
2686 }
2687 
2688 int
2689 pci_bar_enabled(device_t dev, struct pci_map *pm)
2690 {
2691 	struct pci_devinfo *dinfo;
2692 	uint16_t cmd;
2693 
2694 	dinfo = device_get_ivars(dev);
2695 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2696 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2697 		return (0);
2698 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2699 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2700 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2701 	else
2702 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2703 }
2704 
/*
 * Allocate a pci_map tracking entry for BAR 'reg' and insert it into
 * the device's map list, which is kept sorted by register offset.
 * 'value' is the raw BAR contents and 'size' the log2 of the BAR's
 * size as determined by the sizing probe.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after, keeping the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2729 
2730 static void
2731 pci_restore_bars(device_t dev)
2732 {
2733 	struct pci_devinfo *dinfo;
2734 	struct pci_map *pm;
2735 	int ln2range;
2736 
2737 	dinfo = device_get_ivars(dev);
2738 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2739 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2740 			ln2range = 32;
2741 		else
2742 			ln2range = pci_maprange(pm->pm_value);
2743 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2744 		if (ln2range == 64)
2745 			pci_write_config(dev, pm->pm_reg + 4,
2746 			    pm->pm_value >> 32, 4);
2747 	}
2748 }
2749 
2750 /*
2751  * Add a resource based on a pci map register. Return 1 if the map
2752  * register is a 32bit map register or 2 if it is a 64bit register.
2753  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR's size in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	/* A 64-bit BAR consumes two consecutive registers. */
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The BAR address does not fit in a u_long on this arch. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* BARs are naturally aligned to their size. */
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with the address actually reserved. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2921 
2922 /*
2923  * For ATA devices we need to decide early what addressing mode to use.
2924  * Legacy demands that the primary and secondary ATA ports sits on the
2925  * same addresses that old ISA hardware did. This dictates that we use
2926  * those addresses and ignore the BAR's if we cannot set PCI native
2927  * addressing mode.
2928  */
2929 static void
2930 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2931     uint32_t prefetchmask)
2932 {
2933 	struct resource *r;
2934 	int rid, type, progif;
2935 #if 0
2936 	/* if this device supports PCI native addressing use it */
2937 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2938 	if ((progif & 0x8a) == 0x8a) {
2939 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2940 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2941 			printf("Trying ATA native PCI addressing mode\n");
2942 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2943 		}
2944 	}
2945 #endif
2946 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2947 	type = SYS_RES_IOPORT;
2948 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2949 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2950 		    prefetchmask & (1 << 0));
2951 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2952 		    prefetchmask & (1 << 1));
2953 	} else {
2954 		rid = PCIR_BAR(0);
2955 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2956 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2957 		    0x1f7, 8, 0);
2958 		rid = PCIR_BAR(1);
2959 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2960 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2961 		    0x3f6, 1, 0);
2962 	}
2963 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2964 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2965 		    prefetchmask & (1 << 2));
2966 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2967 		    prefetchmask & (1 << 3));
2968 	} else {
2969 		rid = PCIR_BAR(2);
2970 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2971 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2972 		    0x177, 8, 0);
2973 		rid = PCIR_BAR(3);
2974 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2975 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2976 		    0x376, 1, 0);
2977 	}
2978 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2979 	    prefetchmask & (1 << 4));
2980 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2981 	    prefetchmask & (1 << 5));
2982 }
2983 
/*
 * Determine the IRQ to use for a device's legacy INTx interrupt and
 * record it both in the intline config register and as the rid 0
 * SYS_RES_IRQ entry in the device's resource list.  The IRQ comes, in
 * order of preference, from a user tunable, from the parent bus's
 * interrupt routing, or from the intline value left by the firmware.
 * If force_route is non-zero, bus routing is attempted even when
 * intline already holds a valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3031 
/*
 * Perform early OHCI takeover from SMM.  If the BIOS (SMM) owns the
 * controller, request an ownership change and poll up to ~100ms for it
 * to complete; if the BIOS does not respond, reset the controller.
 * Interrupts are disabled afterwards so the controller stays quiet
 * until the real driver attaches.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* The OHCI operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* InterruptRouting set: SMM owns the controller. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll for up to 100 x 1ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3068 
3069 /* Perform early UHCI takeover from SMM. */
3070 static void
3071 uhci_early_takeover(device_t self)
3072 {
3073 	struct resource *res;
3074 	int rid;
3075 
3076 	/*
3077 	 * Set the PIRQD enable bit and switch off all the others. We don't
3078 	 * want legacy support to interfere with us XXX Does this also mean
3079 	 * that the BIOS won't touch the keyboard anymore if it is connected
3080 	 * to the ports of the root hub?
3081 	 */
3082 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3083 
3084 	/* Disable interrupts */
3085 	rid = PCI_UHCI_BASE_REG;
3086 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3087 	if (res != NULL) {
3088 		bus_write_2(res, UHCI_INTR, 0);
3089 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3090 	}
3091 }
3092 
/*
 * Perform early EHCI takeover from SMM.  Walk the extended capability
 * list looking for the USB legacy-support capability; when the BIOS
 * semaphore is set, claim the OS semaphore and poll up to ~100ms for
 * the BIOS to release the controller, then disable its interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* The EHCI capability/operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not currently own the controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll for up to 100 x 1ms for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3148 
/*
 * Perform early XHCI takeover from SMM.  Walk the extended capability
 * list in MMIO space looking for the USB legacy-support capability;
 * when the BIOS semaphore is set, claim the OS semaphore and poll up
 * to ~5 seconds for the BIOS to release the controller, then stop the
 * controller by clearing USBCMD.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* The xHCI capability/operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Seed eec so the first XHCI_XECP_NEXT(eec) test is non-zero. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* Wait a maximum of 5 seconds for the BIOS to let go. */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3210 
3211 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a PCI-PCI or
 * CardBus bridge.  If the firmware-programmed secbus/subbus range is
 * valid, attempt to reserve it from the parent bus so later bus
 * renumbering will not steal it; on failure (or when pci_clear_buses
 * is set) the registers are cleared so a fresh range is assigned
 * later.  Quirks for a couple of known-broken devices are applied to
 * the raw register values first.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the bus range from the vendor-specific register. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		/* Force the subordinate bus up to at least 0xa. */
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		/* Add a wildcard rid 0 entry sized to the existing range. */
		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Invalid range or reservation failure: zero both registers. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3316 
/*
 * Allocate the secondary bus number range (rid 0, PCI_RES_BUS) for a
 * bridge child.  If the range was not reserved at enumeration time,
 * lazily reserve one here and program the child's secbus/subbus
 * registers to match.  Returns NULL for non-bridge children, a
 * non-zero rid, or on reservation failure.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* The secondary bus range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve inactive; activation happens via the list alloc. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the newly reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3367 #endif
3368 
/*
 * Populate a device's resource list from its BARs, interrupt routing,
 * and any quirk-mandated extra registers.  ATA devices in legacy mode
 * get special treatment (fixed ISA addresses), quirked BARs may be
 * skipped or added, early USB SMM takeover is performed for host
 * controllers, and bridge secondary bus ranges are reserved.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			/* q->devid != 0 means the search loop broke early. */
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* pci_add_map returns the BAR width in dwords. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3450 
3451 static struct pci_devinfo *
3452 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3453     int slot, int func, size_t dinfo_size)
3454 {
3455 	struct pci_devinfo *dinfo;
3456 
3457 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3458 	if (dinfo != NULL)
3459 		pci_add_child(dev, dinfo);
3460 
3461 	return (dinfo);
3462 }
3463 
/*
 * Enumerate all functions on a PCI bus and add each one found as a
 * child device.  Slot 0 function 0 is probed first so that ARI can be
 * enabled (when supported) before scanning, since ARI changes the set
 * of valid slot/function numbers on the bus.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func drops back to 0 for every slot after the first. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices get the full function range. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3510 
3511 void
3512 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3513 {
3514 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3515 	device_set_ivars(dinfo->cfg.dev, dinfo);
3516 	resource_list_init(&dinfo->resources);
3517 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3518 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3519 	pci_print_verbose(dinfo);
3520 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3521 }
3522 
/*
 * Probe method for the generic PCI bus driver.  Always matches, but at
 * BUS_PROBE_GENERIC priority so platform-specific subclasses can win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3532 
/*
 * Common attach work shared by the generic PCI bus driver and its
 * subclasses: reserve our own bus number (when PCI_RES_BUS is
 * supported) and set up the DMA tag, optionally bounded by
 * PCI_DMA_BOUNDARY for top-level (non-nested) PCI buses.  Returns 0
 * on success or ENXIO if the bus number cannot be reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve this bus's own number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create a bounded DMA tag if our grandparent is not
	 * itself a PCI bus (i.e. we are not behind a PCI-PCI bridge);
	 * nested buses inherit the parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3579 
/*
 * Attach method for the generic PCI bus driver: perform the common
 * setup, enumerate all children on this bus, then attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3600 
3601 #ifdef PCI_RES_BUS
3602 static int
3603 pci_detach(device_t dev)
3604 {
3605 	struct pci_softc *sc;
3606 	int error;
3607 
3608 	error = bus_generic_detach(dev);
3609 	if (error)
3610 		return (error);
3611 	sc = device_get_softc(dev);
3612 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3613 }
3614 #endif
3615 
3616 static void
3617 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3618     int state)
3619 {
3620 	device_t child, pcib;
3621 	struct pci_devinfo *dinfo;
3622 	int dstate, i;
3623 
3624 	/*
3625 	 * Set the device to the given state.  If the firmware suggests
3626 	 * a different power state, use it instead.  If power management
3627 	 * is not present, the firmware is responsible for managing
3628 	 * device power.  Skip children who aren't attached since they
3629 	 * are handled separately.
3630 	 */
3631 	pcib = device_get_parent(dev);
3632 	for (i = 0; i < numdevs; i++) {
3633 		child = devlist[i];
3634 		dinfo = device_get_ivars(child);
3635 		dstate = state;
3636 		if (device_is_attached(child) &&
3637 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3638 			pci_set_powerstate(child, dstate);
3639 	}
3640 }
3641 
/*
 * Suspend method for the PCI bus: save each child's config space,
 * suspend the children, and then (if pci_do_power_suspend is set)
 * place them in D3.  Returns 0 on success or the first error from
 * child enumeration or generic suspend.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3673 
/*
 * Resume method for the PCI bus: power children back to D0, restore
 * their config space, and then resume them — critical device classes
 * (display, memory, bridge, base peripheral) first so dependent
 * devices find them working.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-snapshot devices that have no driver attached. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3729 
/*
 * Locate the "pci_vendor_data" module preloaded by the boot loader and
 * publish its address and size via pci_vendordata/pci_vendordata_size
 * for use when printing device descriptions.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 * NOTE(review): this writes one byte past the
			 * reported size — presumably the loader's
			 * allocation is rounded up; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3749 
/*
 * Called when a new driver is registered with the PCI bus: give every
 * driverless child another chance to probe.  Config space is restored
 * first in case the device was powered down, and children that still
 * fail to attach are powered back down via pci_child_detached().
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3778 
/*
 * Set up an interrupt handler for a PCI child.  After the generic
 * setup succeeds, direct children get extra bookkeeping: for rid 0
 * (legacy INTx) the INTx disable bit is cleared; for MSI/MSI-X rids
 * the message is mapped through the parent bridge on first use, the
 * per-message handler count is bumped, and INTx is disabled.  On a
 * mapping failure the handler is torn down again and the error
 * returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first handler. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3870 
3871 int
3872 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3873     void *cookie)
3874 {
3875 	struct msix_table_entry *mte;
3876 	struct resource_list_entry *rle;
3877 	struct pci_devinfo *dinfo;
3878 	int error, rid;
3879 
3880 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3881 		return (EINVAL);
3882 
3883 	/* If this isn't a direct child, just bail out */
3884 	if (device_get_parent(child) != dev)
3885 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3886 
3887 	rid = rman_get_rid(irq);
3888 	if (rid == 0) {
3889 		/* Mask INTx */
3890 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3891 	} else {
3892 		/*
3893 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3894 		 * decrement the appropriate handlers count and mask the
3895 		 * MSI-X message, or disable MSI messages if the count
3896 		 * drops to 0.
3897 		 */
3898 		dinfo = device_get_ivars(child);
3899 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3900 		if (rle->res != irq)
3901 			return (EINVAL);
3902 		if (dinfo->cfg.msi.msi_alloc > 0) {
3903 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3904 			    ("MSI-X index too high"));
3905 			if (dinfo->cfg.msi.msi_handlers == 0)
3906 				return (EINVAL);
3907 			dinfo->cfg.msi.msi_handlers--;
3908 			if (dinfo->cfg.msi.msi_handlers == 0)
3909 				pci_disable_msi(child);
3910 		} else {
3911 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3912 			    ("No MSI or MSI-X interrupts allocated"));
3913 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3914 			    ("MSI-X index too high"));
3915 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3916 			if (mte->mte_handlers == 0)
3917 				return (EINVAL);
3918 			mte->mte_handlers--;
3919 			if (mte->mte_handlers == 0)
3920 				pci_mask_msix(child, rid - 1);
3921 		}
3922 	}
3923 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3924 	if (rid > 0)
3925 		KASSERT(error == 0,
3926 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3927 	return (error);
3928 }
3929 
3930 int
3931 pci_print_child(device_t dev, device_t child)
3932 {
3933 	struct pci_devinfo *dinfo;
3934 	struct resource_list *rl;
3935 	int retval = 0;
3936 
3937 	dinfo = device_get_ivars(child);
3938 	rl = &dinfo->resources;
3939 
3940 	retval += bus_print_child_header(dev, child);
3941 
3942 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3943 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3944 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3945 	if (device_get_flags(dev))
3946 		retval += printf(" flags %#x", device_get_flags(dev));
3947 
3948 	retval += printf(" at device %d.%d", pci_get_slot(child),
3949 	    pci_get_function(child));
3950 
3951 	retval += bus_print_child_footer(dev, child);
3952 
3953 	return (retval);
3954 }
3955 
/*
 * Table of generic class/subclass descriptions used by pci_probe_nomatch()
 * when no driver attaches and the vendor/device database has no entry.
 * A subclass of -1 is a wildcard supplying the default description for the
 * whole class.  The "report" field selects whether the device is announced
 * unconditionally (1) or only when booting verbosely (0).  The table is
 * terminated by a NULL desc entry.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4050 
/*
 * Announce a PCI device for which no driver claimed the probe.  Prefer a
 * description from the loaded vendor/device database; otherwise fall back
 * to the generic class/subclass strings in pci_nomatch_tab.  Quiet table
 * entries (report == 0) are only printed when booting verbosely.  Finally
 * the device's config registers are saved via pci_cfg_save() with
 * setstate=1 (presumably allowing it to be powered down; confirm against
 * pci_cfg_save()) since no driver will manage it.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					/* Wildcard entry: class-wide name. */
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			/* cp starts as "unknown" and is never NULL here. */
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4097 
/*
 * Bus callback invoked when a child's driver has detached.  Reclaims any
 * resources the driver leaked, complaining once per resource kind, and
 * then re-saves the device's config state.  The release order below is
 * deliberate (see comment in the body).
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4129 
4130 /*
4131  * Parse the PCI device database, if loaded, and return a pointer to a
4132  * description of the device.
4133  *
4134  * The database is flat text formatted as follows:
4135  *
4136  * Any line not in a valid format is ignored.
4137  * Lines are terminated with newline '\n' characters.
4138  *
4139  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4140  * the vendor name.
4141  *
4142  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4143  * - devices cannot be listed without a corresponding VENDOR line.
4144  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4145  * another TAB, then the device name.
4146  */
4147 
4148 /*
4149  * Assuming (ptr) points to the beginning of a line in the database,
4150  * return the vendor or device and description of the next entry.
4151  * The value of (vendor) or (device) inappropriate for the entry type
4152  * is set to -1.  Returns nonzero at the end of the database.
4153  *
4154  * Note that this is slightly unrobust in the face of corrupt data;
4155  * we attempt to safeguard against this by spamming the end of the
4156  * database with a newline when we initialise.
4157  */
4158 static int
4159 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4160 {
4161 	char	*cp = *ptr;
4162 	int	left;
4163 
4164 	*device = -1;
4165 	*vendor = -1;
4166 	**desc = '\0';
4167 	for (;;) {
4168 		left = pci_vendordata_size - (cp - pci_vendordata);
4169 		if (left <= 0) {
4170 			*ptr = cp;
4171 			return(1);
4172 		}
4173 
4174 		/* vendor entry? */
4175 		if (*cp != '\t' &&
4176 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4177 			break;
4178 		/* device entry? */
4179 		if (*cp == '\t' &&
4180 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4181 			break;
4182 
4183 		/* skip to next line */
4184 		while (*cp != '\n' && left > 0) {
4185 			cp++;
4186 			left--;
4187 		}
4188 		if (*cp == '\n') {
4189 			cp++;
4190 			left--;
4191 		}
4192 	}
4193 	/* skip to next line */
4194 	while (*cp != '\n' && left > 0) {
4195 		cp++;
4196 		left--;
4197 	}
4198 	if (*cp == '\n' && left > 0)
4199 		cp++;
4200 	*ptr = cp;
4201 	return(0);
4202 }
4203 
4204 static char *
4205 pci_describe_device(device_t dev)
4206 {
4207 	int	vendor, device;
4208 	char	*desc, *vp, *dp, *line;
4209 
4210 	desc = vp = dp = NULL;
4211 
4212 	/*
4213 	 * If we have no vendor data, we can't do anything.
4214 	 */
4215 	if (pci_vendordata == NULL)
4216 		goto out;
4217 
4218 	/*
4219 	 * Scan the vendor data looking for this device
4220 	 */
4221 	line = pci_vendordata;
4222 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4223 		goto out;
4224 	for (;;) {
4225 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4226 			goto out;
4227 		if (vendor == pci_get_vendor(dev))
4228 			break;
4229 	}
4230 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4231 		goto out;
4232 	for (;;) {
4233 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4234 			*dp = 0;
4235 			break;
4236 		}
4237 		if (vendor != -1) {
4238 			*dp = 0;
4239 			break;
4240 		}
4241 		if (device == pci_get_device(dev))
4242 			break;
4243 	}
4244 	if (dp[0] == '\0')
4245 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4246 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4247 	    NULL)
4248 		sprintf(desc, "%s, %s", vp, dp);
4249 out:
4250 	if (vp != NULL)
4251 		free(vp, M_DEVBUF);
4252 	if (dp != NULL)
4253 		free(dp, M_DEVBUF);
4254 	return(desc);
4255 }
4256 
4257 int
4258 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4259 {
4260 	struct pci_devinfo *dinfo;
4261 	pcicfgregs *cfg;
4262 
4263 	dinfo = device_get_ivars(child);
4264 	cfg = &dinfo->cfg;
4265 
4266 	switch (which) {
4267 	case PCI_IVAR_ETHADDR:
4268 		/*
4269 		 * The generic accessor doesn't deal with failure, so
4270 		 * we set the return value, then return an error.
4271 		 */
4272 		*((uint8_t **) result) = NULL;
4273 		return (EINVAL);
4274 	case PCI_IVAR_SUBVENDOR:
4275 		*result = cfg->subvendor;
4276 		break;
4277 	case PCI_IVAR_SUBDEVICE:
4278 		*result = cfg->subdevice;
4279 		break;
4280 	case PCI_IVAR_VENDOR:
4281 		*result = cfg->vendor;
4282 		break;
4283 	case PCI_IVAR_DEVICE:
4284 		*result = cfg->device;
4285 		break;
4286 	case PCI_IVAR_DEVID:
4287 		*result = (cfg->device << 16) | cfg->vendor;
4288 		break;
4289 	case PCI_IVAR_CLASS:
4290 		*result = cfg->baseclass;
4291 		break;
4292 	case PCI_IVAR_SUBCLASS:
4293 		*result = cfg->subclass;
4294 		break;
4295 	case PCI_IVAR_PROGIF:
4296 		*result = cfg->progif;
4297 		break;
4298 	case PCI_IVAR_REVID:
4299 		*result = cfg->revid;
4300 		break;
4301 	case PCI_IVAR_INTPIN:
4302 		*result = cfg->intpin;
4303 		break;
4304 	case PCI_IVAR_IRQ:
4305 		*result = cfg->intline;
4306 		break;
4307 	case PCI_IVAR_DOMAIN:
4308 		*result = cfg->domain;
4309 		break;
4310 	case PCI_IVAR_BUS:
4311 		*result = cfg->bus;
4312 		break;
4313 	case PCI_IVAR_SLOT:
4314 		*result = cfg->slot;
4315 		break;
4316 	case PCI_IVAR_FUNCTION:
4317 		*result = cfg->func;
4318 		break;
4319 	case PCI_IVAR_CMDREG:
4320 		*result = cfg->cmdreg;
4321 		break;
4322 	case PCI_IVAR_CACHELNSZ:
4323 		*result = cfg->cachelnsz;
4324 		break;
4325 	case PCI_IVAR_MINGNT:
4326 		*result = cfg->mingnt;
4327 		break;
4328 	case PCI_IVAR_MAXLAT:
4329 		*result = cfg->maxlat;
4330 		break;
4331 	case PCI_IVAR_LATTIMER:
4332 		*result = cfg->lattimer;
4333 		break;
4334 	default:
4335 		return (ENOENT);
4336 	}
4337 	return (0);
4338 }
4339 
4340 int
4341 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4342 {
4343 	struct pci_devinfo *dinfo;
4344 
4345 	dinfo = device_get_ivars(child);
4346 
4347 	switch (which) {
4348 	case PCI_IVAR_INTPIN:
4349 		dinfo->cfg.intpin = value;
4350 		return (0);
4351 	case PCI_IVAR_ETHADDR:
4352 	case PCI_IVAR_SUBVENDOR:
4353 	case PCI_IVAR_SUBDEVICE:
4354 	case PCI_IVAR_VENDOR:
4355 	case PCI_IVAR_DEVICE:
4356 	case PCI_IVAR_DEVID:
4357 	case PCI_IVAR_CLASS:
4358 	case PCI_IVAR_SUBCLASS:
4359 	case PCI_IVAR_PROGIF:
4360 	case PCI_IVAR_REVID:
4361 	case PCI_IVAR_IRQ:
4362 	case PCI_IVAR_DOMAIN:
4363 	case PCI_IVAR_BUS:
4364 	case PCI_IVAR_SLOT:
4365 	case PCI_IVAR_FUNCTION:
4366 		return (EINVAL);	/* disallow for now */
4367 
4368 	default:
4369 		return (ENOENT);
4370 	}
4371 }
4372 
4373 #include "opt_ddb.h"
4374 #ifdef DDB
4375 #include <ddb/ddb.h>
4376 #include <sys/cons.h>
4377 
4378 /*
4379  * List resources based on pci map registers, used for within ddb
4380  */
4381 
4382 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4383 {
4384 	struct pci_devinfo *dinfo;
4385 	struct devlist *devlist_head;
4386 	struct pci_conf *p;
4387 	const char *name;
4388 	int i, error, none_count;
4389 
4390 	none_count = 0;
4391 	/* get the head of the device queue */
4392 	devlist_head = &pci_devq;
4393 
4394 	/*
4395 	 * Go through the list of devices and print out devices
4396 	 */
4397 	for (error = 0, i = 0,
4398 	     dinfo = STAILQ_FIRST(devlist_head);
4399 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4400 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4401 
4402 		/* Populate pd_name and pd_unit */
4403 		name = NULL;
4404 		if (dinfo->cfg.dev)
4405 			name = device_get_name(dinfo->cfg.dev);
4406 
4407 		p = &dinfo->conf;
4408 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4409 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4410 			(name && *name) ? name : "none",
4411 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4412 			none_count++,
4413 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4414 			p->pc_sel.pc_func, (p->pc_class << 16) |
4415 			(p->pc_subclass << 8) | p->pc_progif,
4416 			(p->pc_subdevice << 16) | p->pc_subvendor,
4417 			(p->pc_device << 16) | p->pc_vendor,
4418 			p->pc_revid, p->pc_hdr);
4419 	}
4420 }
4421 #endif /* DDB */
4422 
4423 static struct resource *
4424 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4425     u_long start, u_long end, u_long count, u_int flags)
4426 {
4427 	struct pci_devinfo *dinfo = device_get_ivars(child);
4428 	struct resource_list *rl = &dinfo->resources;
4429 	struct resource *res;
4430 	struct pci_map *pm;
4431 	pci_addr_t map, testval;
4432 	int mapsize;
4433 
4434 	res = NULL;
4435 	pm = pci_find_bar(child, *rid);
4436 	if (pm != NULL) {
4437 		/* This is a BAR that we failed to allocate earlier. */
4438 		mapsize = pm->pm_size;
4439 		map = pm->pm_value;
4440 	} else {
4441 		/*
4442 		 * Weed out the bogons, and figure out how large the
4443 		 * BAR/map is.  BARs that read back 0 here are bogus
4444 		 * and unimplemented.  Note: atapci in legacy mode are
4445 		 * special and handled elsewhere in the code.  If you
4446 		 * have a atapci device in legacy mode and it fails
4447 		 * here, that other code is broken.
4448 		 */
4449 		pci_read_bar(child, *rid, &map, &testval);
4450 
4451 		/*
4452 		 * Determine the size of the BAR and ignore BARs with a size
4453 		 * of 0.  Device ROM BARs use a different mask value.
4454 		 */
4455 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4456 			mapsize = pci_romsize(testval);
4457 		else
4458 			mapsize = pci_mapsize(testval);
4459 		if (mapsize == 0)
4460 			goto out;
4461 		pm = pci_add_bar(child, *rid, map, mapsize);
4462 	}
4463 
4464 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4465 		if (type != SYS_RES_MEMORY) {
4466 			if (bootverbose)
4467 				device_printf(dev,
4468 				    "child %s requested type %d for rid %#x,"
4469 				    " but the BAR says it is an memio\n",
4470 				    device_get_nameunit(child), type, *rid);
4471 			goto out;
4472 		}
4473 	} else {
4474 		if (type != SYS_RES_IOPORT) {
4475 			if (bootverbose)
4476 				device_printf(dev,
4477 				    "child %s requested type %d for rid %#x,"
4478 				    " but the BAR says it is an ioport\n",
4479 				    device_get_nameunit(child), type, *rid);
4480 			goto out;
4481 		}
4482 	}
4483 
4484 	/*
4485 	 * For real BARs, we need to override the size that
4486 	 * the driver requests, because that's what the BAR
4487 	 * actually uses and we would otherwise have a
4488 	 * situation where we might allocate the excess to
4489 	 * another driver, which won't work.
4490 	 */
4491 	count = (pci_addr_t)1 << mapsize;
4492 	if (RF_ALIGNMENT(flags) < mapsize)
4493 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4494 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4495 		flags |= RF_PREFETCHABLE;
4496 
4497 	/*
4498 	 * Allocate enough resource, and then write back the
4499 	 * appropriate BAR for that resource.
4500 	 */
4501 	resource_list_add(rl, type, *rid, start, end, count);
4502 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
4503 	    count, flags & ~RF_ACTIVE);
4504 	if (res == NULL) {
4505 		resource_list_delete(rl, type, *rid);
4506 		device_printf(child,
4507 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4508 		    count, *rid, type, start, end);
4509 		goto out;
4510 	}
4511 	if (bootverbose)
4512 		device_printf(child,
4513 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4514 		    count, *rid, type, rman_get_start(res));
4515 	map = rman_get_start(res);
4516 	pci_write_bar(child, pm, map);
4517 out:
4518 	return (res);
4519 }
4520 
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed straight up the tree.  For our own children
 * this performs the PCI-specific lazy work first: refusing legacy IRQ 0
 * once MSI/MSI-X vectors are allocated, routing an interrupt on demand,
 * passing bridge window registers upward, and reserving the backing BAR
 * range via pci_reserve_map() if it is not on the resource list yet.
 * The actual allocation is then done from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4599 
/*
 * Bus method: release a resource.  Requests from grandchildren go up the
 * tree, as do PCI-PCI bridge window registers (which are not BARs and
 * were allocated via bus_generic_alloc_resource()).  Everything else is
 * released through the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4634 
/*
 * Bus method: activate a resource.  After generic activation succeeds
 * and if we are the child's direct parent, enable the matching decode
 * (I/O or memory) in the command register; a device ROM BAR additionally
 * needs PCIM_BIOS_ENABLE set in the BAR value itself.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4662 
4663 int
4664 pci_deactivate_resource(device_t dev, device_t child, int type,
4665     int rid, struct resource *r)
4666 {
4667 	struct pci_devinfo *dinfo;
4668 	int error;
4669 
4670 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4671 	if (error)
4672 		return (error);
4673 
4674 	/* Disable decoding for device ROMs. */
4675 	if (device_get_parent(child) == dev) {
4676 		dinfo = device_get_ivars(child);
4677 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4678 			pci_write_bar(child, pci_find_bar(child, rid),
4679 			    rman_get_start(r));
4680 	}
4681 	return (0);
4682 }
4683 
/*
 * Detach (if attached) and fully destroy a child device: disable its
 * memory and I/O decoding, release every resource on its resource list
 * (forcibly, with a complaint, if the driver left one active), then free
 * the list, the device, and the cached config info.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4723 
/*
 * Bus method: delete a resource-list entry for a direct child.  The entry
 * is unreserved first if it holds a resource; if the resource is still
 * active or busy the deletion is refused with a diagnostic instead.
 * Silently does nothing for grandchildren or missing entries.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4753 
4754 struct resource_list *
4755 pci_get_resource_list (device_t dev, device_t child)
4756 {
4757 	struct pci_devinfo *dinfo = device_get_ivars(child);
4758 
4759 	return (&dinfo->resources);
4760 }
4761 
4762 bus_dma_tag_t
4763 pci_get_dma_tag(device_t bus, device_t dev)
4764 {
4765 	struct pci_softc *sc = device_get_softc(bus);
4766 
4767 	return (sc->sc_dma_tag);
4768 }
4769 
4770 uint32_t
4771 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4772 {
4773 	struct pci_devinfo *dinfo = device_get_ivars(child);
4774 	pcicfgregs *cfg = &dinfo->cfg;
4775 
4776 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4777 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4778 }
4779 
4780 void
4781 pci_write_config_method(device_t dev, device_t child, int reg,
4782     uint32_t val, int width)
4783 {
4784 	struct pci_devinfo *dinfo = device_get_ivars(child);
4785 	pcicfgregs *cfg = &dinfo->cfg;
4786 
4787 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4788 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4789 }
4790 
4791 int
4792 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4793     size_t buflen)
4794 {
4795 
4796 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4797 	    pci_get_function(child));
4798 	return (0);
4799 }
4800 
4801 int
4802 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4803     size_t buflen)
4804 {
4805 	struct pci_devinfo *dinfo;
4806 	pcicfgregs *cfg;
4807 
4808 	dinfo = device_get_ivars(child);
4809 	cfg = &dinfo->cfg;
4810 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4811 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4812 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4813 	    cfg->progif);
4814 	return (0);
4815 }
4816 
4817 int
4818 pci_assign_interrupt_method(device_t dev, device_t child)
4819 {
4820 	struct pci_devinfo *dinfo = device_get_ivars(child);
4821 	pcicfgregs *cfg = &dinfo->cfg;
4822 
4823 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4824 	    cfg->intpin));
4825 }
4826 
4827 static int
4828 pci_modevent(module_t mod, int what, void *arg)
4829 {
4830 	static struct cdev *pci_cdev;
4831 
4832 	switch (what) {
4833 	case MOD_LOAD:
4834 		STAILQ_INIT(&pci_devq);
4835 		pci_generation = 0;
4836 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4837 		    "pci");
4838 		pci_load_vendor_data();
4839 		break;
4840 
4841 	case MOD_UNLOAD:
4842 		destroy_dev(pci_cdev);
4843 		break;
4844 	}
4845 
4846 	return (0);
4847 }
4848 
/*
 * Rewrite the PCI Express control registers captured by
 * pci_cfg_save_pcie().  Which registers exist depends on the capability
 * version and the port type: version 1 capabilities implement only the
 * registers relevant to the device's type, while version 2 and later
 * implement them all.  The gating here mirrors the save path exactly.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot registers: root ports, or downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4884 
4885 static void
4886 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4887 {
4888 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4889 	    dinfo->cfg.pcix.pcix_command,  2);
4890 }
4891 
/*
 * Restore a device's standard type 0 config header, BARs, and the saved
 * PCI-X/PCIe/MSI/MSI-X state from the cached copies in dinfo, bringing
 * the device to D0 first.  Non-type-0 headers are ignored (see below).
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4941 
/*
 * Snapshot the PCI Express control registers so pci_cfg_restore_pcie()
 * can rewrite them.  Which registers exist depends on the capability
 * version and the port type; the gating here is the mirror image of the
 * restore path and must stay in sync with it.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot registers: root ports, or downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4979 
4980 static void
4981 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4982 {
4983 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4984 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4985 }
4986 
/*
 * Snapshot the writable portion of a device's type 0 config header into
 * the cached copy in *dinfo so pci_cfg_restore() can replay it later.
 * If 'setstate' is non-zero, also power the device down to D3 when the
 * pci_do_power_nodriver policy allows it for this device class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/* Save the writable PCI-Express and PCI-X registers as well. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/*
	 * Each policy level below is a superset of the one above it;
	 * the fallthroughs are intentional.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5072 
5073 /* Wrapper APIs suitable for device driver use. */
5074 void
5075 pci_save_state(device_t dev)
5076 {
5077 	struct pci_devinfo *dinfo;
5078 
5079 	dinfo = device_get_ivars(dev);
5080 	pci_cfg_save(dev, dinfo, 0);
5081 }
5082 
5083 void
5084 pci_restore_state(device_t dev)
5085 {
5086 	struct pci_devinfo *dinfo;
5087 
5088 	dinfo = device_get_ivars(dev);
5089 	pci_cfg_restore(dev, dinfo);
5090 }
5091 
5092 static uint16_t
5093 pci_get_rid_method(device_t dev, device_t child)
5094 {
5095 
5096 	return (PCIB_GET_RID(device_get_parent(dev), child));
5097 }
5098