xref: /freebsd/sys/dev/pci/pci.c (revision e72055b7feba695a760d45f01f0f8268b1cb4a74)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
/*
 * Evaluate to true when config register "reg" is the expansion-ROM BAR
 * for the header type recorded in "cfg": type 0 (normal) functions keep
 * it at PCIR_BIOS, type 1 (bridge) functions at PCIR_BIOS_1.
 * Fix: parenthesize the "reg" macro argument so expressions such as
 * PCIR_IS_BIOS(cfg, x + 4) expand with the intended precedence.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
76 
/*
 * Prototypes for the file-local helpers implemented below.  Grouped by
 * role: BAR/ROM map decoding, device enumeration/attach, description
 * database parsing, config-space capability/VPD readers, and MSI/MSI-X
 * bookkeeping.
 */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
#ifdef PCI_RES_BUS
static int		pci_detach(device_t dev);
#endif
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static uint16_t		pci_get_rid_method(device_t dev, device_t child);
/*
 * Kernel-object method table wiring the PCI bus driver into the
 * device, bus, and PCI kobj interfaces.  Interface methods not listed
 * here fall back to that interface's defaults.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),

	DEVMETHOD_END
};
190 
/* Declare the "pci" driver class and register it to attach under pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor/device description database; filled in by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
199 
/*
 * One device-specific workaround, keyed by the combined 32-bit
 * vendor/device ID.  The meaning of arg1/arg2 depends on the quirk
 * type (e.g. arg1 is the register offset for the MAP_REG quirks).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;		/* quirk-type specific argument */
	int	arg2;		/* quirk-type specific argument */
};
211 
/* Quirk table consulted by pci_has_quirk(); terminated by a zero devid. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* terminator */
};
269 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of enumerated functions; pci_generation bumps on each add. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set by pci_read_cap() when a PCI-X bridge / PCIe device is discovered. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
349 
350 static int
351 pci_has_quirk(uint32_t devid, int quirk)
352 {
353 	const struct pci_quirk *q;
354 
355 	for (q = &pci_quirks[0]; q->devid; q++) {
356 		if (q->devid == devid && q->type == quirk)
357 			return (1);
358 	}
359 	return (0);
360 }
361 
362 /* Find a device_t by bus/slot/function in domain 0 */
363 
/*
 * Convenience wrapper around pci_find_dbsf() that searches in
 * domain 0 only.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
370 
371 /* Find a device_t by domain/bus/slot/function */
372 
373 device_t
374 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
375 {
376 	struct pci_devinfo *dinfo;
377 
378 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
379 		if ((dinfo->cfg.domain == domain) &&
380 		    (dinfo->cfg.bus == bus) &&
381 		    (dinfo->cfg.slot == slot) &&
382 		    (dinfo->cfg.func == func)) {
383 			return (dinfo->cfg.dev);
384 		}
385 	}
386 
387 	return (NULL);
388 }
389 
390 /* Find a device_t by vendor/device ID */
391 
392 device_t
393 pci_find_device(uint16_t vendor, uint16_t device)
394 {
395 	struct pci_devinfo *dinfo;
396 
397 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
398 		if ((dinfo->cfg.vendor == vendor) &&
399 		    (dinfo->cfg.device == device)) {
400 			return (dinfo->cfg.dev);
401 		}
402 	}
403 
404 	return (NULL);
405 }
406 
407 device_t
408 pci_find_class(uint8_t class, uint8_t subclass)
409 {
410 	struct pci_devinfo *dinfo;
411 
412 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
413 		if (dinfo->cfg.baseclass == class &&
414 		    dinfo->cfg.subclass == subclass) {
415 			return (dinfo->cfg.dev);
416 		}
417 	}
418 
419 	return (NULL);
420 }
421 
422 static int
423 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
424 {
425 	va_list ap;
426 	int retval;
427 
428 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
429 	    cfg->func);
430 	va_start(ap, fmt);
431 	retval += vprintf(fmt, ap);
432 	va_end(ap);
433 	return (retval);
434 }
435 
436 /* return base address of memory or port map */
437 
438 static pci_addr_t
439 pci_mapbase(uint64_t mapreg)
440 {
441 
442 	if (PCI_BAR_MEM(mapreg))
443 		return (mapreg & PCIM_BAR_MEM_BASE);
444 	else
445 		return (mapreg & PCIM_BAR_IO_BASE);
446 }
447 
448 /* return map type of memory or port map */
449 
450 static const char *
451 pci_maptype(uint64_t mapreg)
452 {
453 
454 	if (PCI_BAR_IO(mapreg))
455 		return ("I/O Port");
456 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
457 		return ("Prefetchable Memory");
458 	return ("Memory");
459 }
460 
461 /* return log2 of map size decoded for memory or port map */
462 
/*
 * Return log2 of the size decoded by a BAR, derived from the value the
 * register reads back after writing all-ones: the lowest set address
 * bit gives the size.  Returns 0 for a BAR that decodes nothing.
 */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
479 
480 /* return base address of device ROM */
481 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Keep only the address bits of the expansion-ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
488 
/* return log2 of map size decoded for device ROM */
490 
/*
 * Return log2 of the size decoded by the expansion-ROM BAR, derived
 * from the post write-all-ones test value; 0 if the ROM decodes
 * nothing.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
507 
508 /* return log2 of address range supported by map register */
509 
510 static int
511 pci_maprange(uint64_t mapreg)
512 {
513 	int ln2range = 0;
514 
515 	if (PCI_BAR_IO(mapreg))
516 		ln2range = 32;
517 	else
518 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
519 		case PCIM_BAR_MEM_32:
520 			ln2range = 32;
521 			break;
522 		case PCIM_BAR_MEM_1MB:
523 			ln2range = 20;
524 			break;
525 		case PCIM_BAR_MEM_64:
526 			ln2range = 64;
527 			break;
528 		}
529 	return (ln2range);
530 }
531 
532 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
533 
534 static void
535 pci_fixancient(pcicfgregs *cfg)
536 {
537 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
538 		return;
539 
540 	/* PCI to PCI bridges use header type 1 */
541 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
542 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
543 }
544 
545 /* extract header type specific config data */
546 
547 static void
548 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
549 {
550 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
551 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
552 	case PCIM_HDRTYPE_NORMAL:
553 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
554 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
555 		cfg->nummaps	    = PCI_MAXMAPS_0;
556 		break;
557 	case PCIM_HDRTYPE_BRIDGE:
558 		cfg->nummaps	    = PCI_MAXMAPS_1;
559 		break;
560 	case PCIM_HDRTYPE_CARDBUS:
561 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
562 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
563 		cfg->nummaps	    = PCI_MAXMAPS_2;
564 		break;
565 	}
566 #undef REG
567 }
568 
569 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of the function at domain "d",
 * bus/slot/function b/s/f into a freshly allocated pci_devinfo, link
 * it onto the global device queue, and return it.  "size" is the
 * allocation size, letting callers embed pci_devinfo at the head of a
 * larger structure.  Returns NULL when no function responds at that
 * address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An empty slot reads the vendor/device register back as all-ones. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the header fields common to all header types. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record the multi-function bit, then strip it from hdrtype. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the cached header into the exported pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
644 
/*
 * Walk the classic capability list of the function described by "cfg"
 * and record the location (and key registers) of each capability this
 * driver cares about: power management, HyperTransport, MSI, MSI-X,
 * VPD, subvendor, PCI-X, and PCI-express.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability-list head pointer moves with the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* msgnum = 2^(encoded MMC field). */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* The table-size field encodes N-1. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
805 
806 /*
807  * PCI Vital Product Data
808  */
809 
810 #define	PCI_VPD_TIMEOUT		1000000
811 
812 static int
813 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
814 {
815 	int count = PCI_VPD_TIMEOUT;
816 
817 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
818 
819 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
820 
821 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
822 		if (--count < 0)
823 			return (ENXIO);
824 		DELAY(1);	/* limit looping */
825 	}
826 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
827 
828 	return (0);
829 }
830 
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD data at offset "reg".
 * Currently compiled out (no callers); mirrors pci_read_vpd_reg() but
 * polls for the ready bit to CLEAR after initiating the write.
 * NOTE(review): the KASSERT message typo "must by" is kept as-is here
 * since this code is disabled.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
850 
851 #undef PCI_VPD_TIMEOUT
852 
/*
 * Cursor used while streaming VPD bytes out of the 32-bit VPD data
 * register one byte at a time (see vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config cycles */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
861 
862 static int
863 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
864 {
865 	uint32_t reg;
866 	uint8_t byte;
867 
868 	if (vrs->bytesinval == 0) {
869 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
870 			return (ENXIO);
871 		vrs->val = le32toh(reg);
872 		vrs->off += 4;
873 		byte = vrs->val & 0xff;
874 		vrs->bytesinval = 3;
875 	} else {
876 		vrs->val = vrs->val >> 8;
877 		byte = vrs->val & 0xff;
878 		vrs->bytesinval--;
879 	}
880 
881 	vrs->cksum += byte;
882 	*data = byte;
883 	return (0);
884 }
885 
886 static void
887 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
888 {
889 	struct vpd_readstate vrs;
890 	int state;
891 	int name;
892 	int remain;
893 	int i;
894 	int alloc, off;		/* alloc/off for RO/W arrays */
895 	int cksumvalid;
896 	int dflen;
897 	uint8_t byte;
898 	uint8_t byte2;
899 
900 	/* init vpd reader */
901 	vrs.bytesinval = 0;
902 	vrs.off = 0;
903 	vrs.pcib = pcib;
904 	vrs.cfg = cfg;
905 	vrs.cksum = 0;
906 
907 	state = 0;
908 	name = remain = i = 0;	/* shut up stupid gcc */
909 	alloc = off = 0;	/* shut up stupid gcc */
910 	dflen = 0;		/* shut up stupid gcc */
911 	cksumvalid = -1;
912 	while (state >= 0) {
913 		if (vpd_nextbyte(&vrs, &byte)) {
914 			state = -2;
915 			break;
916 		}
917 #if 0
918 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
919 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
920 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
921 #endif
922 		switch (state) {
923 		case 0:		/* item name */
924 			if (byte & 0x80) {
925 				if (vpd_nextbyte(&vrs, &byte2)) {
926 					state = -2;
927 					break;
928 				}
929 				remain = byte2;
930 				if (vpd_nextbyte(&vrs, &byte2)) {
931 					state = -2;
932 					break;
933 				}
934 				remain |= byte2 << 8;
935 				if (remain > (0x7f*4 - vrs.off)) {
936 					state = -1;
937 					pci_printf(cfg,
938 					    "invalid VPD data, remain %#x\n",
939 					    remain);
940 				}
941 				name = byte & 0x7f;
942 			} else {
943 				remain = byte & 0x7;
944 				name = (byte >> 3) & 0xf;
945 			}
946 			switch (name) {
947 			case 0x2:	/* String */
948 				cfg->vpd.vpd_ident = malloc(remain + 1,
949 				    M_DEVBUF, M_WAITOK);
950 				i = 0;
951 				state = 1;
952 				break;
953 			case 0xf:	/* End */
954 				state = -1;
955 				break;
956 			case 0x10:	/* VPD-R */
957 				alloc = 8;
958 				off = 0;
959 				cfg->vpd.vpd_ros = malloc(alloc *
960 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
961 				    M_WAITOK | M_ZERO);
962 				state = 2;
963 				break;
964 			case 0x11:	/* VPD-W */
965 				alloc = 8;
966 				off = 0;
967 				cfg->vpd.vpd_w = malloc(alloc *
968 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
969 				    M_WAITOK | M_ZERO);
970 				state = 5;
971 				break;
972 			default:	/* Invalid data, abort */
973 				state = -1;
974 				break;
975 			}
976 			break;
977 
978 		case 1:	/* Identifier String */
979 			cfg->vpd.vpd_ident[i++] = byte;
980 			remain--;
981 			if (remain == 0)  {
982 				cfg->vpd.vpd_ident[i] = '\0';
983 				state = 0;
984 			}
985 			break;
986 
987 		case 2:	/* VPD-R Keyword Header */
988 			if (off == alloc) {
989 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
990 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
991 				    M_DEVBUF, M_WAITOK | M_ZERO);
992 			}
993 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
994 			if (vpd_nextbyte(&vrs, &byte2)) {
995 				state = -2;
996 				break;
997 			}
998 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
999 			if (vpd_nextbyte(&vrs, &byte2)) {
1000 				state = -2;
1001 				break;
1002 			}
1003 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1004 			if (dflen == 0 &&
1005 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1006 			    2) == 0) {
1007 				/*
1008 				 * if this happens, we can't trust the rest
1009 				 * of the VPD.
1010 				 */
1011 				pci_printf(cfg, "bad keyword length: %d\n",
1012 				    dflen);
1013 				cksumvalid = 0;
1014 				state = -1;
1015 				break;
1016 			} else if (dflen == 0) {
1017 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1018 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1019 				    M_DEVBUF, M_WAITOK);
1020 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1021 			} else
1022 				cfg->vpd.vpd_ros[off].value = malloc(
1023 				    (dflen + 1) *
1024 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1025 				    M_DEVBUF, M_WAITOK);
1026 			remain -= 3;
1027 			i = 0;
1028 			/* keep in sync w/ state 3's transistions */
1029 			if (dflen == 0 && remain == 0)
1030 				state = 0;
1031 			else if (dflen == 0)
1032 				state = 2;
1033 			else
1034 				state = 3;
1035 			break;
1036 
1037 		case 3:	/* VPD-R Keyword Value */
1038 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1039 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1040 			    "RV", 2) == 0 && cksumvalid == -1) {
1041 				if (vrs.cksum == 0)
1042 					cksumvalid = 1;
1043 				else {
1044 					if (bootverbose)
1045 						pci_printf(cfg,
1046 					    "bad VPD cksum, remain %hhu\n",
1047 						    vrs.cksum);
1048 					cksumvalid = 0;
1049 					state = -1;
1050 					break;
1051 				}
1052 			}
1053 			dflen--;
1054 			remain--;
1055 			/* keep in sync w/ state 2's transistions */
1056 			if (dflen == 0)
1057 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1058 			if (dflen == 0 && remain == 0) {
1059 				cfg->vpd.vpd_rocnt = off;
1060 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1061 				    off * sizeof(*cfg->vpd.vpd_ros),
1062 				    M_DEVBUF, M_WAITOK | M_ZERO);
1063 				state = 0;
1064 			} else if (dflen == 0)
1065 				state = 2;
1066 			break;
1067 
1068 		case 4:
1069 			remain--;
1070 			if (remain == 0)
1071 				state = 0;
1072 			break;
1073 
1074 		case 5:	/* VPD-W Keyword Header */
1075 			if (off == alloc) {
1076 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1077 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1078 				    M_DEVBUF, M_WAITOK | M_ZERO);
1079 			}
1080 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1081 			if (vpd_nextbyte(&vrs, &byte2)) {
1082 				state = -2;
1083 				break;
1084 			}
1085 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1086 			if (vpd_nextbyte(&vrs, &byte2)) {
1087 				state = -2;
1088 				break;
1089 			}
1090 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1091 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1092 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1093 			    sizeof(*cfg->vpd.vpd_w[off].value),
1094 			    M_DEVBUF, M_WAITOK);
1095 			remain -= 3;
1096 			i = 0;
1097 			/* keep in sync w/ state 6's transistions */
1098 			if (dflen == 0 && remain == 0)
1099 				state = 0;
1100 			else if (dflen == 0)
1101 				state = 5;
1102 			else
1103 				state = 6;
1104 			break;
1105 
1106 		case 6:	/* VPD-W Keyword Value */
1107 			cfg->vpd.vpd_w[off].value[i++] = byte;
1108 			dflen--;
1109 			remain--;
1110 			/* keep in sync w/ state 5's transistions */
1111 			if (dflen == 0)
1112 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1113 			if (dflen == 0 && remain == 0) {
1114 				cfg->vpd.vpd_wcnt = off;
1115 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1116 				    off * sizeof(*cfg->vpd.vpd_w),
1117 				    M_DEVBUF, M_WAITOK | M_ZERO);
1118 				state = 0;
1119 			} else if (dflen == 0)
1120 				state = 5;
1121 			break;
1122 
1123 		default:
1124 			pci_printf(cfg, "invalid state: %d\n", state);
1125 			state = -1;
1126 			break;
1127 		}
1128 	}
1129 
1130 	if (cksumvalid == 0 || state < -1) {
1131 		/* read-only data bad, clean up */
1132 		if (cfg->vpd.vpd_ros != NULL) {
1133 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1134 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1135 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1136 			cfg->vpd.vpd_ros = NULL;
1137 		}
1138 	}
1139 	if (state < -1) {
1140 		/* I/O error, clean up */
1141 		pci_printf(cfg, "failed to read VPD data.\n");
1142 		if (cfg->vpd.vpd_ident != NULL) {
1143 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1144 			cfg->vpd.vpd_ident = NULL;
1145 		}
1146 		if (cfg->vpd.vpd_w != NULL) {
1147 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1148 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1149 			free(cfg->vpd.vpd_w, M_DEVBUF);
1150 			cfg->vpd.vpd_w = NULL;
1151 		}
1152 	}
1153 	cfg->vpd.vpd_cached = 1;
1154 #undef REG
1155 #undef WREG
1156 }
1157 
1158 int
1159 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1160 {
1161 	struct pci_devinfo *dinfo = device_get_ivars(child);
1162 	pcicfgregs *cfg = &dinfo->cfg;
1163 
1164 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1165 		pci_read_vpd(device_get_parent(dev), cfg);
1166 
1167 	*identptr = cfg->vpd.vpd_ident;
1168 
1169 	if (*identptr == NULL)
1170 		return (ENXIO);
1171 
1172 	return (0);
1173 }
1174 
1175 int
1176 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1177 	const char **vptr)
1178 {
1179 	struct pci_devinfo *dinfo = device_get_ivars(child);
1180 	pcicfgregs *cfg = &dinfo->cfg;
1181 	int i;
1182 
1183 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1184 		pci_read_vpd(device_get_parent(dev), cfg);
1185 
1186 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1187 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1188 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1189 			*vptr = cfg->vpd.vpd_ros[i].value;
1190 			return (0);
1191 		}
1192 
1193 	*vptr = NULL;
1194 	return (ENXIO);
1195 }
1196 
1197 struct pcicfg_vpd *
1198 pci_fetch_vpd_list(device_t dev)
1199 {
1200 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1201 	pcicfgregs *cfg = &dinfo->cfg;
1202 
1203 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1204 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1205 	return (&cfg->vpd);
1206 }
1207 
/*
 * Find the requested HyperTransport capability and return the offset
 * in configuration space via the pointer provided.  The function
 * returns 0 on success and an error code otherwise.
 */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HT capability; bail out if there is none. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Slave and host capabilities are distinguished by the
		 * top three bits of the command register only; all other
		 * HT capability types use the full capability mask.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1250 
1251 /*
1252  * Find the requested capability and return the offset in
1253  * configuration space via the pointer provided.  The function returns
1254  * 0 on success and an error code otherwise.
1255  */
1256 int
1257 pci_find_cap_method(device_t dev, device_t child, int capability,
1258     int *capreg)
1259 {
1260 	struct pci_devinfo *dinfo = device_get_ivars(child);
1261 	pcicfgregs *cfg = &dinfo->cfg;
1262 	u_int32_t status;
1263 	u_int8_t ptr;
1264 
1265 	/*
1266 	 * Check the CAP_LIST bit of the PCI status register first.
1267 	 */
1268 	status = pci_read_config(child, PCIR_STATUS, 2);
1269 	if (!(status & PCIM_STATUS_CAPPRESENT))
1270 		return (ENXIO);
1271 
1272 	/*
1273 	 * Determine the start pointer of the capabilities list.
1274 	 */
1275 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1276 	case PCIM_HDRTYPE_NORMAL:
1277 	case PCIM_HDRTYPE_BRIDGE:
1278 		ptr = PCIR_CAP_PTR;
1279 		break;
1280 	case PCIM_HDRTYPE_CARDBUS:
1281 		ptr = PCIR_CAP_PTR_2;
1282 		break;
1283 	default:
1284 		/* XXX: panic? */
1285 		return (ENXIO);		/* no extended capabilities support */
1286 	}
1287 	ptr = pci_read_config(child, ptr, 1);
1288 
1289 	/*
1290 	 * Traverse the capabilities list.
1291 	 */
1292 	while (ptr != 0) {
1293 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1294 			if (capreg != NULL)
1295 				*capreg = ptr;
1296 			return (0);
1297 		}
1298 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1299 	}
1300 
1301 	return (ENOENT);
1302 }
1303 
1304 /*
1305  * Find the requested extended capability and return the offset in
1306  * configuration space via the pointer provided.  The function returns
1307  * 0 on success and an error code otherwise.
1308  */
1309 int
1310 pci_find_extcap_method(device_t dev, device_t child, int capability,
1311     int *capreg)
1312 {
1313 	struct pci_devinfo *dinfo = device_get_ivars(child);
1314 	pcicfgregs *cfg = &dinfo->cfg;
1315 	uint32_t ecap;
1316 	uint16_t ptr;
1317 
1318 	/* Only supported for PCI-express devices. */
1319 	if (cfg->pcie.pcie_location == 0)
1320 		return (ENXIO);
1321 
1322 	ptr = PCIR_EXTCAP;
1323 	ecap = pci_read_config(child, ptr, 4);
1324 	if (ecap == 0xffffffff || ecap == 0)
1325 		return (ENOENT);
1326 	for (;;) {
1327 		if (PCI_EXTCAP_ID(ecap) == capability) {
1328 			if (capreg != NULL)
1329 				*capreg = ptr;
1330 			return (0);
1331 		}
1332 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1333 		if (ptr == 0)
1334 			break;
1335 		ecap = pci_read_config(child, ptr, 4);
1336 	}
1337 
1338 	return (ENOENT);
1339 }
1340 
1341 /*
1342  * Support for MSI-X message interrupts.
1343  */
1344 void
1345 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1346     uint64_t address, uint32_t data)
1347 {
1348 	struct pci_devinfo *dinfo = device_get_ivars(child);
1349 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1350 	uint32_t offset;
1351 
1352 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1353 	offset = msix->msix_table_offset + index * 16;
1354 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1355 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1356 	bus_write_4(msix->msix_table_res, offset + 8, data);
1357 
1358 	/* Enable MSI -> HT mapping. */
1359 	pci_ht_map_msi(child, address);
1360 }
1361 
1362 void
1363 pci_mask_msix(device_t dev, u_int index)
1364 {
1365 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1366 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1367 	uint32_t offset, val;
1368 
1369 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1370 	offset = msix->msix_table_offset + index * 16 + 12;
1371 	val = bus_read_4(msix->msix_table_res, offset);
1372 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1373 		val |= PCIM_MSIX_VCTRL_MASK;
1374 		bus_write_4(msix->msix_table_res, offset, val);
1375 	}
1376 }
1377 
1378 void
1379 pci_unmask_msix(device_t dev, u_int index)
1380 {
1381 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1382 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1383 	uint32_t offset, val;
1384 
1385 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1386 	offset = msix->msix_table_offset + index * 16 + 12;
1387 	val = bus_read_4(msix->msix_table_res, offset);
1388 	if (val & PCIM_MSIX_VCTRL_MASK) {
1389 		val &= ~PCIM_MSIX_VCTRL_MASK;
1390 		bus_write_4(msix->msix_table_res, offset, val);
1391 	}
1392 }
1393 
1394 int
1395 pci_pending_msix(device_t dev, u_int index)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	uint32_t offset, bit;
1400 
1401 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1402 	offset = msix->msix_pba_offset + (index / 32) * 4;
1403 	bit = 1 << index % 32;
1404 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1405 }
1406 
/*
 * Restore MSI-X registers and table during resume.  If MSI-X is
 * enabled then walk the virtual table to restore the actual MSI-X
 * table.
 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1439 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		/* The PBA lives in a different BAR; it must be mapped too. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never request more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* A partial allocation is OK; none at all is not. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: table entry i uses 1-based vector i+1. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1579 
1580 /*
1581  * By default, pci_alloc_msix() will assign the allocated IRQ
1582  * resources consecutively to the first N messages in the MSI-X table.
1583  * However, device drivers may want to use different layouts if they
1584  * either receive fewer messages than they asked for, or they wish to
1585  * populate the MSI-X table sparsely.  This method allows the driver
1586  * to specify what layout it wants.  It must be called after a
1587  * successful pci_alloc_msix() but before any of the associated
1588  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1589  *
1590  * The 'vectors' array contains 'count' message vectors.  The array
1591  * maps directly to the MSI-X table in that index 0 in the array
1592  * specifies the vector for the first message in the MSI-X table, etc.
1593  * The vector value in each array index can either be 0 to indicate
1594  * that no vector should be assigned to a message slot, or it can be a
1595  * number from 1 to N (where N is the count returned from a
1596  * succcessful call to pci_alloc_msix()) to indicate which message
1597  * vector (IRQ) to be used for the corresponding message.
1598  *
1599  * On successful return, each message with a non-zero vector will have
1600  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1601  * 1.  Additionally, if any of the IRQs allocated via the previous
1602  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1603  * will be freed back to the system automatically.
1604  *
1605  * For example, suppose a driver has a MSI-X table with 6 messages and
1606  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1607  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1608  * C.  After the call to pci_alloc_msix(), the device will be setup to
1609  * have an MSI-X table of ABC--- (where - means no vector assigned).
1610  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1611  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1612  * be freed back to the system.  This device will also have valid
1613  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1614  *
1615  * In any case, the SYS_RES_IRQ rid X will always map to the message
1616  * at MSI-X table index X - 1 and will only be valid if a vector is
1617  * assigned to that table entry.
1618  */
1619 int
1620 pci_remap_msix_method(device_t dev, device_t child, int count,
1621     const u_int *vectors)
1622 {
1623 	struct pci_devinfo *dinfo = device_get_ivars(child);
1624 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1625 	struct resource_list_entry *rle;
1626 	int i, irq, j, *used;
1627 
1628 	/*
1629 	 * Have to have at least one message in the table but the
1630 	 * table can't be bigger than the actual MSI-X table in the
1631 	 * device.
1632 	 */
1633 	if (count == 0 || count > msix->msix_msgnum)
1634 		return (EINVAL);
1635 
1636 	/* Sanity check the vectors. */
1637 	for (i = 0; i < count; i++)
1638 		if (vectors[i] > msix->msix_alloc)
1639 			return (EINVAL);
1640 
1641 	/*
1642 	 * Make sure there aren't any holes in the vectors to be used.
1643 	 * It's a big pain to support it, and it doesn't really make
1644 	 * sense anyway.  Also, at least one vector must be used.
1645 	 */
1646 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1647 	    M_ZERO);
1648 	for (i = 0; i < count; i++)
1649 		if (vectors[i] != 0)
1650 			used[vectors[i] - 1] = 1;
1651 	for (i = 0; i < msix->msix_alloc - 1; i++)
1652 		if (used[i] == 0 && used[i + 1] == 1) {
1653 			free(used, M_DEVBUF);
1654 			return (EINVAL);
1655 		}
1656 	if (used[0] != 1) {
1657 		free(used, M_DEVBUF);
1658 		return (EINVAL);
1659 	}
1660 
1661 	/* Make sure none of the resources are allocated. */
1662 	for (i = 0; i < msix->msix_table_len; i++) {
1663 		if (msix->msix_table[i].mte_vector == 0)
1664 			continue;
1665 		if (msix->msix_table[i].mte_handlers > 0)
1666 			return (EBUSY);
1667 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1668 		KASSERT(rle != NULL, ("missing resource"));
1669 		if (rle->res != NULL)
1670 			return (EBUSY);
1671 	}
1672 
1673 	/* Free the existing resource list entries. */
1674 	for (i = 0; i < msix->msix_table_len; i++) {
1675 		if (msix->msix_table[i].mte_vector == 0)
1676 			continue;
1677 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1678 	}
1679 
1680 	/*
1681 	 * Build the new virtual table keeping track of which vectors are
1682 	 * used.
1683 	 */
1684 	free(msix->msix_table, M_DEVBUF);
1685 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1686 	    M_DEVBUF, M_WAITOK | M_ZERO);
1687 	for (i = 0; i < count; i++)
1688 		msix->msix_table[i].mte_vector = vectors[i];
1689 	msix->msix_table_len = count;
1690 
1691 	/* Free any unused IRQs and resize the vectors array if necessary. */
1692 	j = msix->msix_alloc - 1;
1693 	if (used[j] == 0) {
1694 		struct msix_vector *vec;
1695 
1696 		while (used[j] == 0) {
1697 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1698 			    msix->msix_vectors[j].mv_irq);
1699 			j--;
1700 		}
1701 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1702 		    M_WAITOK);
1703 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1704 		    (j + 1));
1705 		free(msix->msix_vectors, M_DEVBUF);
1706 		msix->msix_vectors = vec;
1707 		msix->msix_alloc = j + 1;
1708 	}
1709 	free(used, M_DEVBUF);
1710 
1711 	/* Map the IRQs onto the rids. */
1712 	for (i = 0; i < count; i++) {
1713 		if (vectors[i] == 0)
1714 			continue;
1715 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1716 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1717 		    irq, 1);
1718 	}
1719 
1720 	if (bootverbose) {
1721 		device_printf(child, "Remapped MSI-X IRQs as: ");
1722 		for (i = 0; i < count; i++) {
1723 			if (i != 0)
1724 				printf(", ");
1725 			if (vectors[i] == 0)
1726 				printf("---");
1727 			else
1728 				printf("%d",
1729 				    msix->msix_vectors[vectors[i]].mv_irq);
1730 		}
1731 		printf("\n");
1732 	}
1733 
1734 	return (0);
1735 }
1736 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if
 * any message still has an established handler or an allocated
 * resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1783 
1784 /*
1785  * Return the max supported MSI-X messages this device supports.
1786  * Basically, assuming the MD code can alloc messages, this function
1787  * should return the maximum value that pci_alloc_msix() can return.
1788  * Thus, it is subject to the tunables, etc.
1789  */
1790 int
1791 pci_msix_count_method(device_t dev, device_t child)
1792 {
1793 	struct pci_devinfo *dinfo = device_get_ivars(child);
1794 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1795 
1796 	if (pci_do_msix && msix->msix_location != 0)
1797 		return (msix->msix_msgnum);
1798 	return (0);
1799 }
1800 
/*
 * HyperTransport MSI mapping control
 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do without an HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Enable the mapping when a non-zero address is being
	 * programmed, the mapping is currently off, and the address
	 * shares the capability's 1MB-aligned window (upper bits
	 * above bit 20 match).
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	/* A zero address means MSI is being torn down: turn mapping off. */
	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1828 
1829 int
1830 pci_get_max_read_req(device_t dev)
1831 {
1832 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1833 	int cap;
1834 	uint16_t val;
1835 
1836 	cap = dinfo->cfg.pcie.pcie_location;
1837 	if (cap == 0)
1838 		return (0);
1839 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1840 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1841 	val >>= 12;
1842 	return (1 << (val + 7));
1843 }
1844 
1845 int
1846 pci_set_max_read_req(device_t dev, int size)
1847 {
1848 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1849 	int cap;
1850 	uint16_t val;
1851 
1852 	cap = dinfo->cfg.pcie.pcie_location;
1853 	if (cap == 0)
1854 		return (0);
1855 	if (size < 128)
1856 		size = 128;
1857 	if (size > 4096)
1858 		size = 4096;
1859 	size = (1 << (fls(size) - 1));
1860 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1861 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1862 	val |= (fls(size) - 8) << 12;
1863 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1864 	return (size);
1865 }
1866 
/*
 * Support for MSI message signalled interrupts.
 */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable: data register is 4 bytes further out. */
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
1897 
/*
 * Disable MSI delivery for 'child': tear down any HyperTransport MSI
 * mapping first, then clear the enable bit in the MSI control
 * register.
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
1912 
/*
 * Restore MSI registers during resume.  If MSI is enabled then
 * restore the data and address registers in addition to the control
 * register.
 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data values into the device. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit layout places the data register further out. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1943 
/*
 * Re-route an already-allocated MSI or MSI-X IRQ for a device: look the
 * IRQ up among the device's vectors and, when found, ask the parent
 * bridge for fresh address/data values and reprogram the hardware.
 * Returns 0 on success, ENOENT when the IRQ does not belong to this
 * device, or an error from the bridge's MAP_MSI method.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			/*
			 * MSI vectors occupy rids 1..msi_alloc.
			 * NOTE(review): rle is dereferenced without a NULL
			 * check — assumes the entries added at allocation
			 * time are still present; confirm.
			 */
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable MSI while rewriting addr/data. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/*
				 * Reprogram every active table slot that
				 * points at this vector, keeping the entry
				 * masked while it is rewritten.
				 */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
				/*
				 * NOTE(review): control falls through and the
				 * function returns ENOENT even after a
				 * successful MSI-X update here — confirm
				 * callers tolerate that return value.
				 */
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2016 
2017 /*
2018  * Returns true if the specified device is blacklisted because MSI
2019  * doesn't work.
2020  */
2021 int
2022 pci_msi_device_blacklisted(device_t dev)
2023 {
2024 
2025 	if (!pci_honor_msi_blacklist)
2026 		return (0);
2027 
2028 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2029 }
2030 
2031 /*
2032  * Determine if MSI is blacklisted globally on this system.  Currently,
2033  * we just check for blacklisted chipsets as represented by the
2034  * host-PCI bridge at device 0:0:0.  In the future, it may become
2035  * necessary to check other system attributes, such as the kenv values
2036  * that give the motherboard manufacturer and model number.
2037  */
2038 static int
2039 pci_msi_blacklisted(void)
2040 {
2041 	device_t dev;
2042 
2043 	if (!pci_honor_msi_blacklist)
2044 		return (0);
2045 
2046 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2047 	if (!(pcie_chipset || pcix_chipset)) {
2048 		if (vm_guest != VM_GUEST_NO) {
2049 			/*
2050 			 * Whitelist older chipsets in virtual
2051 			 * machines known to support MSI.
2052 			 */
2053 			dev = pci_find_bsf(0, 0, 0);
2054 			if (dev != NULL)
2055 				return (!pci_has_quirk(pci_get_devid(dev),
2056 					PCI_QUIRK_ENABLE_MSI_VM));
2057 		}
2058 		return (1);
2059 	}
2060 
2061 	dev = pci_find_bsf(0, 0, 0);
2062 	if (dev != NULL)
2063 		return (pci_msi_device_blacklisted(dev));
2064 	return (0);
2065 }
2066 
2067 /*
2068  * Returns true if the specified device is blacklisted because MSI-X
2069  * doesn't work.  Note that this assumes that if MSI doesn't work,
2070  * MSI-X doesn't either.
2071  */
2072 int
2073 pci_msix_device_blacklisted(device_t dev)
2074 {
2075 
2076 	if (!pci_honor_msi_blacklist)
2077 		return (0);
2078 
2079 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2080 		return (1);
2081 
2082 	return (pci_msi_device_blacklisted(dev));
2083 }
2084 
2085 /*
2086  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2087  * is blacklisted, assume that MSI-X is as well.  Check for additional
2088  * chipsets where MSI works but MSI-X does not.
2089  */
2090 static int
2091 pci_msix_blacklisted(void)
2092 {
2093 	device_t dev;
2094 
2095 	if (!pci_honor_msi_blacklist)
2096 		return (0);
2097 
2098 	dev = pci_find_bsf(0, 0, 0);
2099 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2100 	    PCI_QUIRK_DISABLE_MSIX))
2101 		return (1);
2102 
2103 	return (pci_msi_blacklisted());
2104 }
2105 
/*
 * Attempt to allocate *count MSI messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
 *
 * Fails with ENXIO when messages are already allocated or MSI is
 * blacklisted, ENODEV when the device lacks an MSI capability (or MSI
 * is administratively disabled), and EINVAL for a zero or
 * non-power-of-2 request.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request until the parent bridge can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes log2 of the message count, hence ffs(actual) - 1 for
	 * the power-of-2 'actual'.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2229 
/*
 * Release the MSI messages associated with this device.  Fails with
 * EBUSY when any message still has a handler established or a
 * resource allocated; MSI-X release is attempted first so this method
 * serves both flavors.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ so it can be handed back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2278 
2279 /*
2280  * Return the max supported MSI messages this device supports.
2281  * Basically, assuming the MD code can alloc messages, this function
2282  * should return the maximum value that pci_alloc_msi() can return.
2283  * Thus, it is subject to the tunables, etc.
2284  */
2285 int
2286 pci_msi_count_method(device_t dev, device_t child)
2287 {
2288 	struct pci_devinfo *dinfo = device_get_ivars(child);
2289 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2290 
2291 	if (pci_do_msi && msi->msi_location != 0)
2292 		return (msi->msi_msgnum);
2293 	return (0);
2294 }
2295 
2296 /* free pcicfgregs structure and all depending data structures */
2297 
2298 int
2299 pci_freecfg(struct pci_devinfo *dinfo)
2300 {
2301 	struct devlist *devlist_head;
2302 	struct pci_map *pm, *next;
2303 	int i;
2304 
2305 	devlist_head = &pci_devq;
2306 
2307 	if (dinfo->cfg.vpd.vpd_reg) {
2308 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2309 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2310 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2311 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2312 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2313 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2314 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2315 	}
2316 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2317 		free(pm, M_DEVBUF);
2318 	}
2319 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2320 	free(dinfo, M_DEVBUF);
2321 
2322 	/* increment the generation count */
2323 	pci_generation++;
2324 
2325 	/* we're losing one device */
2326 	pci_numdevs--;
2327 	return (0);
2328 }
2329 
2330 /*
2331  * PCI power manangement
2332  */
2333 int
2334 pci_set_powerstate_method(device_t dev, device_t child, int state)
2335 {
2336 	struct pci_devinfo *dinfo = device_get_ivars(child);
2337 	pcicfgregs *cfg = &dinfo->cfg;
2338 	uint16_t status;
2339 	int result, oldstate, highest, delay;
2340 
2341 	if (cfg->pp.pp_cap == 0)
2342 		return (EOPNOTSUPP);
2343 
2344 	/*
2345 	 * Optimize a no state change request away.  While it would be OK to
2346 	 * write to the hardware in theory, some devices have shown odd
2347 	 * behavior when going from D3 -> D3.
2348 	 */
2349 	oldstate = pci_get_powerstate(child);
2350 	if (oldstate == state)
2351 		return (0);
2352 
2353 	/*
2354 	 * The PCI power management specification states that after a state
2355 	 * transition between PCI power states, system software must
2356 	 * guarantee a minimal delay before the function accesses the device.
2357 	 * Compute the worst case delay that we need to guarantee before we
2358 	 * access the device.  Many devices will be responsive much more
2359 	 * quickly than this delay, but there are some that don't respond
2360 	 * instantly to state changes.  Transitions to/from D3 state require
2361 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2362 	 * is done below with DELAY rather than a sleeper function because
2363 	 * this function can be called from contexts where we cannot sleep.
2364 	 */
2365 	highest = (oldstate > state) ? oldstate : state;
2366 	if (highest == PCI_POWERSTATE_D3)
2367 	    delay = 10000;
2368 	else if (highest == PCI_POWERSTATE_D2)
2369 	    delay = 200;
2370 	else
2371 	    delay = 0;
2372 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2373 	    & ~PCIM_PSTAT_DMASK;
2374 	result = 0;
2375 	switch (state) {
2376 	case PCI_POWERSTATE_D0:
2377 		status |= PCIM_PSTAT_D0;
2378 		break;
2379 	case PCI_POWERSTATE_D1:
2380 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2381 			return (EOPNOTSUPP);
2382 		status |= PCIM_PSTAT_D1;
2383 		break;
2384 	case PCI_POWERSTATE_D2:
2385 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2386 			return (EOPNOTSUPP);
2387 		status |= PCIM_PSTAT_D2;
2388 		break;
2389 	case PCI_POWERSTATE_D3:
2390 		status |= PCIM_PSTAT_D3;
2391 		break;
2392 	default:
2393 		return (EINVAL);
2394 	}
2395 
2396 	if (bootverbose)
2397 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2398 		    state);
2399 
2400 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2401 	if (delay)
2402 		DELAY(delay);
2403 	return (0);
2404 }
2405 
2406 int
2407 pci_get_powerstate_method(device_t dev, device_t child)
2408 {
2409 	struct pci_devinfo *dinfo = device_get_ivars(child);
2410 	pcicfgregs *cfg = &dinfo->cfg;
2411 	uint16_t status;
2412 	int result;
2413 
2414 	if (cfg->pp.pp_cap != 0) {
2415 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2416 		switch (status & PCIM_PSTAT_DMASK) {
2417 		case PCIM_PSTAT_D0:
2418 			result = PCI_POWERSTATE_D0;
2419 			break;
2420 		case PCIM_PSTAT_D1:
2421 			result = PCI_POWERSTATE_D1;
2422 			break;
2423 		case PCIM_PSTAT_D2:
2424 			result = PCI_POWERSTATE_D2;
2425 			break;
2426 		case PCIM_PSTAT_D3:
2427 			result = PCI_POWERSTATE_D3;
2428 			break;
2429 		default:
2430 			result = PCI_POWERSTATE_UNKNOWN;
2431 			break;
2432 		}
2433 	} else {
2434 		/* No support, device is always at D0 */
2435 		result = PCI_POWERSTATE_D0;
2436 	}
2437 	return (result);
2438 }
2439 
2440 /*
2441  * Some convenience functions for PCI device drivers.
2442  */
2443 
2444 static __inline void
2445 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2446 {
2447 	uint16_t	command;
2448 
2449 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2450 	command |= bit;
2451 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2452 }
2453 
2454 static __inline void
2455 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2456 {
2457 	uint16_t	command;
2458 
2459 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2460 	command &= ~bit;
2461 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2462 }
2463 
/*
 * Set PCIM_CMD_BUSMASTEREN in the child's command register so the
 * device may act as a bus master (initiate DMA).  Always returns 0.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2470 
/*
 * Clear PCIM_CMD_BUSMASTEREN in the child's command register,
 * preventing the device from initiating DMA.  Always returns 0.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2477 
2478 int
2479 pci_enable_io_method(device_t dev, device_t child, int space)
2480 {
2481 	uint16_t bit;
2482 
2483 	switch(space) {
2484 	case SYS_RES_IOPORT:
2485 		bit = PCIM_CMD_PORTEN;
2486 		break;
2487 	case SYS_RES_MEMORY:
2488 		bit = PCIM_CMD_MEMEN;
2489 		break;
2490 	default:
2491 		return (EINVAL);
2492 	}
2493 	pci_set_command_bit(dev, child, bit);
2494 	return (0);
2495 }
2496 
2497 int
2498 pci_disable_io_method(device_t dev, device_t child, int space)
2499 {
2500 	uint16_t bit;
2501 
2502 	switch(space) {
2503 	case SYS_RES_IOPORT:
2504 		bit = PCIM_CMD_PORTEN;
2505 		break;
2506 	case SYS_RES_MEMORY:
2507 		bit = PCIM_CMD_MEMEN;
2508 		break;
2509 	default:
2510 		return (EINVAL);
2511 	}
2512 	pci_clear_command_bit(dev, child, bit);
2513 	return (0);
2514 }
2515 
2516 /*
2517  * New style pci driver.  Parent device is either a pci-host-bridge or a
2518  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2519  */
2520 
/*
 * Print the interesting parts of a device's config header when booting
 * verbose: IDs, location, class, command/status, latency/grant timing,
 * interrupt routing, and the power-management, MSI and MSI-X
 * capabilities when present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Interrupt pin 0 means the function uses no INTx pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management: supported and current D-states. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2577 
2578 static int
2579 pci_porten(device_t dev)
2580 {
2581 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2582 }
2583 
2584 static int
2585 pci_memen(device_t dev)
2586 {
2587 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2588 }
2589 
/*
 * Probe a BAR: return its current value in *mapp and the all-ones
 * sizing value in *testvalp.  Decoding is disabled around the sizing
 * writes so the device never responds at the bogus intermediate
 * address, and the original BAR value is restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2653 
2654 static void
2655 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2656 {
2657 	struct pci_devinfo *dinfo;
2658 	int ln2range;
2659 
2660 	/* The device ROM BAR is always a 32-bit memory BAR. */
2661 	dinfo = device_get_ivars(dev);
2662 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2663 		ln2range = 32;
2664 	else
2665 		ln2range = pci_maprange(pm->pm_value);
2666 	pci_write_config(dev, pm->pm_reg, base, 4);
2667 	if (ln2range == 64)
2668 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2669 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2670 	if (ln2range == 64)
2671 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2672 		    pm->pm_reg + 4, 4) << 32;
2673 }
2674 
2675 struct pci_map *
2676 pci_find_bar(device_t dev, int reg)
2677 {
2678 	struct pci_devinfo *dinfo;
2679 	struct pci_map *pm;
2680 
2681 	dinfo = device_get_ivars(dev);
2682 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2683 		if (pm->pm_reg == reg)
2684 			return (pm);
2685 	}
2686 	return (NULL);
2687 }
2688 
2689 int
2690 pci_bar_enabled(device_t dev, struct pci_map *pm)
2691 {
2692 	struct pci_devinfo *dinfo;
2693 	uint16_t cmd;
2694 
2695 	dinfo = device_get_ivars(dev);
2696 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2697 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2698 		return (0);
2699 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2700 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2701 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2702 	else
2703 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2704 }
2705 
/*
 * Record a newly discovered BAR (raw value and log2 size) for this
 * device, keeping the per-device BAR list sorted by config register
 * offset.  Returns the new list entry.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new BAR keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		/* Empty list: the new entry becomes the only element. */
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2730 
2731 static void
2732 pci_restore_bars(device_t dev)
2733 {
2734 	struct pci_devinfo *dinfo;
2735 	struct pci_map *pm;
2736 	int ln2range;
2737 
2738 	dinfo = device_get_ivars(dev);
2739 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2740 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2741 			ln2range = 32;
2742 		else
2743 			ln2range = pci_maprange(pm->pm_value);
2744 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2745 		if (ln2range == 64)
2746 			pci_write_config(dev, pm->pm_reg + 4,
2747 			    pm->pm_value >> 32, 4);
2748 	}
2749 }
2750 
/*
 * Add a resource based on a pci map register. Return 1 if the map
 * register is a 32bit map register or 2 if it is a 64bit register.
 *
 * 'force' makes the resource entry appear even for BARs the firmware
 * left unprogrammed; 'prefetch' marks the resource RF_PREFETCHABLE
 * (also set automatically from the BAR's prefetch bit).  The BAR's
 * range is reserved from the parent so later driver allocations
 * inherit it.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR length in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	/* log2 thresholds: 4 -> 16 bytes (memory), 2 -> 4 bytes (I/O). */
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject bases that do not fit in the platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with whatever range we actually got. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2922 
2923 /*
2924  * For ATA devices we need to decide early what addressing mode to use.
2925  * Legacy demands that the primary and secondary ATA ports sits on the
2926  * same addresses that old ISA hardware did. This dictates that we use
2927  * those addresses and ignore the BAR's if we cannot set PCI native
2928  * addressing mode.
2929  */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel is in native mode: use its BARs. */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/*
		 * Primary channel is in legacy mode: ignore the BARs
		 * and reserve the fixed ISA-compatible port ranges,
		 * keyed by the BAR rids they stand in for.
		 */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: use its BARs. */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Secondary channel legacy-mode fixed port ranges. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BARs 4 and 5 (e.g. bus-master DMA) are always mapped normally. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2984 
/*
 * Determine the IRQ to use for this device's legacy INTx interrupt,
 * update the intline config register if it changed, and add the IRQ to
 * the device's resource list as interrupt rid 0.  A user tunable takes
 * precedence; otherwise the intline value or bus routing is used.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only IRQ values in the range 1..254 are accepted. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3032 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* The OHCI operational registers live behind BAR 0 (memory). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* OHCI_IR set: firmware (SMM) currently owns the HC. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; force the controller to reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3069 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* UHCI registers are I/O-port mapped; clear INTR enables. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3093 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* EHCI capability/operational registers are behind BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended-capability list, which (unlike XHCI) lives in PCI
	 * config space; EECP holds the config-space offset of each entry.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Non-zero BIOS semaphore means the firmware owns the HC. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore and wait for the BIOS to yield. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3149 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* XHCI registers are memory-mapped behind BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Sentinel: all-ones makes XHCI_XECP_NEXT(eec) non-zero so the
	 * loop condition passes on the first iteration. */
	eec = -1;

	/*
	 * Synchronise with the BIOS if it owns the controller.  The
	 * extended-capability list is in MMIO space; offsets are in
	 * 32-bit dwords, hence the << 2 scaling.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* Non-zero BIOS semaphore means firmware owns the HC. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read USBSTS to flush the preceding write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3211 
3212 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a PCI-PCI or
 * CardBus bridge from the parent's bus-number rman.  If the range is
 * invalid, the reservation fails, or pci_clear_buses is set, the
 * secbus/subbus registers are cleared so the range is renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary/subordinate registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		/* Reservation failed; fall through and clear the range. */
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3317 
/*
 * Allocate the secondary bus number range (rid 0) for a bridge child.
 * If the range was not reserved earlier (e.g. the firmware-programmed
 * range was cleared), lazily reserve one here and program the bridge's
 * secbus/subbus registers to match the reservation.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary/subordinate registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* A bridge has exactly one bus range resource, rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve inactive so the range is held but not mapped. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3368 #endif
3369 
/*
 * Populate a new child device's resource list: map its BARs (with quirk
 * and ATA special-casing), assign its INTx interrupt, perform early USB
 * controller takeover from SMM, and reserve bridge bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/*
		 * Note: no loop increment here; pci_add_map() returns
		 * the number of BAR registers consumed (2 for 64-bit).
		 */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from firmware (SMM) control. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3451 
3452 static struct pci_devinfo *
3453 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3454     int slot, int func, size_t dinfo_size)
3455 {
3456 	struct pci_devinfo *dinfo;
3457 
3458 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3459 	if (dinfo != NULL)
3460 		pci_add_child(dev, dinfo);
3461 
3462 	return (dinfo);
3463 }
3464 
/*
 * Enumerate all devices on a PCI bus, adding each discovered function
 * as a child of 'dev'.  Slot 0, function 0 is probed first so ARI can
 * be enabled before the rest of the bus is scanned (ARI changes which
 * slot/function combinations are legal).
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func resets to 0 after slot 0 has been handled. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan the extra functions only on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3511 
/*
 * Create the newbus child for a discovered PCI function and hook it up:
 * ivars, resource list, config save/restore, resource discovery, and
 * finally the child-added bus callback.  The ordering here matters:
 * config state must be saved/restored before resources are probed.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3524 
/*
 * Default implementation of the pci_child_added bus method: a no-op.
 * Subclassed PCI buses may override this to act on new children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3530 
3531 static int
3532 pci_probe(device_t dev)
3533 {
3534 
3535 	device_set_desc(dev, "PCI bus");
3536 
3537 	/* Allow other subclasses to override this driver. */
3538 	return (BUS_PROBE_GENERIC);
3539 }
3540 
/*
 * Common attach work shared by PCI bus drivers: reserve our own bus
 * number (when bus-number resources are enabled) and establish the DMA
 * tag used by children, optionally bounded by PCI_DMA_BOUNDARY for
 * top-level PCI buses.  Returns 0 on success or ENXIO.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	rid = 0;
	/* Hold our own bus number so nothing else can claim it. */
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Only create a bounded tag for a top-level PCI bus (one whose
	 * grandparent is not itself a PCI bus); nested buses inherit.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or default to) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3587 
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3608 
3609 #ifdef PCI_RES_BUS
/*
 * Detach the bus: detach all children first, then release the bus
 * number reserved in pci_attach_common().
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
3622 #endif
3623 
3624 static void
3625 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3626     int state)
3627 {
3628 	device_t child, pcib;
3629 	struct pci_devinfo *dinfo;
3630 	int dstate, i;
3631 
3632 	/*
3633 	 * Set the device to the given state.  If the firmware suggests
3634 	 * a different power state, use it instead.  If power management
3635 	 * is not present, the firmware is responsible for managing
3636 	 * device power.  Skip children who aren't attached since they
3637 	 * are handled separately.
3638 	 */
3639 	pcib = device_get_parent(dev);
3640 	for (i = 0; i < numdevs; i++) {
3641 		child = devlist[i];
3642 		dinfo = device_get_ivars(child);
3643 		dstate = state;
3644 		if (device_is_attached(child) &&
3645 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3646 			pci_set_powerstate(child, dstate);
3647 	}
3648 }
3649 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (optionally, per pci_do_power_suspend) power them
 * down to D3.  Returns 0 or the first error from child suspension.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3681 
/*
 * Bus resume method: power children back to D0 (if enabled), restore
 * their config space, then resume them in two passes so that critical
 * infrastructure classes (display, memory, bridge, base peripheral)
 * come back before everything else.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save state for children with no attached driver. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3737 
/*
 * Locate the loader-preloaded PCI vendor database ("pci_vendor_data")
 * and publish it via the pci_vendordata globals.  If no module was
 * preloaded, the globals are left untouched.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this writes one byte at index
			 * pci_vendordata_size, i.e. just past the reported
			 * size; presumably the preloaded image guarantees
			 * room for the terminator — confirm against the
			 * loader's module layout.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3757 
/*
 * Bus driver_added method: when a new driver registers, re-probe all
 * children that currently have no driver (DS_NOTPRESENT), restoring
 * their config state first.  Children that still fail to attach are
 * reported via pci_child_detached().
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	/* Give the driver a chance to add new children first. */
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Device may have been powered down while unclaimed. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3786 
/*
 * Bus setup_intr method.  Establishes the handler via the generic bus
 * code, then for direct children performs the PCI-specific bookkeeping:
 * rid 0 means legacy INTx (enable it); any other rid is an MSI or MSI-X
 * message, which is lazily mapped through the parent bridge, enabled on
 * first use, and reference-counted per message.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI vector lazily on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based; table indices 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the MSI-X vector lazily on first handler. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3878 
3879 int
3880 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3881     void *cookie)
3882 {
3883 	struct msix_table_entry *mte;
3884 	struct resource_list_entry *rle;
3885 	struct pci_devinfo *dinfo;
3886 	int error, rid;
3887 
3888 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3889 		return (EINVAL);
3890 
3891 	/* If this isn't a direct child, just bail out */
3892 	if (device_get_parent(child) != dev)
3893 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3894 
3895 	rid = rman_get_rid(irq);
3896 	if (rid == 0) {
3897 		/* Mask INTx */
3898 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3899 	} else {
3900 		/*
3901 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3902 		 * decrement the appropriate handlers count and mask the
3903 		 * MSI-X message, or disable MSI messages if the count
3904 		 * drops to 0.
3905 		 */
3906 		dinfo = device_get_ivars(child);
3907 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3908 		if (rle->res != irq)
3909 			return (EINVAL);
3910 		if (dinfo->cfg.msi.msi_alloc > 0) {
3911 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3912 			    ("MSI-X index too high"));
3913 			if (dinfo->cfg.msi.msi_handlers == 0)
3914 				return (EINVAL);
3915 			dinfo->cfg.msi.msi_handlers--;
3916 			if (dinfo->cfg.msi.msi_handlers == 0)
3917 				pci_disable_msi(child);
3918 		} else {
3919 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3920 			    ("No MSI or MSI-X interrupts allocated"));
3921 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3922 			    ("MSI-X index too high"));
3923 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3924 			if (mte->mte_handlers == 0)
3925 				return (EINVAL);
3926 			mte->mte_handlers--;
3927 			if (mte->mte_handlers == 0)
3928 				pci_mask_msix(child, rid - 1);
3929 		}
3930 	}
3931 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3932 	if (rid > 0)
3933 		KASSERT(error == 0,
3934 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3935 	return (error);
3936 }
3937 
/*
 * Bus print_child method: print the standard child header, the child's
 * I/O port, memory, and IRQ resources, any device flags, and its
 * slot.function location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3963 
/*
 * Table of generic descriptions for PCI class/subclass codes, consulted
 * by pci_probe_nomatch() when no driver attaches and the loaded vendor
 * database has no entry for the device.  A subclass of -1 is the
 * catch-all row for a class; a more specific subclass row overrides it.
 * The "report" flag selects whether an unmatched device is announced
 * always (1) or only when booting verbose (0).  The table is terminated
 * by a row with a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4058 
4059 void
4060 pci_probe_nomatch(device_t dev, device_t child)
4061 {
4062 	int i, report;
4063 	const char *cp, *scp;
4064 	char *device;
4065 
4066 	/*
4067 	 * Look for a listing for this device in a loaded device database.
4068 	 */
4069 	report = 1;
4070 	if ((device = pci_describe_device(child)) != NULL) {
4071 		device_printf(dev, "<%s>", device);
4072 		free(device, M_DEVBUF);
4073 	} else {
4074 		/*
4075 		 * Scan the class/subclass descriptions for a general
4076 		 * description.
4077 		 */
4078 		cp = "unknown";
4079 		scp = NULL;
4080 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4081 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4082 				if (pci_nomatch_tab[i].subclass == -1) {
4083 					cp = pci_nomatch_tab[i].desc;
4084 					report = pci_nomatch_tab[i].report;
4085 				} else if (pci_nomatch_tab[i].subclass ==
4086 				    pci_get_subclass(child)) {
4087 					scp = pci_nomatch_tab[i].desc;
4088 					report = pci_nomatch_tab[i].report;
4089 				}
4090 			}
4091 		}
4092 		if (report || bootverbose) {
4093 			device_printf(dev, "<%s%s%s>",
4094 			    cp ? cp : "",
4095 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4096 			    scp ? scp : "");
4097 		}
4098 	}
4099 	if (report || bootverbose) {
4100 		printf(" at device %d.%d (no driver attached)\n",
4101 		    pci_get_slot(child), pci_get_function(child));
4102 	}
4103 	pci_cfg_save(child, device_get_ivars(child), 1);
4104 }
4105 
/*
 * Bus callback run after a child device's driver has detached.  Any
 * resources the driver failed to release are reclaimed here (with a
 * diagnostic naming the leak), and the device's config registers are
 * saved, possibly powering the device down.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		/* Best effort; nothing more to do if this fails. */
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4137 
4138 /*
4139  * Parse the PCI device database, if loaded, and return a pointer to a
4140  * description of the device.
4141  *
4142  * The database is flat text formatted as follows:
4143  *
4144  * Any line not in a valid format is ignored.
4145  * Lines are terminated with newline '\n' characters.
4146  *
4147  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4148  * the vendor name.
4149  *
4150  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4151  * - devices cannot be listed without a corresponding VENDOR line.
4152  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4153  * another TAB, then the device name.
4154  */
4155 
4156 /*
4157  * Assuming (ptr) points to the beginning of a line in the database,
4158  * return the vendor or device and description of the next entry.
4159  * The value of (vendor) or (device) inappropriate for the entry type
4160  * is set to -1.  Returns nonzero at the end of the database.
4161  *
4162  * Note that this is slightly unrobust in the face of corrupt data;
4163  * we attempt to safeguard against this by spamming the end of the
4164  * database with a newline when we initialise.
4165  */
4166 static int
4167 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4168 {
4169 	char	*cp = *ptr;
4170 	int	left;
4171 
4172 	*device = -1;
4173 	*vendor = -1;
4174 	**desc = '\0';
4175 	for (;;) {
4176 		left = pci_vendordata_size - (cp - pci_vendordata);
4177 		if (left <= 0) {
4178 			*ptr = cp;
4179 			return(1);
4180 		}
4181 
4182 		/* vendor entry? */
4183 		if (*cp != '\t' &&
4184 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4185 			break;
4186 		/* device entry? */
4187 		if (*cp == '\t' &&
4188 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4189 			break;
4190 
4191 		/* skip to next line */
4192 		while (*cp != '\n' && left > 0) {
4193 			cp++;
4194 			left--;
4195 		}
4196 		if (*cp == '\n') {
4197 			cp++;
4198 			left--;
4199 		}
4200 	}
4201 	/* skip to next line */
4202 	while (*cp != '\n' && left > 0) {
4203 		cp++;
4204 		left--;
4205 	}
4206 	if (*cp == '\n' && left > 0)
4207 		cp++;
4208 	*ptr = cp;
4209 	return(0);
4210 }
4211 
/*
 * Build a malloc'd "vendor, device" description of (dev) from the loaded
 * vendor database.  Returns NULL if no database is loaded, the vendor is
 * not listed, or allocation fails.  The caller is responsible for freeing
 * the returned string with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the %80[^\n] limit in pci_describe_parse_line(). */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found for this vendor. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor entry ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device id: fall back to the raw hex id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4264 
/*
 * Read one PCI instance variable on behalf of a child device.  The values
 * come from the cached copy of the device's config header (dinfo->cfg).
 * Returns 0 on success, ENOENT for an unknown ivar, and EINVAL for
 * PCI_IVAR_ETHADDR, which generic PCI cannot provide.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, matching config-space layout. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4347 
4348 int
4349 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4350 {
4351 	struct pci_devinfo *dinfo;
4352 
4353 	dinfo = device_get_ivars(child);
4354 
4355 	switch (which) {
4356 	case PCI_IVAR_INTPIN:
4357 		dinfo->cfg.intpin = value;
4358 		return (0);
4359 	case PCI_IVAR_ETHADDR:
4360 	case PCI_IVAR_SUBVENDOR:
4361 	case PCI_IVAR_SUBDEVICE:
4362 	case PCI_IVAR_VENDOR:
4363 	case PCI_IVAR_DEVICE:
4364 	case PCI_IVAR_DEVID:
4365 	case PCI_IVAR_CLASS:
4366 	case PCI_IVAR_SUBCLASS:
4367 	case PCI_IVAR_PROGIF:
4368 	case PCI_IVAR_REVID:
4369 	case PCI_IVAR_IRQ:
4370 	case PCI_IVAR_DOMAIN:
4371 	case PCI_IVAR_BUS:
4372 	case PCI_IVAR_SLOT:
4373 	case PCI_IVAR_FUNCTION:
4374 		return (EINVAL);	/* disallow for now */
4375 
4376 	default:
4377 		return (ENOENT);
4378 	}
4379 }
4380 
4381 #include "opt_ddb.h"
4382 #ifdef DDB
4383 #include <ddb/ddb.h>
4384 #include <sys/cons.h>
4385 
4386 /*
4387  * List resources based on pci map registers, used for within ddb
4388  */
4389 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and print
 * one line per function with its driver name/unit (or a running "none"
 * index), bus address, and identity registers from the cached config.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unnamed devices print as "none<N>" with a running index. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4429 #endif /* DDB */
4430 
/*
 * Lazily reserve the resource backing a BAR of a child device: size the
 * BAR (or reuse the size cached from an earlier failed attempt), check
 * that the requested resource type matches what the BAR decodes, reserve
 * a suitably sized and aligned range from the parent, and program the
 * BAR with the assigned address.  Returns the reserved (inactive)
 * resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4528 
/*
 * Bus method for allocating a resource on behalf of a child device.
 * Requests from grandchildren are passed straight up.  For our own
 * children this performs lazy setup: a legacy interrupt is routed on
 * first use (unless MSI/MSI-X is already active), and port/memory BARs
 * are reserved via pci_reserve_map() before the resource-list
 * allocation is attempted.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All types fall through to a normal resource-list allocation. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4607 
4608 int
4609 pci_release_resource(device_t dev, device_t child, int type, int rid,
4610     struct resource *r)
4611 {
4612 	struct pci_devinfo *dinfo;
4613 	struct resource_list *rl;
4614 	pcicfgregs *cfg;
4615 
4616 	if (device_get_parent(child) != dev)
4617 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4618 		    type, rid, r));
4619 
4620 	dinfo = device_get_ivars(child);
4621 	cfg = &dinfo->cfg;
4622 #ifdef NEW_PCIB
4623 	/*
4624 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4625 	 * those allocations just pass the request up the tree.
4626 	 */
4627 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4628 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4629 		switch (rid) {
4630 		case PCIR_IOBASEL_1:
4631 		case PCIR_MEMBASE_1:
4632 		case PCIR_PMBASEL_1:
4633 			return (bus_generic_release_resource(dev, child, type,
4634 			    rid, r));
4635 		}
4636 	}
4637 #endif
4638 
4639 	rl = &dinfo->resources;
4640 	return (resource_list_release(rl, dev, child, type, rid, r));
4641 }
4642 
4643 int
4644 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4645     struct resource *r)
4646 {
4647 	struct pci_devinfo *dinfo;
4648 	int error;
4649 
4650 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4651 	if (error)
4652 		return (error);
4653 
4654 	/* Enable decoding in the command register when activating BARs. */
4655 	if (device_get_parent(child) == dev) {
4656 		/* Device ROMs need their decoding explicitly enabled. */
4657 		dinfo = device_get_ivars(child);
4658 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4659 			pci_write_bar(child, pci_find_bar(child, rid),
4660 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4661 		switch (type) {
4662 		case SYS_RES_IOPORT:
4663 		case SYS_RES_MEMORY:
4664 			error = PCI_ENABLE_IO(dev, child, type);
4665 			break;
4666 		}
4667 	}
4668 	return (error);
4669 }
4670 
4671 int
4672 pci_deactivate_resource(device_t dev, device_t child, int type,
4673     int rid, struct resource *r)
4674 {
4675 	struct pci_devinfo *dinfo;
4676 	int error;
4677 
4678 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4679 	if (error)
4680 		return (error);
4681 
4682 	/* Disable decoding for device ROMs. */
4683 	if (device_get_parent(child) == dev) {
4684 		dinfo = device_get_ivars(child);
4685 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4686 			pci_write_bar(child, pci_find_bar(child, rid),
4687 			    rman_get_start(r));
4688 	}
4689 	return (0);
4690 }
4691 
/*
 * Detach (if attached) and destroy a child device: disable its config
 * space decoding, release every resource still on its resource list,
 * delete the device, and free its devinfo.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * Resources the (now-detached) driver never gave
			 * back are force-released with a diagnostic before
			 * being unreserved.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4731 
4732 void
4733 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4734 {
4735 	struct pci_devinfo *dinfo;
4736 	struct resource_list *rl;
4737 	struct resource_list_entry *rle;
4738 
4739 	if (device_get_parent(child) != dev)
4740 		return;
4741 
4742 	dinfo = device_get_ivars(child);
4743 	rl = &dinfo->resources;
4744 	rle = resource_list_find(rl, type, rid);
4745 	if (rle == NULL)
4746 		return;
4747 
4748 	if (rle->res) {
4749 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4750 		    resource_list_busy(rl, type, rid)) {
4751 			device_printf(dev, "delete_resource: "
4752 			    "Resource still owned by child, oops. "
4753 			    "(type=%d, rid=%d, addr=%lx)\n",
4754 			    type, rid, rman_get_start(rle->res));
4755 			return;
4756 		}
4757 		resource_list_unreserve(rl, dev, child, type, rid);
4758 	}
4759 	resource_list_delete(rl, type, rid);
4760 }
4761 
4762 struct resource_list *
4763 pci_get_resource_list (device_t dev, device_t child)
4764 {
4765 	struct pci_devinfo *dinfo = device_get_ivars(child);
4766 
4767 	return (&dinfo->resources);
4768 }
4769 
4770 bus_dma_tag_t
4771 pci_get_dma_tag(device_t bus, device_t dev)
4772 {
4773 	struct pci_softc *sc = device_get_softc(bus);
4774 
4775 	return (sc->sc_dma_tag);
4776 }
4777 
4778 uint32_t
4779 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4780 {
4781 	struct pci_devinfo *dinfo = device_get_ivars(child);
4782 	pcicfgregs *cfg = &dinfo->cfg;
4783 
4784 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4785 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4786 }
4787 
4788 void
4789 pci_write_config_method(device_t dev, device_t child, int reg,
4790     uint32_t val, int width)
4791 {
4792 	struct pci_devinfo *dinfo = device_get_ivars(child);
4793 	pcicfgregs *cfg = &dinfo->cfg;
4794 
4795 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4796 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4797 }
4798 
4799 int
4800 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4801     size_t buflen)
4802 {
4803 
4804 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4805 	    pci_get_function(child));
4806 	return (0);
4807 }
4808 
4809 int
4810 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4811     size_t buflen)
4812 {
4813 	struct pci_devinfo *dinfo;
4814 	pcicfgregs *cfg;
4815 
4816 	dinfo = device_get_ivars(child);
4817 	cfg = &dinfo->cfg;
4818 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4819 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4820 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4821 	    cfg->progif);
4822 	return (0);
4823 }
4824 
4825 int
4826 pci_assign_interrupt_method(device_t dev, device_t child)
4827 {
4828 	struct pci_devinfo *dinfo = device_get_ivars(child);
4829 	pcicfgregs *cfg = &dinfo->cfg;
4830 
4831 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4832 	    cfg->intpin));
4833 }
4834 
4835 static int
4836 pci_modevent(module_t mod, int what, void *arg)
4837 {
4838 	static struct cdev *pci_cdev;
4839 
4840 	switch (what) {
4841 	case MOD_LOAD:
4842 		STAILQ_INIT(&pci_devq);
4843 		pci_generation = 0;
4844 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4845 		    "pci");
4846 		pci_load_vendor_data();
4847 		break;
4848 
4849 	case MOD_UNLOAD:
4850 		destroy_dev(pci_cdev);
4851 		break;
4852 	}
4853 
4854 	return (0);
4855 }
4856 
/*
 * Re-program the writable PCI Express capability control registers from
 * the copies saved by pci_cfg_save_pcie().  Which registers are written
 * depends on the capability version and the port type: version 2+
 * devices implement all of them, while version 1 devices only implement
 * the ones appropriate to their type.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot registers exist only on ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4892 
4893 static void
4894 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4895 {
4896 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4897 	    dinfo->cfg.pcix.pcix_command,  2);
4898 }
4899 
/*
 * Restore a device's saved configuration registers after a power-state
 * transition or other event that may have reset config space.  Only
 * applies to header type 0 devices; the device is returned to D0 before
 * any registers are written.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4949 
/*
 * Snapshot the writable PCI Express capability control registers so
 * pci_cfg_restore_pcie() can re-program them later.  The set of
 * registers read mirrors the set written on restore: everything for
 * capability version 2+, and only the type-appropriate subset for
 * version 1 devices.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot registers exist only on ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4987 
4988 static void
4989 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4990 {
4991 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4992 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4993 }
4994 
4995 void
4996 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4997 {
4998 	uint32_t cls;
4999 	int ps;
5000 
5001 	/*
5002 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
5003 	 * we know need special treatment.  Type 2 devices are cardbus bridges
5004 	 * which also require special treatment.  Other types are unknown, and
5005 	 * we err on the side of safety by ignoring them.  Powering down
5006 	 * bridges should not be undertaken lightly.
5007 	 */
5008 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
5009 		return;
5010 
5011 	/*
5012 	 * Some drivers apparently write to these registers w/o updating our
5013 	 * cached copy.  No harm happens if we update the copy, so do so here
5014 	 * so we can restore them.  The COMMAND register is modified by the
5015 	 * bus w/o updating the cache.  This should represent the normally
5016 	 * writable portion of the 'defined' part of type 0 headers.  In
5017 	 * theory we also need to save/restore the PCI capability structures
5018 	 * we know about, but apart from power we don't know any that are
5019 	 * writable.
5020 	 */
5021 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5022 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5023 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5024 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5025 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5026 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5027 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5028 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5029 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5030 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5031 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5032 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5033 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5034 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5035 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5036 
5037 	if (dinfo->cfg.pcie.pcie_location != 0)
5038 		pci_cfg_save_pcie(dev, dinfo);
5039 
5040 	if (dinfo->cfg.pcix.pcix_location != 0)
5041 		pci_cfg_save_pcix(dev, dinfo);
5042 
5043 	/*
5044 	 * don't set the state for display devices, base peripherals and
5045 	 * memory devices since bad things happen when they are powered down.
5046 	 * We should (a) have drivers that can easily detach and (b) use
5047 	 * generic drivers for these devices so that some device actually
5048 	 * attaches.  We need to make sure that when we implement (a) we don't
5049 	 * power the device down on a reattach.
5050 	 */
5051 	cls = pci_get_class(dev);
5052 	if (!setstate)
5053 		return;
5054 	switch (pci_do_power_nodriver)
5055 	{
5056 		case 0:		/* NO powerdown at all */
5057 			return;
5058 		case 1:		/* Conservative about what to power down */
5059 			if (cls == PCIC_STORAGE)
5060 				return;
5061 			/*FALLTHROUGH*/
5062 		case 2:		/* Agressive about what to power down */
5063 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5064 			    cls == PCIC_BASEPERIPH)
5065 				return;
5066 			/*FALLTHROUGH*/
5067 		case 3:		/* Power down everything */
5068 			break;
5069 	}
5070 	/*
5071 	 * PCI spec says we can only go into D3 state from D0 state.
5072 	 * Transition from D[12] into D0 before going to D3 state.
5073 	 */
5074 	ps = pci_get_powerstate(dev);
5075 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5076 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5077 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5078 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5079 }
5080 
5081 /* Wrapper APIs suitable for device driver use. */
5082 void
5083 pci_save_state(device_t dev)
5084 {
5085 	struct pci_devinfo *dinfo;
5086 
5087 	dinfo = device_get_ivars(dev);
5088 	pci_cfg_save(dev, dinfo, 0);
5089 }
5090 
5091 void
5092 pci_restore_state(device_t dev)
5093 {
5094 	struct pci_devinfo *dinfo;
5095 
5096 	dinfo = device_get_ivars(dev);
5097 	pci_cfg_restore(dev, dinfo);
5098 }
5099 
5100 static uint16_t
5101 pci_get_rid_method(device_t dev, device_t child)
5102 {
5103 
5104 	return (PCIB_GET_RID(device_get_parent(dev), child));
5105 }
5106