xref: /freebsd/sys/dev/pci/pci.c (revision 529a53abe2287eae08a3af62749273df775254e9)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #define	PCIR_IS_BIOS(cfg, reg)						\
74 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 static int		pci_has_quirk(uint32_t devid, int quirk);
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 static void		pci_load_vendor_data(void);
96 static int		pci_describe_parse_line(char **ptr, int *vendor,
97 			    int *device, char **desc);
98 static char		*pci_describe_device(device_t dev);
99 static int		pci_modevent(module_t mod, int what, void *arg);
100 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101 			    pcicfgregs *cfg);
102 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
103 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t *data);
105 #if 0
106 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t data);
108 #endif
109 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110 static void		pci_disable_msi(device_t dev);
111 static void		pci_enable_msi(device_t dev, uint64_t address,
112 			    uint16_t data);
113 static void		pci_enable_msix(device_t dev, u_int index,
114 			    uint64_t address, uint32_t data);
115 static void		pci_mask_msix(device_t dev, u_int index);
116 static void		pci_unmask_msix(device_t dev, u_int index);
117 static int		pci_msi_blacklisted(void);
118 static int		pci_msix_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pci_remap_intr_method(device_t bus, device_t dev,
122 			    u_int irq);
123 
124 static device_method_t pci_methods[] = {
125 	/* Device interface */
126 	DEVMETHOD(device_probe,		pci_probe),
127 	DEVMETHOD(device_attach,	pci_attach),
128 	DEVMETHOD(device_detach,	bus_generic_detach),
129 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
130 	DEVMETHOD(device_suspend,	pci_suspend),
131 	DEVMETHOD(device_resume,	pci_resume),
132 
133 	/* Bus interface */
134 	DEVMETHOD(bus_print_child,	pci_print_child),
135 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
136 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
137 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
138 	DEVMETHOD(bus_driver_added,	pci_driver_added),
139 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
140 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
141 
142 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
143 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
144 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
145 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
146 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
147 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
148 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
149 	DEVMETHOD(bus_release_resource,	pci_release_resource),
150 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
151 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
152 	DEVMETHOD(bus_child_detached,	pci_child_detached),
153 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
154 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
155 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
156 
157 	/* PCI interface */
158 	DEVMETHOD(pci_read_config,	pci_read_config_method),
159 	DEVMETHOD(pci_write_config,	pci_write_config_method),
160 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
161 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
162 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
163 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
164 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
165 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
166 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
167 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
168 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
169 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
170 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
171 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
172 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
173 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
174 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
175 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
176 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
177 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
178 
179 	DEVMETHOD_END
180 };
181 
182 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
183 
184 static devclass_t pci_devclass;
185 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
186 MODULE_VERSION(pci, 1);
187 
188 static char	*pci_vendordata;
189 static size_t	pci_vendordata_size;
190 
191 struct pci_quirk {
192 	uint32_t devid;	/* Vendor/device of the card */
193 	int	type;
194 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
195 #define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
196 #define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
197 #define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
198 #define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
199 	int	arg1;
200 	int	arg2;
201 };
202 
203 static const struct pci_quirk pci_quirks[] = {
204 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
205 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
206 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
207 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
208 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
209 
210 	/*
211 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
212 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
213 	 */
214 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
215 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216 
217 	/*
218 	 * MSI doesn't work on earlier Intel chipsets including
219 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
220 	 */
221 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
222 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
223 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
224 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
225 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228 
229 	/*
230 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
231 	 * bridge.
232 	 */
233 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 
235 	/*
236 	 * MSI-X allocation doesn't work properly for devices passed through
237 	 * by VMware up to at least ESXi 5.1.
238 	 */
239 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
240 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
241 
242 	/*
243 	 * Some virtualization environments emulate an older chipset
244 	 * but support MSI just fine.  QEMU uses the Intel 82440.
245 	 */
246 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
247 
248 	/*
249 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
250 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
251 	 * It prevents us from attaching hpet(4) when the bit is unset.
252 	 * Note this quirk only affects SB600 revision A13 and earlier.
253 	 * For SB600 A21 and later, firmware must set the bit to hide it.
254 	 * For SB700 and later, it is unused and hardcoded to zero.
255 	 */
256 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
257 
258 	{ 0 }
259 };
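/*
 * Note on the table above: each 'devid' packs the 16-bit device ID into
 * the upper half and the 16-bit vendor ID into the lower half, mirroring
 * the PCIR_DEVVENDOR register layout, so 0x71138086 is device 0x7113 from
 * vendor 0x8086 (Intel).  A minimal sketch of testing an attached device
 * against the table (pci_get_devid() returns the same packed format):
 */
#if 0
	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI))
		device_printf(dev, "MSI disabled by chipset quirk\n");
#endif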
260 
261 /* map register information */
262 #define	PCI_MAPMEM	0x01	/* memory map */
263 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264 #define	PCI_MAPPORT	0x04	/* port map */
265 
266 struct devlist pci_devq;
267 uint32_t pci_generation;
268 uint32_t pci_numdevs = 0;
269 static int pcie_chipset, pcix_chipset;
270 
271 /* sysctl vars */
272 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273 
274 static int pci_enable_io_modes = 1;
275 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277     &pci_enable_io_modes, 1,
278     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279 enable these bits correctly.  We'd like to do this all the time, but there\n\
280 are some peripherals that this causes problems with.");
281 
282 static int pci_do_realloc_bars = 0;
283 TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
284 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
285     &pci_do_realloc_bars, 0,
286     "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
287 
288 static int pci_do_power_nodriver = 0;
289 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
290 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
291     &pci_do_power_nodriver, 0,
292   "Place a function into D3 state when no driver attaches to it.  0 means\n\
293 disable.  1 means conservatively place devices into D3 state.  2 means\n\
294 aggressively place devices into D3 state.  3 means put absolutely everything\n\
295 in D3 state.");
296 
297 int pci_do_power_resume = 1;
298 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
299 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
300     &pci_do_power_resume, 1,
301   "Transition from D3 -> D0 on resume.");
302 
303 int pci_do_power_suspend = 1;
304 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
305 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
306     &pci_do_power_suspend, 1,
307   "Transition from D0 -> D3 on suspend.");
308 
309 static int pci_do_msi = 1;
310 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
311 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
312     "Enable support for MSI interrupts");
313 
314 static int pci_do_msix = 1;
315 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
316 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
317     "Enable support for MSI-X interrupts");
318 
319 static int pci_honor_msi_blacklist = 1;
320 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
321 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
322     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
323 
324 #if defined(__i386__) || defined(__amd64__)
325 static int pci_usb_takeover = 1;
326 #else
327 static int pci_usb_takeover = 0;
328 #endif
329 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
330 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
331     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
332 Disable this if you depend on BIOS emulation of USB devices; that is, if\n\
333 you use USB devices (such as a keyboard or mouse) but do not load USB drivers");
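/*
 * For illustration, the knobs above are boot-time tunables (and most are
 * also writable sysctls).  A hypothetical /boot/loader.conf might contain:
 *
 *	hw.pci.enable_msi="0"		# fall back to INTx everywhere
 *	hw.pci.do_power_nodriver="1"	# power down driverless functions
 *	hw.pci.usb_early_takeover="0"	# keep BIOS legacy USB emulation
 */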
334 
335 static int
336 pci_has_quirk(uint32_t devid, int quirk)
337 {
338 	const struct pci_quirk *q;
339 
340 	for (q = &pci_quirks[0]; q->devid; q++) {
341 		if (q->devid == devid && q->type == quirk)
342 			return (1);
343 	}
344 	return (0);
345 }
346 
347 /* Find a device_t by bus/slot/function in domain 0 */
348 
349 device_t
350 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
351 {
352 
353 	return (pci_find_dbsf(0, bus, slot, func));
354 }
355 
356 /* Find a device_t by domain/bus/slot/function */
357 
358 device_t
359 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
360 {
361 	struct pci_devinfo *dinfo;
362 
363 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
364 		if ((dinfo->cfg.domain == domain) &&
365 		    (dinfo->cfg.bus == bus) &&
366 		    (dinfo->cfg.slot == slot) &&
367 		    (dinfo->cfg.func == func)) {
368 			return (dinfo->cfg.dev);
369 		}
370 	}
371 
372 	return (NULL);
373 }
374 
375 /* Find a device_t by vendor/device ID */
376 
377 device_t
378 pci_find_device(uint16_t vendor, uint16_t device)
379 {
380 	struct pci_devinfo *dinfo;
381 
382 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
383 		if ((dinfo->cfg.vendor == vendor) &&
384 		    (dinfo->cfg.device == device)) {
385 			return (dinfo->cfg.dev);
386 		}
387 	}
388 
389 	return (NULL);
390 }
391 
392 device_t
393 pci_find_class(uint8_t class, uint8_t subclass)
394 {
395 	struct pci_devinfo *dinfo;
396 
397 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
398 		if (dinfo->cfg.baseclass == class &&
399 		    dinfo->cfg.subclass == subclass) {
400 			return (dinfo->cfg.dev);
401 		}
402 	}
403 
404 	return (NULL);
405 }
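/*
 * A minimal usage sketch for the three lookup helpers above.  The class
 * and subclass constants come from pcireg.h; either call returns NULL if
 * no matching device has been enumerated.
 */
#if 0
	device_t isab;

	isab = pci_find_class(PCIC_BRIDGE, PCIS_BRIDGE_ISA);
	if (isab != NULL)
		device_printf(isab, "found the ISA bridge\n");
#endif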
406 
407 static int
408 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
409 {
410 	va_list ap;
411 	int retval;
412 
413 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
414 	    cfg->func);
415 	va_start(ap, fmt);
416 	retval += vprintf(fmt, ap);
417 	va_end(ap);
418 	return (retval);
419 }
420 
421 /* return base address of memory or port map */
422 
423 static pci_addr_t
424 pci_mapbase(uint64_t mapreg)
425 {
426 
427 	if (PCI_BAR_MEM(mapreg))
428 		return (mapreg & PCIM_BAR_MEM_BASE);
429 	else
430 		return (mapreg & PCIM_BAR_IO_BASE);
431 }
432 
433 /* return map type of memory or port map */
434 
435 static const char *
436 pci_maptype(uint64_t mapreg)
437 {
438 
439 	if (PCI_BAR_IO(mapreg))
440 		return ("I/O Port");
441 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
442 		return ("Prefetchable Memory");
443 	return ("Memory");
444 }
445 
446 /* return log2 of map size decoded for memory or port map */
447 
448 static int
449 pci_mapsize(uint64_t testval)
450 {
451 	int ln2size;
452 
453 	testval = pci_mapbase(testval);
454 	ln2size = 0;
455 	if (testval != 0) {
456 		while ((testval & 1) == 0)
457 		{
458 			ln2size++;
459 			testval >>= 1;
460 		}
461 	}
462 	return (ln2size);
463 }
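/*
 * Worked example of the BAR sizing convention: after all 1s are written
 * to a BAR, a device decoding 4KB of memory reads back 0xfffff000 plus
 * its low flag bits.  pci_mapbase() strips the flag bits, leaving
 * 0xfffff000, whose lowest set bit is bit 12, so pci_mapsize() returns
 * ln2size = 12 (1 << 12 == 4096 bytes).
 */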
464 
465 /* return base address of device ROM */
466 
467 static pci_addr_t
468 pci_rombase(uint64_t mapreg)
469 {
470 
471 	return (mapreg & PCIM_BIOS_ADDR_MASK);
472 }
473 
474 /* return log2 of map size decoded for device ROM */
475 
476 static int
477 pci_romsize(uint64_t testval)
478 {
479 	int ln2size;
480 
481 	testval = pci_rombase(testval);
482 	ln2size = 0;
483 	if (testval != 0) {
484 		while ((testval & 1) == 0)
485 		{
486 			ln2size++;
487 			testval >>= 1;
488 		}
489 	}
490 	return (ln2size);
491 }
492 
493 /* return log2 of address range supported by map register */
494 
495 static int
496 pci_maprange(uint64_t mapreg)
497 {
498 	int ln2range = 0;
499 
500 	if (PCI_BAR_IO(mapreg))
501 		ln2range = 32;
502 	else
503 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
504 		case PCIM_BAR_MEM_32:
505 			ln2range = 32;
506 			break;
507 		case PCIM_BAR_MEM_1MB:
508 			ln2range = 20;
509 			break;
510 		case PCIM_BAR_MEM_64:
511 			ln2range = 64;
512 			break;
513 		}
514 	return (ln2range);
515 }
516 
517 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
518 
519 static void
520 pci_fixancient(pcicfgregs *cfg)
521 {
522 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
523 		return;
524 
525 	/* PCI to PCI bridges use header type 1 */
526 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
527 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
528 }
529 
530 /* extract header type specific config data */
531 
532 static void
533 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
534 {
535 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
536 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
537 	case PCIM_HDRTYPE_NORMAL:
538 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
539 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
540 		cfg->nummaps	    = PCI_MAXMAPS_0;
541 		break;
542 	case PCIM_HDRTYPE_BRIDGE:
543 		cfg->nummaps	    = PCI_MAXMAPS_1;
544 		break;
545 	case PCIM_HDRTYPE_CARDBUS:
546 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
547 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
548 		cfg->nummaps	    = PCI_MAXMAPS_2;
549 		break;
550 	}
551 #undef REG
552 }
553 
554 /* read configuration header into pcicfgregs structure */
555 struct pci_devinfo *
556 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
557 {
558 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
559 	pcicfgregs *cfg = NULL;
560 	struct pci_devinfo *devlist_entry;
561 	struct devlist *devlist_head;
562 
563 	devlist_head = &pci_devq;
564 
565 	devlist_entry = NULL;
566 
567 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
568 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
569 		if (devlist_entry == NULL)
570 			return (NULL);
571 
572 		cfg = &devlist_entry->cfg;
573 
574 		cfg->domain		= d;
575 		cfg->bus		= b;
576 		cfg->slot		= s;
577 		cfg->func		= f;
578 		cfg->vendor		= REG(PCIR_VENDOR, 2);
579 		cfg->device		= REG(PCIR_DEVICE, 2);
580 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
581 		cfg->statreg		= REG(PCIR_STATUS, 2);
582 		cfg->baseclass		= REG(PCIR_CLASS, 1);
583 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
584 		cfg->progif		= REG(PCIR_PROGIF, 1);
585 		cfg->revid		= REG(PCIR_REVID, 1);
586 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
587 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
588 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
589 		cfg->intpin		= REG(PCIR_INTPIN, 1);
590 		cfg->intline		= REG(PCIR_INTLINE, 1);
591 
592 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
593 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
594 
595 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
596 		cfg->hdrtype		&= ~PCIM_MFDEV;
597 		STAILQ_INIT(&cfg->maps);
598 
599 		pci_fixancient(cfg);
600 		pci_hdrtypedata(pcib, b, s, f, cfg);
601 
602 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
603 			pci_read_cap(pcib, cfg);
604 
605 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
606 
607 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
608 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
609 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
610 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
611 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
612 
613 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
614 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
615 		devlist_entry->conf.pc_vendor = cfg->vendor;
616 		devlist_entry->conf.pc_device = cfg->device;
617 
618 		devlist_entry->conf.pc_class = cfg->baseclass;
619 		devlist_entry->conf.pc_subclass = cfg->subclass;
620 		devlist_entry->conf.pc_progif = cfg->progif;
621 		devlist_entry->conf.pc_revid = cfg->revid;
622 
623 		pci_numdevs++;
624 		pci_generation++;
625 	}
626 	return (devlist_entry);
627 #undef REG
628 }
629 
630 static void
631 pci_read_cap(device_t pcib, pcicfgregs *cfg)
632 {
633 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
634 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
635 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
636 	uint64_t addr;
637 #endif
638 	uint32_t val;
639 	int	ptr, nextptr, ptrptr;
640 
641 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
642 	case PCIM_HDRTYPE_NORMAL:
643 	case PCIM_HDRTYPE_BRIDGE:
644 		ptrptr = PCIR_CAP_PTR;
645 		break;
646 	case PCIM_HDRTYPE_CARDBUS:
647 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
648 		break;
649 	default:
650 		return;		/* no extended capabilities support */
651 	}
652 	nextptr = REG(ptrptr, 1);	/* sanity check? */
653 
654 	/*
655 	 * Read capability entries.
656 	 */
657 	while (nextptr != 0) {
658 		/* Sanity check */
659 		if (nextptr > 255) {
660 			printf("illegal PCI extended capability offset %d\n",
661 			    nextptr);
662 			return;
663 		}
664 		/* Find the next entry */
665 		ptr = nextptr;
666 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
667 
668 		/* Process this entry */
669 		switch (REG(ptr + PCICAP_ID, 1)) {
670 		case PCIY_PMG:		/* PCI power management */
671 			if (cfg->pp.pp_cap == 0) {
672 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
673 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
674 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
675 				if ((nextptr - ptr) > PCIR_POWER_DATA)
676 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
677 			}
678 			break;
679 		case PCIY_HT:		/* HyperTransport */
680 			/* Determine HT-specific capability type. */
681 			val = REG(ptr + PCIR_HT_COMMAND, 2);
682 
683 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
684 				cfg->ht.ht_slave = ptr;
685 
686 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
687 			switch (val & PCIM_HTCMD_CAP_MASK) {
688 			case PCIM_HTCAP_MSI_MAPPING:
689 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
690 					/* Sanity check the mapping window. */
691 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
692 					    4);
693 					addr <<= 32;
694 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
695 					    4);
696 					if (addr != MSI_INTEL_ADDR_BASE)
697 						device_printf(pcib,
698 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
699 						    cfg->domain, cfg->bus,
700 						    cfg->slot, cfg->func,
701 						    (long long)addr);
702 				} else
703 					addr = MSI_INTEL_ADDR_BASE;
704 
705 				cfg->ht.ht_msimap = ptr;
706 				cfg->ht.ht_msictrl = val;
707 				cfg->ht.ht_msiaddr = addr;
708 				break;
709 			}
710 #endif
711 			break;
712 		case PCIY_MSI:		/* PCI MSI */
713 			cfg->msi.msi_location = ptr;
714 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
715 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
716 						     PCIM_MSICTRL_MMC_MASK)>>1);
717 			break;
718 		case PCIY_MSIX:		/* PCI MSI-X */
719 			cfg->msix.msix_location = ptr;
720 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
721 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
722 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
723 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
724 			cfg->msix.msix_table_bar = PCIR_BAR(val &
725 			    PCIM_MSIX_BIR_MASK);
726 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
727 			val = REG(ptr + PCIR_MSIX_PBA, 4);
728 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
729 			    PCIM_MSIX_BIR_MASK);
730 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
731 			break;
732 		case PCIY_VPD:		/* PCI Vital Product Data */
733 			cfg->vpd.vpd_reg = ptr;
734 			break;
735 		case PCIY_SUBVENDOR:
736 			/* Should always be true. */
737 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
738 			    PCIM_HDRTYPE_BRIDGE) {
739 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
740 				cfg->subvendor = val & 0xffff;
741 				cfg->subdevice = val >> 16;
742 			}
743 			break;
744 		case PCIY_PCIX:		/* PCI-X */
745 			/*
746 			 * Assume we have a PCI-X chipset if we have
747 			 * at least one PCI-PCI bridge with a PCI-X
748 			 * capability.  Note that some systems with
749 			 * PCI-express or HT chipsets might match on
750 			 * this check as well.
751 			 */
752 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
753 			    PCIM_HDRTYPE_BRIDGE)
754 				pcix_chipset = 1;
755 			cfg->pcix.pcix_location = ptr;
756 			break;
757 		case PCIY_EXPRESS:	/* PCI-express */
758 			/*
759 			 * Assume we have a PCI-express chipset if we have
760 			 * at least one PCI-express device.
761 			 */
762 			pcie_chipset = 1;
763 			cfg->pcie.pcie_location = ptr;
764 			val = REG(ptr + PCIER_FLAGS, 2);
765 			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
766 			break;
767 		default:
768 			break;
769 		}
770 	}
771 
772 #if defined(__powerpc__)
773 	/*
774 	 * Enable the MSI mapping window for all HyperTransport
775 	 * slaves.  PCI-PCI bridges have their windows enabled via
776 	 * PCIB_MAP_MSI().
777 	 */
778 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
779 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
780 		device_printf(pcib,
781 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
782 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
783 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
784 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
785 		     2);
786 	}
787 #endif
788 /* The REG and WREG macros carry through for use by the VPD functions below */
789 }
790 
791 /*
792  * PCI Vital Product Data
793  */
794 
795 #define	PCI_VPD_TIMEOUT		1000000
796 
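/*
 * The top bit (0x8000) of PCIR_VPD_ADDR is the flag bit defined by PCI
 * 2.2: a read starts by writing the address with the flag clear and is
 * complete once hardware sets the flag; a write stores the data first,
 * then the address with the flag set, and completes when hardware clears
 * it.  The helpers below poll for those transitions, bounded by
 * PCI_VPD_TIMEOUT iterations.
 */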
797 static int
798 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
799 {
800 	int count = PCI_VPD_TIMEOUT;
801 
802 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
803 
804 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
805 
806 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
807 		if (--count < 0)
808 			return (ENXIO);
809 		DELAY(1);	/* limit looping */
810 	}
811 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
812 
813 	return (0);
814 }
815 
816 #if 0
817 static int
818 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
819 {
820 	int count = PCI_VPD_TIMEOUT;
821 
822 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
823 
824 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
825 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
826 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
827 		if (--count < 0)
828 			return (ENXIO);
829 		DELAY(1);	/* limit looping */
830 	}
831 
832 	return (0);
833 }
834 #endif
835 
836 #undef PCI_VPD_TIMEOUT
837 
838 struct vpd_readstate {
839 	device_t	pcib;
840 	pcicfgregs	*cfg;
841 	uint32_t	val;
842 	int		bytesinval;
843 	int		off;
844 	uint8_t		cksum;
845 };
846 
847 static int
848 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
849 {
850 	uint32_t reg;
851 	uint8_t byte;
852 
853 	if (vrs->bytesinval == 0) {
854 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
855 			return (ENXIO);
856 		vrs->val = le32toh(reg);
857 		vrs->off += 4;
858 		byte = vrs->val & 0xff;
859 		vrs->bytesinval = 3;
860 	} else {
861 		vrs->val = vrs->val >> 8;
862 		byte = vrs->val & 0xff;
863 		vrs->bytesinval--;
864 	}
865 
866 	vrs->cksum += byte;
867 	*data = byte;
868 	return (0);
869 }
870 
871 static void
872 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
873 {
874 	struct vpd_readstate vrs;
875 	int state;
876 	int name;
877 	int remain;
878 	int i;
879 	int alloc, off;		/* alloc/off for RO/W arrays */
880 	int cksumvalid;
881 	int dflen;
882 	uint8_t byte;
883 	uint8_t byte2;
884 
885 	/* init vpd reader */
886 	vrs.bytesinval = 0;
887 	vrs.off = 0;
888 	vrs.pcib = pcib;
889 	vrs.cfg = cfg;
890 	vrs.cksum = 0;
891 
892 	state = 0;
893 	name = remain = i = 0;	/* shut up stupid gcc */
894 	alloc = off = 0;	/* shut up stupid gcc */
895 	dflen = 0;		/* shut up stupid gcc */
896 	cksumvalid = -1;
897 	while (state >= 0) {
898 		if (vpd_nextbyte(&vrs, &byte)) {
899 			state = -2;
900 			break;
901 		}
902 #if 0
903 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
904 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
905 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
906 #endif
907 		switch (state) {
908 		case 0:		/* item name */
909 			if (byte & 0x80) {
910 				if (vpd_nextbyte(&vrs, &byte2)) {
911 					state = -2;
912 					break;
913 				}
914 				remain = byte2;
915 				if (vpd_nextbyte(&vrs, &byte2)) {
916 					state = -2;
917 					break;
918 				}
919 				remain |= byte2 << 8;
920 				if (remain > (0x7f*4 - vrs.off)) {
921 					state = -1;
922 					pci_printf(cfg,
923 					    "invalid VPD data, remain %#x\n",
924 					    remain);
925 				}
926 				name = byte & 0x7f;
927 			} else {
928 				remain = byte & 0x7;
929 				name = (byte >> 3) & 0xf;
930 			}
931 			switch (name) {
932 			case 0x2:	/* String */
933 				cfg->vpd.vpd_ident = malloc(remain + 1,
934 				    M_DEVBUF, M_WAITOK);
935 				i = 0;
936 				state = 1;
937 				break;
938 			case 0xf:	/* End */
939 				state = -1;
940 				break;
941 			case 0x10:	/* VPD-R */
942 				alloc = 8;
943 				off = 0;
944 				cfg->vpd.vpd_ros = malloc(alloc *
945 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
946 				    M_WAITOK | M_ZERO);
947 				state = 2;
948 				break;
949 			case 0x11:	/* VPD-W */
950 				alloc = 8;
951 				off = 0;
952 				cfg->vpd.vpd_w = malloc(alloc *
953 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
954 				    M_WAITOK | M_ZERO);
955 				state = 5;
956 				break;
957 			default:	/* Invalid data, abort */
958 				state = -1;
959 				break;
960 			}
961 			break;
962 
963 		case 1:	/* Identifier String */
964 			cfg->vpd.vpd_ident[i++] = byte;
965 			remain--;
966 			if (remain == 0)  {
967 				cfg->vpd.vpd_ident[i] = '\0';
968 				state = 0;
969 			}
970 			break;
971 
972 		case 2:	/* VPD-R Keyword Header */
973 			if (off == alloc) {
974 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
975 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
976 				    M_DEVBUF, M_WAITOK | M_ZERO);
977 			}
978 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
979 			if (vpd_nextbyte(&vrs, &byte2)) {
980 				state = -2;
981 				break;
982 			}
983 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
984 			if (vpd_nextbyte(&vrs, &byte2)) {
985 				state = -2;
986 				break;
987 			}
988 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
989 			if (dflen == 0 &&
990 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
991 			    2) == 0) {
992 				/*
993 				 * if this happens, we can't trust the rest
994 				 * of the VPD.
995 				 */
996 				pci_printf(cfg, "bad keyword length: %d\n",
997 				    dflen);
998 				cksumvalid = 0;
999 				state = -1;
1000 				break;
1001 			} else if (dflen == 0) {
1002 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1003 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1004 				    M_DEVBUF, M_WAITOK);
1005 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1006 			} else
1007 				cfg->vpd.vpd_ros[off].value = malloc(
1008 				    (dflen + 1) *
1009 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1010 				    M_DEVBUF, M_WAITOK);
1011 			remain -= 3;
1012 			i = 0;
1013 			/* keep in sync w/ state 3's transitions */
1014 			if (dflen == 0 && remain == 0)
1015 				state = 0;
1016 			else if (dflen == 0)
1017 				state = 2;
1018 			else
1019 				state = 3;
1020 			break;
1021 
1022 		case 3:	/* VPD-R Keyword Value */
1023 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1024 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1025 			    "RV", 2) == 0 && cksumvalid == -1) {
1026 				if (vrs.cksum == 0)
1027 					cksumvalid = 1;
1028 				else {
1029 					if (bootverbose)
1030 						pci_printf(cfg,
1031 					    "bad VPD cksum, remain %hhu\n",
1032 						    vrs.cksum);
1033 					cksumvalid = 0;
1034 					state = -1;
1035 					break;
1036 				}
1037 			}
1038 			dflen--;
1039 			remain--;
1040 			/* keep in sync w/ state 2's transitions */
1041 			if (dflen == 0)
1042 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1043 			if (dflen == 0 && remain == 0) {
1044 				cfg->vpd.vpd_rocnt = off;
1045 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1046 				    off * sizeof(*cfg->vpd.vpd_ros),
1047 				    M_DEVBUF, M_WAITOK | M_ZERO);
1048 				state = 0;
1049 			} else if (dflen == 0)
1050 				state = 2;
1051 			break;
1052 
1053 		case 4:
1054 			remain--;
1055 			if (remain == 0)
1056 				state = 0;
1057 			break;
1058 
1059 		case 5:	/* VPD-W Keyword Header */
1060 			if (off == alloc) {
1061 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1062 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1063 				    M_DEVBUF, M_WAITOK | M_ZERO);
1064 			}
1065 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1066 			if (vpd_nextbyte(&vrs, &byte2)) {
1067 				state = -2;
1068 				break;
1069 			}
1070 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1071 			if (vpd_nextbyte(&vrs, &byte2)) {
1072 				state = -2;
1073 				break;
1074 			}
1075 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1076 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1077 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1078 			    sizeof(*cfg->vpd.vpd_w[off].value),
1079 			    M_DEVBUF, M_WAITOK);
1080 			remain -= 3;
1081 			i = 0;
1082 			/* keep in sync w/ state 6's transitions */
1083 			if (dflen == 0 && remain == 0)
1084 				state = 0;
1085 			else if (dflen == 0)
1086 				state = 5;
1087 			else
1088 				state = 6;
1089 			break;
1090 
1091 		case 6:	/* VPD-W Keyword Value */
1092 			cfg->vpd.vpd_w[off].value[i++] = byte;
1093 			dflen--;
1094 			remain--;
1095 			/* keep in sync w/ state 5's transitions */
1096 			if (dflen == 0)
1097 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1098 			if (dflen == 0 && remain == 0) {
1099 				cfg->vpd.vpd_wcnt = off;
1100 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1101 				    off * sizeof(*cfg->vpd.vpd_w),
1102 				    M_DEVBUF, M_WAITOK | M_ZERO);
1103 				state = 0;
1104 			} else if (dflen == 0)
1105 				state = 5;
1106 			break;
1107 
1108 		default:
1109 			pci_printf(cfg, "invalid state: %d\n", state);
1110 			state = -1;
1111 			break;
1112 		}
1113 	}
1114 
1115 	if (cksumvalid == 0 || state < -1) {
1116 		/* read-only data bad, clean up */
1117 		if (cfg->vpd.vpd_ros != NULL) {
1118 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1119 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1120 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1121 			cfg->vpd.vpd_ros = NULL;
1122 		}
1123 	}
1124 	if (state < -1) {
1125 		/* I/O error, clean up */
1126 		pci_printf(cfg, "failed to read VPD data.\n");
1127 		if (cfg->vpd.vpd_ident != NULL) {
1128 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1129 			cfg->vpd.vpd_ident = NULL;
1130 		}
1131 		if (cfg->vpd.vpd_w != NULL) {
1132 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1133 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1134 			free(cfg->vpd.vpd_w, M_DEVBUF);
1135 			cfg->vpd.vpd_w = NULL;
1136 		}
1137 	}
1138 	cfg->vpd.vpd_cached = 1;
1139 #undef REG
1140 #undef WREG
1141 }
1142 
1143 int
1144 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1145 {
1146 	struct pci_devinfo *dinfo = device_get_ivars(child);
1147 	pcicfgregs *cfg = &dinfo->cfg;
1148 
1149 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1150 		pci_read_vpd(device_get_parent(dev), cfg);
1151 
1152 	*identptr = cfg->vpd.vpd_ident;
1153 
1154 	if (*identptr == NULL)
1155 		return (ENXIO);
1156 
1157 	return (0);
1158 }
1159 
1160 int
1161 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1162 	const char **vptr)
1163 {
1164 	struct pci_devinfo *dinfo = device_get_ivars(child);
1165 	pcicfgregs *cfg = &dinfo->cfg;
1166 	int i;
1167 
1168 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1169 		pci_read_vpd(device_get_parent(dev), cfg);
1170 
1171 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1172 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1173 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1174 			*vptr = cfg->vpd.vpd_ros[i].value;
1175 			return (0);
1176 		}
1177 
1178 	*vptr = NULL;
1179 	return (ENXIO);
1180 }
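/*
 * A minimal driver-side sketch of the two methods above, using the
 * wrappers from pcivar.h; "PN" is the standard VPD read-only keyword for
 * the part number.
 */
#if 0
	const char *ident, *pn;

	if (pci_get_vpd_ident(dev, &ident) == 0)
		device_printf(dev, "VPD ident: %s\n", ident);
	if (pci_get_vpd_readonly(dev, "PN", &pn) == 0)
		device_printf(dev, "VPD part number: %s\n", pn);
#endif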
1181 
1182 struct pcicfg_vpd *
1183 pci_fetch_vpd_list(device_t dev)
1184 {
1185 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1186 	pcicfgregs *cfg = &dinfo->cfg;
1187 
1188 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1189 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1190 	return (&cfg->vpd);
1191 }
1192 
1193 /*
1194  * Find the requested HyperTransport capability and return the offset
1195  * in configuration space via the pointer provided.  The function
1196  * returns 0 on success and an error code otherwise.
1197  */
1198 int
1199 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1200 {
1201 	int ptr, error;
1202 	uint16_t val;
1203 
1204 	error = pci_find_cap(child, PCIY_HT, &ptr);
1205 	if (error)
1206 		return (error);
1207 
1208 	/*
1209 	 * Traverse the capabilities list checking each HT capability
1210 	 * to see if it matches the requested HT capability.
1211 	 */
1212 	while (ptr != 0) {
1213 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1214 		if (capability == PCIM_HTCAP_SLAVE ||
1215 		    capability == PCIM_HTCAP_HOST)
1216 			val &= 0xe000;
1217 		else
1218 			val &= PCIM_HTCMD_CAP_MASK;
1219 		if (val == capability) {
1220 			if (capreg != NULL)
1221 				*capreg = ptr;
1222 			return (0);
1223 		}
1224 
1225 		/* Skip to the next HT capability. */
1226 		while (ptr != 0) {
1227 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1228 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1229 			    PCIY_HT)
1230 				break;
1231 		}
1232 	}
1233 	return (ENOENT);
1234 }
1235 
1236 /*
1237  * Find the requested capability and return the offset in
1238  * configuration space via the pointer provided.  The function returns
1239  * 0 on success and an error code otherwise.
1240  */
1241 int
1242 pci_find_cap_method(device_t dev, device_t child, int capability,
1243     int *capreg)
1244 {
1245 	struct pci_devinfo *dinfo = device_get_ivars(child);
1246 	pcicfgregs *cfg = &dinfo->cfg;
1247 	u_int32_t status;
1248 	u_int8_t ptr;
1249 
1250 	/*
1251 	 * Check the CAP_LIST bit of the PCI status register first.
1252 	 */
1253 	status = pci_read_config(child, PCIR_STATUS, 2);
1254 	if (!(status & PCIM_STATUS_CAPPRESENT))
1255 		return (ENXIO);
1256 
1257 	/*
1258 	 * Determine the start pointer of the capabilities list.
1259 	 */
1260 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1261 	case PCIM_HDRTYPE_NORMAL:
1262 	case PCIM_HDRTYPE_BRIDGE:
1263 		ptr = PCIR_CAP_PTR;
1264 		break;
1265 	case PCIM_HDRTYPE_CARDBUS:
1266 		ptr = PCIR_CAP_PTR_2;
1267 		break;
1268 	default:
1269 		/* XXX: panic? */
1270 		return (ENXIO);		/* no extended capabilities support */
1271 	}
1272 	ptr = pci_read_config(child, ptr, 1);
1273 
1274 	/*
1275 	 * Traverse the capabilities list.
1276 	 */
1277 	while (ptr != 0) {
1278 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1279 			if (capreg != NULL)
1280 				*capreg = ptr;
1281 			return (0);
1282 		}
1283 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1284 	}
1285 
1286 	return (ENOENT);
1287 }
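/*
 * A sketch of pci_find_cap() from a driver's perspective: locate the
 * PCI-express capability and read its flags register.
 */
#if 0
	int ptr;
	uint16_t flags;

	if (pci_find_cap(dev, PCIY_EXPRESS, &ptr) == 0) {
		flags = pci_read_config(dev, ptr + PCIER_FLAGS, 2);
		device_printf(dev, "PCIe port type %d\n",
		    flags & PCIEM_FLAGS_TYPE);
	}
#endif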
1288 
1289 /*
1290  * Find the requested extended capability and return the offset in
1291  * configuration space via the pointer provided.  The function returns
1292  * 0 on success and an error code otherwise.
1293  */
1294 int
1295 pci_find_extcap_method(device_t dev, device_t child, int capability,
1296     int *capreg)
1297 {
1298 	struct pci_devinfo *dinfo = device_get_ivars(child);
1299 	pcicfgregs *cfg = &dinfo->cfg;
1300 	uint32_t ecap;
1301 	uint16_t ptr;
1302 
1303 	/* Only supported for PCI-express devices. */
1304 	if (cfg->pcie.pcie_location == 0)
1305 		return (ENXIO);
1306 
1307 	ptr = PCIR_EXTCAP;
1308 	ecap = pci_read_config(child, ptr, 4);
1309 	if (ecap == 0xffffffff || ecap == 0)
1310 		return (ENOENT);
1311 	for (;;) {
1312 		if (PCI_EXTCAP_ID(ecap) == capability) {
1313 			if (capreg != NULL)
1314 				*capreg = ptr;
1315 			return (0);
1316 		}
1317 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1318 		if (ptr == 0)
1319 			break;
1320 		ecap = pci_read_config(child, ptr, 4);
1321 	}
1322 
1323 	return (ENOENT);
1324 }
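/*
 * Extended capabilities live at config offsets 0x100 and above and are
 * only reachable on PCI-express devices.  A sketch, assuming the PCIZ_AER
 * constant from pcireg.h, of probing for Advanced Error Reporting:
 */
#if 0
	int aer;

	if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0)
		device_printf(dev, "AER capability at 0x%x\n", aer);
#endif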
1325 
1326 /*
1327  * Support for MSI-X message interrupts.
1328  */
1329 void
1330 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1331 {
1332 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1333 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1334 	uint32_t offset;
1335 
1336 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1337 	offset = msix->msix_table_offset + index * 16;
1338 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1339 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1340 	bus_write_4(msix->msix_table_res, offset + 8, data);
1341 
1342 	/* Enable MSI -> HT mapping. */
1343 	pci_ht_map_msi(dev, address);
1344 }
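/*
 * Each MSI-X table entry is 16 bytes: message address low (+0), message
 * address high (+4), message data (+8), and vector control (+12), whose
 * bit 0 is the per-vector mask bit toggled by the two functions below.
 * That layout is the source of the "index * 16" and "+ 12" offset
 * arithmetic used here.
 */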
1345 
1346 void
1347 pci_mask_msix(device_t dev, u_int index)
1348 {
1349 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1350 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1351 	uint32_t offset, val;
1352 
1353 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1354 	offset = msix->msix_table_offset + index * 16 + 12;
1355 	val = bus_read_4(msix->msix_table_res, offset);
1356 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1357 		val |= PCIM_MSIX_VCTRL_MASK;
1358 		bus_write_4(msix->msix_table_res, offset, val);
1359 	}
1360 }
1361 
1362 void
1363 pci_unmask_msix(device_t dev, u_int index)
1364 {
1365 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1366 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1367 	uint32_t offset, val;
1368 
1369 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1370 	offset = msix->msix_table_offset + index * 16 + 12;
1371 	val = bus_read_4(msix->msix_table_res, offset);
1372 	if (val & PCIM_MSIX_VCTRL_MASK) {
1373 		val &= ~PCIM_MSIX_VCTRL_MASK;
1374 		bus_write_4(msix->msix_table_res, offset, val);
1375 	}
1376 }
1377 
1378 int
1379 pci_pending_msix(device_t dev, u_int index)
1380 {
1381 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1382 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1383 	uint32_t offset, bit;
1384 
1385 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1386 	offset = msix->msix_pba_offset + (index / 32) * 4;
1387 	bit = 1 << index % 32;
1388 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1389 }
1390 
1391 /*
1392  * Restore MSI-X registers and table during resume.  If MSI-X is
1393  * enabled then walk the virtual table to restore the actual MSI-X
1394  * table.
1395  */
1396 static void
1397 pci_resume_msix(device_t dev)
1398 {
1399 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1400 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1401 	struct msix_table_entry *mte;
1402 	struct msix_vector *mv;
1403 	int i;
1404 
1405 	if (msix->msix_alloc > 0) {
1406 		/* First, mask all vectors. */
1407 		for (i = 0; i < msix->msix_msgnum; i++)
1408 			pci_mask_msix(dev, i);
1409 
1410 		/* Second, program any messages with at least one handler. */
1411 		for (i = 0; i < msix->msix_table_len; i++) {
1412 			mte = &msix->msix_table[i];
1413 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1414 				continue;
1415 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1416 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1417 			pci_unmask_msix(dev, i);
1418 		}
1419 	}
1420 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1421 	    msix->msix_ctrl, 2);
1422 }
1423 
1424 /*
1425  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1426  * returned in *count.  After this function returns, each message will be
1427  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1428  */
1429 int
1430 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1431 {
1432 	struct pci_devinfo *dinfo = device_get_ivars(child);
1433 	pcicfgregs *cfg = &dinfo->cfg;
1434 	struct resource_list_entry *rle;
1435 	int actual, error, i, irq, max;
1436 
1437 	/* Don't let count == 0 get us into trouble. */
1438 	if (*count == 0)
1439 		return (EINVAL);
1440 
1441 	/* If rid 0 is allocated, then fail. */
1442 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1443 	if (rle != NULL && rle->res != NULL)
1444 		return (ENXIO);
1445 
1446 	/* Already have allocated messages? */
1447 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1448 		return (ENXIO);
1449 
1450 	/* If MSI-X is blacklisted for this system, fail. */
1451 	if (pci_msix_blacklisted())
1452 		return (ENXIO);
1453 
1454 	/* MSI-X capability present? */
1455 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1456 		return (ENODEV);
1457 
1458 	/* Make sure the appropriate BARs are mapped. */
1459 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1460 	    cfg->msix.msix_table_bar);
1461 	if (rle == NULL || rle->res == NULL ||
1462 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1463 		return (ENXIO);
1464 	cfg->msix.msix_table_res = rle->res;
1465 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1466 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1467 		    cfg->msix.msix_pba_bar);
1468 		if (rle == NULL || rle->res == NULL ||
1469 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1470 			return (ENXIO);
1471 	}
1472 	cfg->msix.msix_pba_res = rle->res;
1473 
1474 	if (bootverbose)
1475 		device_printf(child,
1476 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1477 		    *count, cfg->msix.msix_msgnum);
1478 	max = min(*count, cfg->msix.msix_msgnum);
1479 	for (i = 0; i < max; i++) {
1480 		/* Allocate a message. */
1481 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1482 		if (error) {
1483 			if (i == 0)
1484 				return (error);
1485 			break;
1486 		}
1487 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1488 		    irq, 1);
1489 	}
1490 	actual = i;
1491 
1492 	if (bootverbose) {
1493 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1494 		if (actual == 1)
1495 			device_printf(child, "using IRQ %lu for MSI-X\n",
1496 			    rle->start);
1497 		else {
1498 			int run;
1499 
1500 			/*
1501 			 * Be fancy and try to print contiguous runs of
1502 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1503 			 * 'run' is true if we are in a range.
1504 			 */
1505 			device_printf(child, "using IRQs %lu", rle->start);
1506 			irq = rle->start;
1507 			run = 0;
1508 			for (i = 1; i < actual; i++) {
1509 				rle = resource_list_find(&dinfo->resources,
1510 				    SYS_RES_IRQ, i + 1);
1511 
1512 				/* Still in a run? */
1513 				if (rle->start == irq + 1) {
1514 					run = 1;
1515 					irq++;
1516 					continue;
1517 				}
1518 
1519 				/* Finish previous range. */
1520 				if (run) {
1521 					printf("-%d", irq);
1522 					run = 0;
1523 				}
1524 
1525 				/* Start new range. */
1526 				printf(",%lu", rle->start);
1527 				irq = rle->start;
1528 			}
1529 
1530 			/* Unfinished range? */
1531 			if (run)
1532 				printf("-%d", irq);
1533 			printf(" for MSI-X\n");
1534 		}
1535 	}
1536 
1537 	/* Mask all vectors. */
1538 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1539 		pci_mask_msix(child, i);
1540 
1541 	/* Allocate and initialize vector data and virtual table. */
1542 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1543 	    M_DEVBUF, M_WAITOK | M_ZERO);
1544 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1545 	    M_DEVBUF, M_WAITOK | M_ZERO);
1546 	for (i = 0; i < actual; i++) {
1547 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1548 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1549 		cfg->msix.msix_table[i].mte_vector = i + 1;
1550 	}
1551 
1552 	/* Update control register to enable MSI-X. */
1553 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1554 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1555 	    cfg->msix.msix_ctrl, 2);
1556 
1557 	/* Update counts of alloc'd messages. */
1558 	cfg->msix.msix_alloc = actual;
1559 	cfg->msix.msix_table_len = actual;
1560 	*count = actual;
1561 	return (0);
1562 }
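/*
 * A hypothetical attach-time sequence for the method above, using the
 * pcivar.h wrappers.  The BAR holding the MSI-X table must already be
 * allocated and active when pci_alloc_msix() is called.
 */
#if 0
	int count, rid;
	struct resource *irq;

	count = pci_msix_count(dev);
	if (count > 0 && pci_alloc_msix(dev, &count) == 0) {
		rid = 1;	/* MSI-X SYS_RES_IRQ rids start at 1 */
		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_ACTIVE);
	}
#endif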
1563 
1564 /*
1565  * By default, pci_alloc_msix() will assign the allocated IRQ
1566  * resources consecutively to the first N messages in the MSI-X table.
1567  * However, device drivers may want to use different layouts if they
1568  * either receive fewer messages than they asked for, or they wish to
1569  * populate the MSI-X table sparsely.  This method allows the driver
1570  * to specify what layout it wants.  It must be called after a
1571  * successful pci_alloc_msix() but before any of the associated
1572  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1573  *
1574  * The 'vectors' array contains 'count' message vectors.  The array
1575  * maps directly to the MSI-X table in that index 0 in the array
1576  * specifies the vector for the first message in the MSI-X table, etc.
1577  * The vector value in each array index can either be 0 to indicate
1578  * that no vector should be assigned to a message slot, or it can be a
1579  * number from 1 to N (where N is the count returned from a
1580  * successful call to pci_alloc_msix()) to indicate which message
1581  * vector (IRQ) should be used for the corresponding message.
1582  *
1583  * On successful return, each message with a non-zero vector will have
1584  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1585  * 1.  Additionally, if any of the IRQs allocated via the previous
1586  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1587  * will be freed back to the system automatically.
1588  *
1589  * For example, suppose a driver has an MSI-X table with 6 messages and
1590  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1591  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1592  * C.  After the call to pci_alloc_msix(), the device will be set up to
1593  * have an MSI-X table of ABC--- (where - means no vector assigned).
1594  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1595  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1596  * be freed back to the system.  This device will also have valid
1597  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1598  *
1599  * In any case, the SYS_RES_IRQ rid X will always map to the message
1600  * at MSI-X table index X - 1 and will only be valid if a vector is
1601  * assigned to that table entry.
1602  */
1603 int
1604 pci_remap_msix_method(device_t dev, device_t child, int count,
1605     const u_int *vectors)
1606 {
1607 	struct pci_devinfo *dinfo = device_get_ivars(child);
1608 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1609 	struct resource_list_entry *rle;
1610 	int i, irq, j, *used;
1611 
1612 	/*
1613 	 * Have to have at least one message in the table but the
1614 	 * table can't be bigger than the actual MSI-X table in the
1615 	 * device.
1616 	 */
1617 	if (count == 0 || count > msix->msix_msgnum)
1618 		return (EINVAL);
1619 
1620 	/* Sanity check the vectors. */
1621 	for (i = 0; i < count; i++)
1622 		if (vectors[i] > msix->msix_alloc)
1623 			return (EINVAL);
1624 
1625 	/* Make sure none of the resources are allocated. */
1626 	for (i = 0; i < msix->msix_table_len; i++) {
1627 		if (msix->msix_table[i].mte_vector == 0)
1628 			continue;
1629 		if (msix->msix_table[i].mte_handlers > 0)
1630 			return (EBUSY);
1631 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1632 		KASSERT(rle != NULL, ("missing resource"));
1633 		if (rle->res != NULL)
1634 			return (EBUSY);
1635 	}
1636 
1637 	/*
1638 	 * Make sure there aren't any holes in the vectors to be used
1639 	 * and that at least one vector is used.  This runs after the
1640 	 * EBUSY checks above so those early returns cannot leak 'used'.
1641 	 */
1642 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1643 	    M_ZERO);
1644 	for (i = 0; i < count; i++)
1645 		if (vectors[i] != 0)
1646 			used[vectors[i] - 1] = 1;
1647 	for (i = 0; i < msix->msix_alloc - 1; i++)
1648 		if (used[i] == 0 && used[i + 1] == 1) {
1649 			free(used, M_DEVBUF);
1650 			return (EINVAL);
1651 		}
1652 	if (used[0] != 1) {
1653 		free(used, M_DEVBUF);
1654 		return (EINVAL);
1655 	}
1656 
1657 	/* Free the existing resource list entries. */
1658 	for (i = 0; i < msix->msix_table_len; i++) {
1659 		if (msix->msix_table[i].mte_vector == 0)
1660 			continue;
1661 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1662 	}
1663 
1664 	/*
1665 	 * Build the new virtual table keeping track of which vectors are
1666 	 * used.
1667 	 */
1668 	free(msix->msix_table, M_DEVBUF);
1669 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1670 	    M_DEVBUF, M_WAITOK | M_ZERO);
1671 	for (i = 0; i < count; i++)
1672 		msix->msix_table[i].mte_vector = vectors[i];
1673 	msix->msix_table_len = count;
1674 
1675 	/* Free any unused IRQs and resize the vectors array if necessary. */
1676 	j = msix->msix_alloc - 1;
1677 	if (used[j] == 0) {
1678 		struct msix_vector *vec;
1679 
1680 		while (used[j] == 0) {
1681 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1682 			    msix->msix_vectors[j].mv_irq);
1683 			j--;
1684 		}
1685 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1686 		    M_WAITOK);
1687 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1688 		    (j + 1));
1689 		free(msix->msix_vectors, M_DEVBUF);
1690 		msix->msix_vectors = vec;
1691 		msix->msix_alloc = j + 1;
1692 	}
1693 	free(used, M_DEVBUF);
1694 
1695 	/* Map the IRQs onto the rids. */
1696 	for (i = 0; i < count; i++) {
1697 		if (vectors[i] == 0)
1698 			continue;
1699 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1700 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1701 		    irq, 1);
1702 	}
1703 
1704 	if (bootverbose) {
1705 		device_printf(child, "Remapped MSI-X IRQs as: ");
1706 		for (i = 0; i < count; i++) {
1707 			if (i != 0)
1708 				printf(", ");
1709 			if (vectors[i] == 0)
1710 				printf("---");
1711 			else
1712 				printf("%d",
1713 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1714 		}
1715 		printf("\n");
1716 	}
1717 
1718 	return (0);
1719 }
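/*
 * A sketch matching the A-AB-B example in the comment above: after
 * pci_alloc_msix() returned 3 vectors for a 6-entry table, a driver
 * could spread them out like this (vector 3, "C", is released):
 */
#if 0
	static const u_int vectors[] = { 1, 0, 1, 2, 0, 2 };

	error = pci_remap_msix(dev, 6, vectors);
#endif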
1720 
1721 static int
1722 pci_release_msix(device_t dev, device_t child)
1723 {
1724 	struct pci_devinfo *dinfo = device_get_ivars(child);
1725 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1726 	struct resource_list_entry *rle;
1727 	int i;
1728 
1729 	/* Do we have any messages to release? */
1730 	if (msix->msix_alloc == 0)
1731 		return (ENODEV);
1732 
1733 	/* Make sure none of the resources are allocated. */
1734 	for (i = 0; i < msix->msix_table_len; i++) {
1735 		if (msix->msix_table[i].mte_vector == 0)
1736 			continue;
1737 		if (msix->msix_table[i].mte_handlers > 0)
1738 			return (EBUSY);
1739 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1740 		KASSERT(rle != NULL, ("missing resource"));
1741 		if (rle->res != NULL)
1742 			return (EBUSY);
1743 	}
1744 
1745 	/* Update control register to disable MSI-X. */
1746 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1747 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1748 	    msix->msix_ctrl, 2);
1749 
1750 	/* Free the resource list entries. */
1751 	for (i = 0; i < msix->msix_table_len; i++) {
1752 		if (msix->msix_table[i].mte_vector == 0)
1753 			continue;
1754 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1755 	}
1756 	free(msix->msix_table, M_DEVBUF);
1757 	msix->msix_table_len = 0;
1758 
1759 	/* Release the IRQs. */
1760 	for (i = 0; i < msix->msix_alloc; i++)
1761 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1762 		    msix->msix_vectors[i].mv_irq);
1763 	free(msix->msix_vectors, M_DEVBUF);
1764 	msix->msix_alloc = 0;
1765 	return (0);
1766 }
1767 
1768 /*
1769  * Return the maximum number of MSI-X messages this device supports.
1770  * Basically, assuming the MD code can allocate messages, this function
1771  * should return the maximum value that pci_alloc_msix() can return.
1772  * Thus, it is subject to the tunables, etc.
1773  */
1774 int
1775 pci_msix_count_method(device_t dev, device_t child)
1776 {
1777 	struct pci_devinfo *dinfo = device_get_ivars(child);
1778 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1779 
1780 	if (pci_do_msix && msix->msix_location != 0)
1781 		return (msix->msix_msgnum);
1782 	return (0);
1783 }
1784 
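     /*
      * Typical driver-side MSI-X flow, shown as an illustrative sketch
      * only (error handling elided):
      *
      *	msgs = pci_msix_count(dev);
      *	if (msgs != 0 && pci_alloc_msix(dev, &msgs) == 0)
      *		... use SYS_RES_IRQ rids 1 through msgs ...
      *
      * On success msgs holds the count actually allocated; each message
      * is then passed to bus_alloc_resource_any() and bus_setup_intr()
      * like any other interrupt resource.
      */
     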
1785 /*
1786  * HyperTransport MSI mapping control
1787  */
1788 void
1789 pci_ht_map_msi(device_t dev, uint64_t addr)
1790 {
1791 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1792 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1793 
1794 	if (!ht->ht_msimap)
1795 		return;
1796 
1797 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1798 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1799 		/* Enable MSI -> HT mapping. */
1800 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1801 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1802 		    ht->ht_msictrl, 2);
1803 	}
1804 
1805 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1806 		/* Disable MSI -> HT mapping. */
1807 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1808 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1809 		    ht->ht_msictrl, 2);
1810 	}
1811 }
1812 
1813 int
1814 pci_get_max_read_req(device_t dev)
1815 {
1816 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1817 	int cap;
1818 	uint16_t val;
1819 
1820 	cap = dinfo->cfg.pcie.pcie_location;
1821 	if (cap == 0)
1822 		return (0);
1823 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1824 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1825 	val >>= 12;
1826 	return (1 << (val + 7));
1827 }
1828 
1829 int
1830 pci_set_max_read_req(device_t dev, int size)
1831 {
1832 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1833 	int cap;
1834 	uint16_t val;
1835 
1836 	cap = dinfo->cfg.pcie.pcie_location;
1837 	if (cap == 0)
1838 		return (0);
1839 	if (size < 128)
1840 		size = 128;
1841 	if (size > 4096)
1842 		size = 4096;
1843 	size = (1 << (fls(size) - 1));
1844 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1845 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1846 	val |= (fls(size) - 8) << 12;
1847 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1848 	return (size);
1849 }
1850 
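     /*
      * Worked example for the encoding above: the MRRS field stores the
      * request size as a power of two, with field value n selecting
      * 2^(n + 7) bytes.  A request for 1024 bytes gives
      * fls(1024) - 8 == 3, i.e. 2^(3 + 7) == 1024 bytes, and requests
      * are clamped to the 128..4096 byte range that PCI Express allows.
      */
     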
1851 /*
1852  * Support for Message Signaled Interrupts (MSI).
1853  */
1854 void
1855 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1856 {
1857 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1858 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1859 
1860 	/* Write data and address values. */
1861 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1862 	    address & 0xffffffff, 4);
1863 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1864 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1865 		    address >> 32, 4);
1866 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1867 		    data, 2);
1868 	} else
1869 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1870 		    2);
1871 
1872 	/* Enable MSI in the control register. */
1873 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1874 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1875 	    2);
1876 
1877 	/* Enable MSI -> HT mapping. */
1878 	pci_ht_map_msi(dev, address);
1879 }
1880 
1881 void
1882 pci_disable_msi(device_t dev)
1883 {
1884 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1885 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1886 
1887 	/* Disable MSI -> HT mapping. */
1888 	pci_ht_map_msi(dev, 0);
1889 
1890 	/* Disable MSI in the control register. */
1891 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1892 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1893 	    2);
1894 }
1895 
1896 /*
1897  * Restore MSI registers during resume.  If MSI is enabled then
1898  * restore the data and address registers in addition to the control
1899  * register.
1900  */
1901 static void
1902 pci_resume_msi(device_t dev)
1903 {
1904 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1905 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1906 	uint64_t address;
1907 	uint16_t data;
1908 
1909 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1910 		address = msi->msi_addr;
1911 		data = msi->msi_data;
1912 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1913 		    address & 0xffffffff, 4);
1914 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1915 			pci_write_config(dev, msi->msi_location +
1916 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1917 			pci_write_config(dev, msi->msi_location +
1918 			    PCIR_MSI_DATA_64BIT, data, 2);
1919 		} else
1920 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1921 			    data, 2);
1922 	}
1923 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1924 	    2);
1925 }
1926 
1927 static int
1928 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1929 {
1930 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1931 	pcicfgregs *cfg = &dinfo->cfg;
1932 	struct resource_list_entry *rle;
1933 	struct msix_table_entry *mte;
1934 	struct msix_vector *mv;
1935 	uint64_t addr;
1936 	uint32_t data;
1937 	int error, i, j;
1938 
1939 	/*
1940 	 * Handle MSI first.  We try to find this IRQ among our list
1941 	 * of MSI IRQs.  If we find it, we request updated address and
1942 	 * data registers and apply the results.
1943 	 */
1944 	if (cfg->msi.msi_alloc > 0) {
1945 
1946 		/* If we don't have any active handlers, nothing to do. */
1947 		if (cfg->msi.msi_handlers == 0)
1948 			return (0);
1949 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1950 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1951 			    i + 1);
1952 			if (rle->start == irq) {
1953 				error = PCIB_MAP_MSI(device_get_parent(bus),
1954 				    dev, irq, &addr, &data);
1955 				if (error)
1956 					return (error);
1957 				pci_disable_msi(dev);
1958 				dinfo->cfg.msi.msi_addr = addr;
1959 				dinfo->cfg.msi.msi_data = data;
1960 				pci_enable_msi(dev, addr, data);
1961 				return (0);
1962 			}
1963 		}
1964 		return (ENOENT);
1965 	}
1966 
1967 	/*
1968 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1969 	 * we request the updated mapping info.  If that works, we go
1970 	 * through all the slots that use this IRQ and update them.
1971 	 */
1972 	if (cfg->msix.msix_alloc > 0) {
1973 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1974 			mv = &cfg->msix.msix_vectors[i];
1975 			if (mv->mv_irq == irq) {
1976 				error = PCIB_MAP_MSI(device_get_parent(bus),
1977 				    dev, irq, &addr, &data);
1978 				if (error)
1979 					return (error);
1980 				mv->mv_address = addr;
1981 				mv->mv_data = data;
1982 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1983 					mte = &cfg->msix.msix_table[j];
1984 					if (mte->mte_vector != i + 1)
1985 						continue;
1986 					if (mte->mte_handlers == 0)
1987 						continue;
1988 					pci_mask_msix(dev, j);
1989 					pci_enable_msix(dev, j, addr, data);
1990 					pci_unmask_msix(dev, j);
1991 				}
1992 			}
1993 		}
1994 		return (ENOENT);
1995 	}
1996 
1997 	return (ENOENT);
1998 }
1999 
2000 /*
2001  * Returns true if the specified device is blacklisted because MSI
2002  * doesn't work.
2003  */
2004 int
2005 pci_msi_device_blacklisted(device_t dev)
2006 {
2007 
2008 	if (!pci_honor_msi_blacklist)
2009 		return (0);
2010 
2011 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2012 }
2013 
2014 /*
2015  * Determine if MSI is blacklisted globally on this system.  Currently,
2016  * we just check for blacklisted chipsets as represented by the
2017  * host-PCI bridge at device 0:0:0.  In the future, it may become
2018  * necessary to check other system attributes, such as the kenv values
2019  * that give the motherboard manufacturer and model number.
2020  */
2021 static int
2022 pci_msi_blacklisted(void)
2023 {
2024 	device_t dev;
2025 
2026 	if (!pci_honor_msi_blacklist)
2027 		return (0);
2028 
2029 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2030 	if (!(pcie_chipset || pcix_chipset)) {
2031 		if (vm_guest != VM_GUEST_NO) {
2032 			/*
2033 			 * Whitelist older chipsets in virtual
2034 			 * machines known to support MSI.
2035 			 */
2036 			dev = pci_find_bsf(0, 0, 0);
2037 			if (dev != NULL)
2038 				return (!pci_has_quirk(pci_get_devid(dev),
2039 					PCI_QUIRK_ENABLE_MSI_VM));
2040 		}
2041 		return (1);
2042 	}
2043 
2044 	dev = pci_find_bsf(0, 0, 0);
2045 	if (dev != NULL)
2046 		return (pci_msi_device_blacklisted(dev));
2047 	return (0);
2048 }
2049 
2050 /*
2051  * Returns true if the specified device is blacklisted because MSI-X
2052  * doesn't work.  Note that this assumes that if MSI doesn't work,
2053  * MSI-X doesn't either.
2054  */
2055 int
2056 pci_msix_device_blacklisted(device_t dev)
2057 {
2058 
2059 	if (!pci_honor_msi_blacklist)
2060 		return (0);
2061 
2062 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2063 		return (1);
2064 
2065 	return (pci_msi_device_blacklisted(dev));
2066 }
2067 
2068 /*
2069  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2070  * is blacklisted, assume that MSI-X is as well.  Check for additional
2071  * chipsets where MSI works but MSI-X does not.
2072  */
2073 static int
2074 pci_msix_blacklisted(void)
2075 {
2076 	device_t dev;
2077 
2078 	if (!pci_honor_msi_blacklist)
2079 		return (0);
2080 
2081 	dev = pci_find_bsf(0, 0, 0);
2082 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2083 	    PCI_QUIRK_DISABLE_MSIX))
2084 		return (1);
2085 
2086 	return (pci_msi_blacklisted());
2087 }
2088 
2089 /*
2090  * Attempt to allocate *count MSI messages.  The actual number allocated is
2091  * returned in *count.  After this function returns, each message will be
2092  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2093  */
2094 int
2095 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2096 {
2097 	struct pci_devinfo *dinfo = device_get_ivars(child);
2098 	pcicfgregs *cfg = &dinfo->cfg;
2099 	struct resource_list_entry *rle;
2100 	int actual, error, i, irqs[32];
2101 	uint16_t ctrl;
2102 
2103 	/* Don't let count == 0 get us into trouble. */
2104 	if (*count == 0)
2105 		return (EINVAL);
2106 
2107 	/* If rid 0 is allocated, then fail. */
2108 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2109 	if (rle != NULL && rle->res != NULL)
2110 		return (ENXIO);
2111 
2112 	/* Already have allocated messages? */
2113 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2114 		return (ENXIO);
2115 
2116 	/* If MSI is blacklisted for this system, fail. */
2117 	if (pci_msi_blacklisted())
2118 		return (ENXIO);
2119 
2120 	/* MSI capability present? */
2121 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2122 		return (ENODEV);
2123 
2124 	if (bootverbose)
2125 		device_printf(child,
2126 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2127 		    *count, cfg->msi.msi_msgnum);
2128 
2129 	/* Don't ask for more than the device supports. */
2130 	actual = min(*count, cfg->msi.msi_msgnum);
2131 
2132 	/* Don't ask for more than 32 messages. */
2133 	actual = min(actual, 32);
2134 
2135 	/* MSI requires a power-of-2 number of messages. */
2136 	if (!powerof2(actual))
2137 		return (EINVAL);
2138 
2139 	for (;;) {
2140 		/* Try to allocate N messages. */
2141 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2142 		    actual, irqs);
2143 		if (error == 0)
2144 			break;
2145 		if (actual == 1)
2146 			return (error);
2147 
2148 		/* Try N / 2. */
2149 		actual >>= 1;
2150 	}
2151 
2152 	/*
2153 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2154 	 * resources in the irqs[] array, so add new resources
2155 	 * starting at rid 1.
2156 	 */
2157 	for (i = 0; i < actual; i++)
2158 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2159 		    irqs[i], irqs[i], 1);
2160 
2161 	if (bootverbose) {
2162 		if (actual == 1)
2163 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2164 		else {
2165 			int run;
2166 
2167 			/*
2168 			 * Be fancy and try to print contiguous runs
2169 			 * of IRQ values as ranges.  'run' is true if
2170 			 * we are in a range.
2171 			 */
2172 			device_printf(child, "using IRQs %d", irqs[0]);
2173 			run = 0;
2174 			for (i = 1; i < actual; i++) {
2175 
2176 				/* Still in a run? */
2177 				if (irqs[i] == irqs[i - 1] + 1) {
2178 					run = 1;
2179 					continue;
2180 				}
2181 
2182 				/* Finish previous range. */
2183 				if (run) {
2184 					printf("-%d", irqs[i - 1]);
2185 					run = 0;
2186 				}
2187 
2188 				/* Start new range. */
2189 				printf(",%d", irqs[i]);
2190 			}
2191 
2192 			/* Unfinished range? */
2193 			if (run)
2194 				printf("-%d", irqs[actual - 1]);
2195 			printf(" for MSI\n");
2196 		}
2197 	}
2198 
2199 	/* Update control register with actual count. */
2200 	ctrl = cfg->msi.msi_ctrl;
2201 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2202 	ctrl |= (ffs(actual) - 1) << 4;
2203 	cfg->msi.msi_ctrl = ctrl;
2204 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2205 
2206 	/* Update counts of alloc'd messages. */
2207 	cfg->msi.msi_alloc = actual;
2208 	cfg->msi.msi_handlers = 0;
2209 	*count = actual;
2210 	return (0);
2211 }
2212 
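     /*
      * Worked example for the MME encoding above: with actual == 4,
      * ffs(4) - 1 == 2, so the Multiple Message Enable field is written
      * as 2, which the MSI specification defines as 2^2 == 4 enabled
      * messages.
      */
     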
2213 /* Release the MSI messages associated with this device. */
2214 int
2215 pci_release_msi_method(device_t dev, device_t child)
2216 {
2217 	struct pci_devinfo *dinfo = device_get_ivars(child);
2218 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2219 	struct resource_list_entry *rle;
2220 	int error, i, irqs[32];
2221 
2222 	/* Try MSI-X first. */
2223 	error = pci_release_msix(dev, child);
2224 	if (error != ENODEV)
2225 		return (error);
2226 
2227 	/* Do we have any messages to release? */
2228 	if (msi->msi_alloc == 0)
2229 		return (ENODEV);
2230 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2231 
2232 	/* Make sure none of the resources are allocated. */
2233 	if (msi->msi_handlers > 0)
2234 		return (EBUSY);
2235 	for (i = 0; i < msi->msi_alloc; i++) {
2236 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2237 		KASSERT(rle != NULL, ("missing MSI resource"));
2238 		if (rle->res != NULL)
2239 			return (EBUSY);
2240 		irqs[i] = rle->start;
2241 	}
2242 
2243 	/* Update control register with 0 count. */
2244 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2245 	    ("%s: MSI still enabled", __func__));
2246 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2247 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2248 	    msi->msi_ctrl, 2);
2249 
2250 	/* Release the messages. */
2251 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2252 	for (i = 0; i < msi->msi_alloc; i++)
2253 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2254 
2255 	/* Update alloc count. */
2256 	msi->msi_alloc = 0;
2257 	msi->msi_addr = 0;
2258 	msi->msi_data = 0;
2259 	return (0);
2260 }
2261 
2262 /*
2263  * Return the maximum number of MSI messages this device supports.
2264  * Basically, assuming the MD code can allocate messages, this function
2265  * should return the maximum value that pci_alloc_msi() can return.
2266  * Thus, it is subject to the tunables, etc.
2267  */
2268 int
2269 pci_msi_count_method(device_t dev, device_t child)
2270 {
2271 	struct pci_devinfo *dinfo = device_get_ivars(child);
2272 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2273 
2274 	if (pci_do_msi && msi->msi_location != 0)
2275 		return (msi->msi_msgnum);
2276 	return (0);
2277 }
2278 
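     /*
      * Typical driver-side MSI flow, shown as an illustrative sketch
      * only (error handling elided):
      *
      *	msgs = 1;
      *	if (pci_msi_count(dev) >= 1 && pci_alloc_msi(dev, &msgs) == 0)
      *		rid = 1;
      *	else
      *		rid = 0;
      *	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE);
      *
      * MSI messages appear at rids starting at 1, while rid 0 remains
      * the legacy INTx interrupt; pci_release_msi() undoes the
      * allocation on detach.
      */
     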
2279 /* Free the pcicfgregs structure and all dependent data structures. */
2280 
2281 int
2282 pci_freecfg(struct pci_devinfo *dinfo)
2283 {
2284 	struct devlist *devlist_head;
2285 	struct pci_map *pm, *next;
2286 	int i;
2287 
2288 	devlist_head = &pci_devq;
2289 
2290 	if (dinfo->cfg.vpd.vpd_reg) {
2291 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2292 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2293 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2294 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2295 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2296 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2297 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2298 	}
2299 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2300 		free(pm, M_DEVBUF);
2301 	}
2302 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2303 	free(dinfo, M_DEVBUF);
2304 
2305 	/* increment the generation count */
2306 	pci_generation++;
2307 
2308 	/* we're losing one device */
2309 	pci_numdevs--;
2310 	return (0);
2311 }
2312 
2313 /*
2314  * PCI power management
2315  */
2316 int
2317 pci_set_powerstate_method(device_t dev, device_t child, int state)
2318 {
2319 	struct pci_devinfo *dinfo = device_get_ivars(child);
2320 	pcicfgregs *cfg = &dinfo->cfg;
2321 	uint16_t status;
2322 	int oldstate, highest, delay;
2323 
2324 	if (cfg->pp.pp_cap == 0)
2325 		return (EOPNOTSUPP);
2326 
2327 	/*
2328 	 * Optimize a no state change request away.  While it would be OK to
2329 	 * write to the hardware in theory, some devices have shown odd
2330 	 * behavior when going from D3 -> D3.
2331 	 */
2332 	oldstate = pci_get_powerstate(child);
2333 	if (oldstate == state)
2334 		return (0);
2335 
2336 	/*
2337 	 * The PCI power management specification states that after a state
2338 	 * transition between PCI power states, system software must
2339 	 * guarantee a minimal delay before the function accesses the device.
2340 	 * Compute the worst case delay that we need to guarantee before we
2341 	 * access the device.  Many devices will be responsive much more
2342 	 * quickly than this delay, but there are some that don't respond
2343 	 * instantly to state changes.  Transitions to/from D3 state require
2344 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2345 	 * is done below with DELAY rather than a sleeper function because
2346 	 * this function can be called from contexts where we cannot sleep.
2347 	 */
2348 	highest = (oldstate > state) ? oldstate : state;
2349 	if (highest == PCI_POWERSTATE_D3)
2350 	    delay = 10000;
2351 	else if (highest == PCI_POWERSTATE_D2)
2352 	    delay = 200;
2353 	else
2354 	    delay = 0;
2355 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2356 	    & ~PCIM_PSTAT_DMASK;
2358 	switch (state) {
2359 	case PCI_POWERSTATE_D0:
2360 		status |= PCIM_PSTAT_D0;
2361 		break;
2362 	case PCI_POWERSTATE_D1:
2363 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2364 			return (EOPNOTSUPP);
2365 		status |= PCIM_PSTAT_D1;
2366 		break;
2367 	case PCI_POWERSTATE_D2:
2368 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2369 			return (EOPNOTSUPP);
2370 		status |= PCIM_PSTAT_D2;
2371 		break;
2372 	case PCI_POWERSTATE_D3:
2373 		status |= PCIM_PSTAT_D3;
2374 		break;
2375 	default:
2376 		return (EINVAL);
2377 	}
2378 
2379 	if (bootverbose)
2380 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2381 		    state);
2382 
2383 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2384 	if (delay)
2385 		DELAY(delay);
2386 	return (0);
2387 }
2388 
2389 int
2390 pci_get_powerstate_method(device_t dev, device_t child)
2391 {
2392 	struct pci_devinfo *dinfo = device_get_ivars(child);
2393 	pcicfgregs *cfg = &dinfo->cfg;
2394 	uint16_t status;
2395 	int result;
2396 
2397 	if (cfg->pp.pp_cap != 0) {
2398 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2399 		switch (status & PCIM_PSTAT_DMASK) {
2400 		case PCIM_PSTAT_D0:
2401 			result = PCI_POWERSTATE_D0;
2402 			break;
2403 		case PCIM_PSTAT_D1:
2404 			result = PCI_POWERSTATE_D1;
2405 			break;
2406 		case PCIM_PSTAT_D2:
2407 			result = PCI_POWERSTATE_D2;
2408 			break;
2409 		case PCIM_PSTAT_D3:
2410 			result = PCI_POWERSTATE_D3;
2411 			break;
2412 		default:
2413 			result = PCI_POWERSTATE_UNKNOWN;
2414 			break;
2415 		}
2416 	} else {
2417 		/* No support, device is always at D0 */
2418 		result = PCI_POWERSTATE_D0;
2419 	}
2420 	return (result);
2421 }
2422 
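     /*
      * Drivers normally reach these methods through the
      * pci_set_powerstate() and pci_get_powerstate() wrappers, e.g.
      * (illustrative only):
      *
      *	pci_set_powerstate(dev, PCI_POWERSTATE_D3);	(in suspend)
      *	pci_set_powerstate(dev, PCI_POWERSTATE_D0);	(in resume)
      */
     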
2423 /*
2424  * Some convenience functions for PCI device drivers.
2425  */
2426 
2427 static __inline void
2428 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2429 {
2430 	uint16_t	command;
2431 
2432 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2433 	command |= bit;
2434 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2435 }
2436 
2437 static __inline void
2438 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2439 {
2440 	uint16_t	command;
2441 
2442 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2443 	command &= ~bit;
2444 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2445 }
2446 
2447 int
2448 pci_enable_busmaster_method(device_t dev, device_t child)
2449 {
2450 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2451 	return (0);
2452 }
2453 
2454 int
2455 pci_disable_busmaster_method(device_t dev, device_t child)
2456 {
2457 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2458 	return (0);
2459 }
2460 
2461 int
2462 pci_enable_io_method(device_t dev, device_t child, int space)
2463 {
2464 	uint16_t bit;
2465 
2466 	switch (space) {
2467 	case SYS_RES_IOPORT:
2468 		bit = PCIM_CMD_PORTEN;
2469 		break;
2470 	case SYS_RES_MEMORY:
2471 		bit = PCIM_CMD_MEMEN;
2472 		break;
2473 	default:
2474 		return (EINVAL);
2475 	}
2476 	pci_set_command_bit(dev, child, bit);
2477 	return (0);
2478 }
2479 
2480 int
2481 pci_disable_io_method(device_t dev, device_t child, int space)
2482 {
2483 	uint16_t bit;
2484 
2485 	switch (space) {
2486 	case SYS_RES_IOPORT:
2487 		bit = PCIM_CMD_PORTEN;
2488 		break;
2489 	case SYS_RES_MEMORY:
2490 		bit = PCIM_CMD_MEMEN;
2491 		break;
2492 	default:
2493 		return (EINVAL);
2494 	}
2495 	pci_clear_command_bit(dev, child, bit);
2496 	return (0);
2497 }
2498 
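     /*
      * A typical attach routine turns on the decoding it needs through
      * the wrappers for the helpers above, e.g. (illustrative only):
      *
      *	pci_enable_busmaster(dev);
      *	pci_enable_io(dev, SYS_RES_MEMORY);
      */
     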
2499 /*
2500  * New style pci driver.  Parent device is either a pci-host-bridge or a
2501  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2502  */
2503 
2504 void
2505 pci_print_verbose(struct pci_devinfo *dinfo)
2506 {
2507 
2508 	if (bootverbose) {
2509 		pcicfgregs *cfg = &dinfo->cfg;
2510 
2511 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2512 		    cfg->vendor, cfg->device, cfg->revid);
2513 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2514 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2515 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2516 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2517 		    cfg->mfdev);
2518 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2519 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2520 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2521 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2522 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2523 		if (cfg->intpin > 0)
2524 			printf("\tintpin=%c, irq=%d\n",
2525 			    cfg->intpin + 'a' - 1, cfg->intline);
2526 		if (cfg->pp.pp_cap) {
2527 			uint16_t status;
2528 
2529 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2530 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2531 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2532 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2533 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2534 			    status & PCIM_PSTAT_DMASK);
2535 		}
2536 		if (cfg->msi.msi_location) {
2537 			int ctrl;
2538 
2539 			ctrl = cfg->msi.msi_ctrl;
2540 			printf("\tMSI supports %d message%s%s%s\n",
2541 			    cfg->msi.msi_msgnum,
2542 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2543 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2544 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2545 		}
2546 		if (cfg->msix.msix_location) {
2547 			printf("\tMSI-X supports %d message%s ",
2548 			    cfg->msix.msix_msgnum,
2549 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2550 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2551 				printf("in map 0x%x\n",
2552 				    cfg->msix.msix_table_bar);
2553 			else
2554 				printf("in maps 0x%x and 0x%x\n",
2555 				    cfg->msix.msix_table_bar,
2556 				    cfg->msix.msix_pba_bar);
2557 		}
2558 	}
2559 }
2560 
2561 static int
2562 pci_porten(device_t dev)
2563 {
2564 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2565 }
2566 
2567 static int
2568 pci_memen(device_t dev)
2569 {
2570 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2571 }
2572 
2573 static void
2574 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2575 {
2576 	struct pci_devinfo *dinfo;
2577 	pci_addr_t map, testval;
2578 	int ln2range;
2579 	uint16_t cmd;
2580 
2581 	/*
2582 	 * The device ROM BAR is special.  It is always a 32-bit
2583 	 * memory BAR.  Bit 0 is the ROM enable bit and must not be set when
2584 	 * sizing the BAR.
2585 	 */
2586 	dinfo = device_get_ivars(dev);
2587 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2588 		map = pci_read_config(dev, reg, 4);
2589 		pci_write_config(dev, reg, 0xfffffffe, 4);
2590 		testval = pci_read_config(dev, reg, 4);
2591 		pci_write_config(dev, reg, map, 4);
2592 		*mapp = map;
2593 		*testvalp = testval;
2594 		return;
2595 	}
2596 
2597 	map = pci_read_config(dev, reg, 4);
2598 	ln2range = pci_maprange(map);
2599 	if (ln2range == 64)
2600 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2601 
2602 	/*
2603 	 * Disable decoding via the command register before
2604 	 * determining the BAR's length since we will be placing it in
2605 	 * a weird state.
2606 	 */
2607 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2608 	pci_write_config(dev, PCIR_COMMAND,
2609 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2610 
2611 	/*
2612 	 * Determine the BAR's length by writing all 1's.  The bottom
2613 	 * log_2(size) bits of the BAR will stick as 0 when we read
2614 	 * the value back.
2615 	 */
2616 	pci_write_config(dev, reg, 0xffffffff, 4);
2617 	testval = pci_read_config(dev, reg, 4);
2618 	if (ln2range == 64) {
2619 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2620 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2621 	}
2622 
2623 	/*
2624 	 * Restore the original value of the BAR.  We may have reprogrammed
2625 	 * the BAR of the low-level console device and when booting verbose,
2626 	 * we need the console device addressable.
2627 	 */
2628 	pci_write_config(dev, reg, map, 4);
2629 	if (ln2range == 64)
2630 		pci_write_config(dev, reg + 4, map >> 32, 4);
2631 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2632 
2633 	*mapp = map;
2634 	*testvalp = testval;
2635 }
2636 
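     /*
      * Worked example of the sizing protocol above (illustrative): for
      * a 4KB 32-bit memory BAR programmed to 0xf0000000, the first read
      * returns 0xf0000000; after writing all 1's the register reads
      * back as 0xfffff000 because the low 12 bits stick at zero, so
      * pci_mapsize() reports a size of 2^12 bytes.
      */
     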
2637 static void
2638 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2639 {
2640 	struct pci_devinfo *dinfo;
2641 	int ln2range;
2642 
2643 	/* The device ROM BAR is always a 32-bit memory BAR. */
2644 	dinfo = device_get_ivars(dev);
2645 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2646 		ln2range = 32;
2647 	else
2648 		ln2range = pci_maprange(pm->pm_value);
2649 	pci_write_config(dev, pm->pm_reg, base, 4);
2650 	if (ln2range == 64)
2651 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2652 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2653 	if (ln2range == 64)
2654 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2655 		    pm->pm_reg + 4, 4) << 32;
2656 }
2657 
2658 struct pci_map *
2659 pci_find_bar(device_t dev, int reg)
2660 {
2661 	struct pci_devinfo *dinfo;
2662 	struct pci_map *pm;
2663 
2664 	dinfo = device_get_ivars(dev);
2665 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2666 		if (pm->pm_reg == reg)
2667 			return (pm);
2668 	}
2669 	return (NULL);
2670 }
2671 
2672 int
2673 pci_bar_enabled(device_t dev, struct pci_map *pm)
2674 {
2675 	struct pci_devinfo *dinfo;
2676 	uint16_t cmd;
2677 
2678 	dinfo = device_get_ivars(dev);
2679 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2680 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2681 		return (0);
2682 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2683 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2684 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2685 	else
2686 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2687 }
2688 
2689 static struct pci_map *
2690 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2691 {
2692 	struct pci_devinfo *dinfo;
2693 	struct pci_map *pm, *prev;
2694 
2695 	dinfo = device_get_ivars(dev);
2696 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2697 	pm->pm_reg = reg;
2698 	pm->pm_value = value;
2699 	pm->pm_size = size;
2700 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2701 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2702 		    reg));
2703 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2704 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2705 			break;
2706 	}
2707 	if (prev != NULL)
2708 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2709 	else
2710 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2711 	return (pm);
2712 }
2713 
2714 static void
2715 pci_restore_bars(device_t dev)
2716 {
2717 	struct pci_devinfo *dinfo;
2718 	struct pci_map *pm;
2719 	int ln2range;
2720 
2721 	dinfo = device_get_ivars(dev);
2722 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2723 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2724 			ln2range = 32;
2725 		else
2726 			ln2range = pci_maprange(pm->pm_value);
2727 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2728 		if (ln2range == 64)
2729 			pci_write_config(dev, pm->pm_reg + 4,
2730 			    pm->pm_value >> 32, 4);
2731 	}
2732 }
2733 
2734 /*
2735  * Add a resource based on a PCI map register.  Return 1 if the map
2736  * register is a 32-bit map register or 2 if it is a 64-bit register.
2737  */
2738 static int
2739 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2740     int force, int prefetch)
2741 {
2742 	struct pci_map *pm;
2743 	pci_addr_t base, map, testval;
2744 	pci_addr_t start, end, count;
2745 	int barlen, basezero, maprange, mapsize, type;
2746 	uint16_t cmd;
2747 	struct resource *res;
2748 
2749 	/*
2750 	 * The BAR may already exist if the device is a CardBus card
2751 	 * whose CIS is stored in this BAR.
2752 	 */
2753 	pm = pci_find_bar(dev, reg);
2754 	if (pm != NULL) {
2755 		maprange = pci_maprange(pm->pm_value);
2756 		barlen = maprange == 64 ? 2 : 1;
2757 		return (barlen);
2758 	}
2759 
2760 	pci_read_bar(dev, reg, &map, &testval);
2761 	if (PCI_BAR_MEM(map)) {
2762 		type = SYS_RES_MEMORY;
2763 		if (map & PCIM_BAR_MEM_PREFETCH)
2764 			prefetch = 1;
2765 	} else
2766 		type = SYS_RES_IOPORT;
2767 	mapsize = pci_mapsize(testval);
2768 	base = pci_mapbase(map);
2769 #ifdef __PCI_BAR_ZERO_VALID
2770 	basezero = 0;
2771 #else
2772 	basezero = base == 0;
2773 #endif
2774 	maprange = pci_maprange(map);
2775 	barlen = maprange == 64 ? 2 : 1;
2776 
2777 	/*
2778 	 * For I/O registers, if bottom bit is set, and the next bit up
2779 	 * isn't clear, we know we have a BAR that doesn't conform to the
2780 	 * spec, so ignore it.  Also, sanity check the size of the data
2781 	 * area against the resource type involved.  Memory must be at least
2782 	 * 16 bytes in size, while I/O ranges must be at least 4.
2783 	 */
2784 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2785 		return (barlen);
2786 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2787 	    (type == SYS_RES_IOPORT && mapsize < 2))
2788 		return (barlen);
2789 
2790 	/* Save a record of this BAR. */
2791 	pm = pci_add_bar(dev, reg, map, mapsize);
2792 	if (bootverbose) {
2793 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2794 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2795 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2796 			printf(", port disabled\n");
2797 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2798 			printf(", memory disabled\n");
2799 		else
2800 			printf(", enabled\n");
2801 	}
2802 
2803 	/*
2804 	 * If base is 0, then we have problems if this architecture does
2805 	 * not allow that.  It is best to ignore such entries for the
2806 	 * moment.  These will be allocated later if the driver specifically
2807 	 * requests them.  However, some removable busses look better when
2808 	 * all resources are allocated, so allow '0' to be overridden.
2809 	 *
2810 	 * Similarly treat maps whose value is the same as the test value
2811 	 * read back.  These maps have had all f's written to them by the
2812 	 * BIOS in an attempt to disable the resources.
2813 	 */
2814 	if (!force && (basezero || map == testval))
2815 		return (barlen);
2816 	if ((u_long)base != base) {
2817 		device_printf(bus,
2818 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2819 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2820 		    pci_get_function(dev), reg);
2821 		return (barlen);
2822 	}
2823 
2824 	/*
2825 	 * This code theoretically does the right thing, but has
2826 	 * undesirable side effects in some cases where peripherals
2827 	 * respond oddly to having these bits enabled.  Allow the user
2828 	 * to turn them off (since pci_enable_io_modes is 1 by
2829 	 * default).
2830 	 */
2831 	if (pci_enable_io_modes) {
2832 		/* Turn on resources that have been left off by a lazy BIOS */
2833 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2834 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2835 			cmd |= PCIM_CMD_PORTEN;
2836 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2837 		}
2838 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2839 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2840 			cmd |= PCIM_CMD_MEMEN;
2841 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2842 		}
2843 	} else {
2844 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2845 			return (barlen);
2846 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2847 			return (barlen);
2848 	}
2849 
2850 	count = (pci_addr_t)1 << mapsize;
2851 	if (basezero || base == pci_mapbase(testval)) {
2852 		start = 0;	/* Let the parent decide. */
2853 		end = ~0ul;
2854 	} else {
2855 		start = base;
2856 		end = base + count - 1;
2857 	}
2858 	resource_list_add(rl, type, reg, start, end, count);
2859 
2860 	/*
2861 	 * Try to allocate the resource for this BAR from our parent
2862 	 * so that this resource range is already reserved.  The
2863 	 * driver for this device will later inherit this resource in
2864 	 * pci_alloc_resource().
2865 	 */
2866 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2867 	    prefetch ? RF_PREFETCHABLE : 0);
2868 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2869 		/*
2870 		 * If the allocation fails, try to allocate a resource for
2871 		 * this BAR using any available range.  The firmware felt
2872 		 * it was important enough to assign a resource, so don't
2873 		 * disable decoding if we can help it.
2874 		 */
2875 		resource_list_delete(rl, type, reg);
2876 		resource_list_add(rl, type, reg, 0, ~0ul, count);
2877 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2878 		    count, prefetch ? RF_PREFETCHABLE : 0);
2879 	}
2880 	if (res == NULL) {
2881 		/*
2882 		 * If the allocation fails, delete the resource list entry
2883 		 * and disable decoding for this device.
2884 		 *
2885 		 * If the driver requests this resource in the future,
2886 		 * pci_reserve_map() will try to allocate a fresh
2887 		 * resource range.
2888 		 */
2889 		resource_list_delete(rl, type, reg);
2890 		pci_disable_io(dev, type);
2891 		if (bootverbose)
2892 			device_printf(bus,
2893 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2894 			    pci_get_domain(dev), pci_get_bus(dev),
2895 			    pci_get_slot(dev), pci_get_function(dev), reg);
2896 	} else {
2897 		start = rman_get_start(res);
2898 		pci_write_bar(dev, pm, start);
2899 	}
2900 	return (barlen);
2901 }
2902 
2903 /*
2904  * For ATA devices we need to decide early what addressing mode to use.
2905  * Legacy mode demands that the primary and secondary ATA ports sit at
2906  * the same addresses that old ISA hardware did.  This dictates that we
2907  * use those addresses and ignore the BARs if we cannot set PCI native
2908  * addressing mode.
2909  */
2910 static void
2911 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2912     uint32_t prefetchmask)
2913 {
2914 	struct resource *r;
2915 	int rid, type, progif;
2916 #if 0
2917 	/* if this device supports PCI native addressing use it */
2918 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2919 	if ((progif & 0x8a) == 0x8a) {
2920 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2921 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2922 			printf("Trying ATA native PCI addressing mode\n");
2923 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2924 		}
2925 	}
2926 #endif
2927 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2928 	type = SYS_RES_IOPORT;
2929 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2930 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2931 		    prefetchmask & (1 << 0));
2932 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2933 		    prefetchmask & (1 << 1));
2934 	} else {
2935 		rid = PCIR_BAR(0);
2936 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2937 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2938 		    0x1f7, 8, 0);
2939 		rid = PCIR_BAR(1);
2940 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2941 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2942 		    0x3f6, 1, 0);
2943 	}
2944 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2945 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2946 		    prefetchmask & (1 << 2));
2947 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2948 		    prefetchmask & (1 << 3));
2949 	} else {
2950 		rid = PCIR_BAR(2);
2951 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2952 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2953 		    0x177, 8, 0);
2954 		rid = PCIR_BAR(3);
2955 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2956 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2957 		    0x376, 1, 0);
2958 	}
2959 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2960 	    prefetchmask & (1 << 4));
2961 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2962 	    prefetchmask & (1 << 5));
2963 }
2964 
2965 static void
2966 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2967 {
2968 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2969 	pcicfgregs *cfg = &dinfo->cfg;
2970 	char tunable_name[64];
2971 	int irq;
2972 
2973 	/* Has to have an intpin to have an interrupt. */
2974 	if (cfg->intpin == 0)
2975 		return;
2976 
2977 	/* Let the user override the IRQ with a tunable. */
2978 	irq = PCI_INVALID_IRQ;
2979 	snprintf(tunable_name, sizeof(tunable_name),
2980 	    "hw.pci%d.%d.%d.INT%c.irq",
2981 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2982 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2983 		irq = PCI_INVALID_IRQ;
2984 
2985 	/*
2986 	 * If we didn't get an IRQ via the tunable, then we either use the
2987 	 * IRQ value in the intline register or we ask the bus to route an
2988 	 * interrupt for us.  If force_route is true, then we only use the
2989 	 * value in the intline register if the bus was unable to assign an
2990 	 * IRQ.
2991 	 */
2992 	if (!PCI_INTERRUPT_VALID(irq)) {
2993 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2994 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2995 		if (!PCI_INTERRUPT_VALID(irq))
2996 			irq = cfg->intline;
2997 	}
2998 
2999 	/* If after all that we don't have an IRQ, just bail. */
3000 	if (!PCI_INTERRUPT_VALID(irq))
3001 		return;
3002 
3003 	/* Update the config register if it changed. */
3004 	if (irq != cfg->intline) {
3005 		cfg->intline = irq;
3006 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3007 	}
3008 
3009 	/* Add this IRQ as rid 0 interrupt resource. */
3010 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3011 }
3012 
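     /*
      * Example loader.conf(5) entry for the tunable above, forcing the
      * INTA pin of the device in domain 0, bus 0, slot 3 to IRQ 10
      * (illustrative only; the value must be in the range 1..254):
      *
      *	hw.pci0.0.3.INTA.irq="10"
      */
     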
3013 /* Perform early OHCI takeover from SMM. */
3014 static void
3015 ohci_early_takeover(device_t self)
3016 {
3017 	struct resource *res;
3018 	uint32_t ctl;
3019 	int rid;
3020 	int i;
3021 
3022 	rid = PCIR_BAR(0);
3023 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3024 	if (res == NULL)
3025 		return;
3026 
3027 	ctl = bus_read_4(res, OHCI_CONTROL);
3028 	if (ctl & OHCI_IR) {
3029 		if (bootverbose)
3030 			printf("ohci early: "
3031 			    "SMM active, request owner change\n");
3032 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3033 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3034 			DELAY(1000);
3035 			ctl = bus_read_4(res, OHCI_CONTROL);
3036 		}
3037 		if (ctl & OHCI_IR) {
3038 			if (bootverbose)
3039 				printf("ohci early: "
3040 				    "SMM does not respond, resetting\n");
3041 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3042 		}
3043 		/* Disable interrupts */
3044 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3045 	}
3046 
3047 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3048 }
3049 
3050 /* Perform early UHCI takeover from SMM. */
3051 static void
3052 uhci_early_takeover(device_t self)
3053 {
3054 	struct resource *res;
3055 	int rid;
3056 
3057 	/*
3058 	 * Set the PIRQD enable bit and switch off all the others. We don't
3059 	 * want legacy support to interfere with us.  XXX Does this also mean
3060 	 * that the BIOS won't touch the keyboard anymore if it is connected
3061 	 * to the ports of the root hub?
3062 	 */
3063 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3064 
3065 	/* Disable interrupts */
3066 	rid = PCI_UHCI_BASE_REG;
3067 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3068 	if (res != NULL) {
3069 		bus_write_2(res, UHCI_INTR, 0);
3070 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3071 	}
3072 }
3073 
3074 /* Perform early EHCI takeover from SMM. */
3075 static void
3076 ehci_early_takeover(device_t self)
3077 {
3078 	struct resource *res;
3079 	uint32_t cparams;
3080 	uint32_t eec;
3081 	uint8_t eecp;
3082 	uint8_t bios_sem;
3083 	uint8_t offs;
3084 	int rid;
3085 	int i;
3086 
3087 	rid = PCIR_BAR(0);
3088 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3089 	if (res == NULL)
3090 		return;
3091 
3092 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3093 
3094 	/* Synchronise with the BIOS if it owns the controller. */
3095 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3096 	    eecp = EHCI_EECP_NEXT(eec)) {
3097 		eec = pci_read_config(self, eecp, 4);
3098 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3099 			continue;
3100 		}
3101 		bios_sem = pci_read_config(self, eecp +
3102 		    EHCI_LEGSUP_BIOS_SEM, 1);
3103 		if (bios_sem == 0) {
3104 			continue;
3105 		}
3106 		if (bootverbose)
3107 			printf("ehci early: "
3108 			    "SMM active, request owner change\n");
3109 
3110 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3111 
3112 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3113 			DELAY(1000);
3114 			bios_sem = pci_read_config(self, eecp +
3115 			    EHCI_LEGSUP_BIOS_SEM, 1);
3116 		}
3117 
3118 		if (bios_sem != 0) {
3119 			if (bootverbose)
3120 				printf("ehci early: "
3121 				    "SMM does not respond\n");
3122 		}
3123 		/* Disable interrupts */
3124 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3125 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3126 	}
3127 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3128 }
3129 
3130 /* Perform early XHCI takeover from SMM. */
3131 static void
3132 xhci_early_takeover(device_t self)
3133 {
3134 	struct resource *res;
3135 	uint32_t cparams;
3136 	uint32_t eec;
3137 	uint8_t eecp;
3138 	uint8_t bios_sem;
3139 	uint8_t offs;
3140 	int rid;
3141 	int i;
3142 
3143 	rid = PCIR_BAR(0);
3144 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3145 	if (res == NULL)
3146 		return;
3147 
3148 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3149 
3150 	eec = -1;
3151 
3152 	/* Synchronise with the BIOS if it owns the controller. */
3153 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3154 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3155 		eec = bus_read_4(res, eecp);
3156 
3157 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3158 			continue;
3159 
3160 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3161 		if (bios_sem == 0)
3162 			continue;
3163 
3164 		if (bootverbose)
3165 			printf("xhci early: "
3166 			    "SMM active, request owner change\n");
3167 
3168 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3169 
3170 		/* Wait a maximum of 5 seconds. */
3171 
3172 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3173 			DELAY(1000);
3174 			bios_sem = bus_read_1(res, eecp +
3175 			    XHCI_XECP_BIOS_SEM);
3176 		}
3177 
3178 		if (bios_sem != 0) {
3179 			if (bootverbose)
3180 				printf("xhci early: "
3181 				    "SMM does not respond\n");
3182 		}
3183 
3184 		/* Disable interrupts */
3185 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3186 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3187 		bus_read_4(res, offs + XHCI_USBSTS);
3188 	}
3189 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3190 }
3191 
3192 void
3193 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3194 {
3195 	struct pci_devinfo *dinfo;
3196 	pcicfgregs *cfg;
3197 	struct resource_list *rl;
3198 	const struct pci_quirk *q;
3199 	uint32_t devid;
3200 	int i;
3201 
3202 	dinfo = device_get_ivars(dev);
3203 	cfg = &dinfo->cfg;
3204 	rl = &dinfo->resources;
3205 	devid = (cfg->device << 16) | cfg->vendor;
3206 
3207 	/* ATA devices need special map treatment. */
3208 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3209 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3210 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3211 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3212 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3213 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3214 	else
3215 		for (i = 0; i < cfg->nummaps;) {
3216 			/*
3217 			 * Skip quirked resources.
3218 			 */
3219 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3220 				if (q->devid == devid &&
3221 				    q->type == PCI_QUIRK_UNMAP_REG &&
3222 				    q->arg1 == PCIR_BAR(i))
3223 					break;
3224 			if (q->devid != 0) {
3225 				i++;
3226 				continue;
3227 			}
3228 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3229 			    prefetchmask & (1 << i));
3230 		}
3231 
3232 	/*
3233 	 * Add additional, quirked resources.
3234 	 */
3235 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3236 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3237 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3238 
3239 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3240 #ifdef __PCI_REROUTE_INTERRUPT
3241 		/*
3242 		 * Try to re-route interrupts. Sometimes the BIOS or
3243 		 * firmware may leave bogus values in these registers.
3244 		 * If the re-route fails, then just stick with what we
3245 		 * have.
3246 		 */
3247 		pci_assign_interrupt(bus, dev, 1);
3248 #else
3249 		pci_assign_interrupt(bus, dev, 0);
3250 #endif
3251 	}
3252 
3253 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3254 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3255 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3256 			xhci_early_takeover(dev);
3257 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3258 			ehci_early_takeover(dev);
3259 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3260 			ohci_early_takeover(dev);
3261 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3262 			uhci_early_takeover(dev);
3263 	}
3264 }
3265 
3266 void
3267 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3268 {
3269 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3270 	device_t pcib = device_get_parent(dev);
3271 	struct pci_devinfo *dinfo;
3272 	int maxslots;
3273 	int s, f, pcifunchigh;
3274 	uint8_t hdrtype;
3275 
3276 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3277 	    ("dinfo_size too small"));
3278 	maxslots = PCIB_MAXSLOTS(pcib);
3279 	for (s = 0; s <= maxslots; s++) {
3280 		pcifunchigh = 0;
3281 		f = 0;
3282 		DELAY(1);
3283 		hdrtype = REG(PCIR_HDRTYPE, 1);
3284 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3285 			continue;
3286 		if (hdrtype & PCIM_MFDEV)
3287 			pcifunchigh = PCI_FUNCMAX;
3288 		for (f = 0; f <= pcifunchigh; f++) {
3289 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3290 			    dinfo_size);
3291 			if (dinfo != NULL) {
3292 				pci_add_child(dev, dinfo);
3293 			}
3294 		}
3295 	}
3296 #undef REG
3297 }
3298 
3299 void
3300 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3301 {
3302 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3303 	device_set_ivars(dinfo->cfg.dev, dinfo);
3304 	resource_list_init(&dinfo->resources);
3305 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3306 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3307 	pci_print_verbose(dinfo);
3308 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3309 }
3310 
3311 static int
3312 pci_probe(device_t dev)
3313 {
3314 
3315 	device_set_desc(dev, "PCI bus");
3316 
3317 	/* Allow other subclasses to override this driver. */
3318 	return (BUS_PROBE_GENERIC);
3319 }
3320 
3321 int
3322 pci_attach_common(device_t dev)
3323 {
3324 	struct pci_softc *sc;
3325 	int busno, domain;
3326 #ifdef PCI_DMA_BOUNDARY
3327 	int error, tag_valid;
3328 #endif
3329 
3330 	sc = device_get_softc(dev);
3331 	domain = pcib_get_domain(dev);
3332 	busno = pcib_get_bus(dev);
3333 	if (bootverbose)
3334 		device_printf(dev, "domain=%d, physical bus=%d\n",
3335 		    domain, busno);
3336 #ifdef PCI_DMA_BOUNDARY
3337 	tag_valid = 0;
3338 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
3339 	    devclass_find("pci")) {
3340 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
3341 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3342 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
3343 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
3344 		if (error)
3345 			device_printf(dev, "Failed to create DMA tag: %d\n",
3346 			    error);
3347 		else
3348 			tag_valid = 1;
3349 	}
3350 	if (!tag_valid)
3351 #endif
3352 		sc->sc_dma_tag = bus_get_dma_tag(dev);
3353 	return (0);
3354 }
3355 
3356 static int
3357 pci_attach(device_t dev)
3358 {
3359 	int busno, domain, error;
3360 
3361 	error = pci_attach_common(dev);
3362 	if (error)
3363 		return (error);
3364 
3365 	/*
3366 	 * Since there can be multiple independently numbered PCI
3367 	 * busses on systems with multiple PCI domains, we can't use
3368 	 * the unit number to decide which bus we are probing. We ask
3369 	 * the parent pcib what our domain and bus numbers are.
3370 	 */
3371 	domain = pcib_get_domain(dev);
3372 	busno = pcib_get_bus(dev);
3373 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3374 	return (bus_generic_attach(dev));
3375 }
3376 
3377 static void
3378 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3379     int state)
3380 {
3381 	device_t child, pcib;
3382 	struct pci_devinfo *dinfo;
3383 	int dstate, i;
3384 
3385 	/*
3386 	 * Set the device to the given state.  If the firmware suggests
3387 	 * a different power state, use it instead.  If power management
3388 	 * is not present, the firmware is responsible for managing
3389 	 * device power.  Skip children that aren't attached since they
3390 	 * are handled separately.
3391 	 */
3392 	pcib = device_get_parent(dev);
3393 	for (i = 0; i < numdevs; i++) {
3394 		child = devlist[i];
3395 		dinfo = device_get_ivars(child);
3396 		dstate = state;
3397 		if (device_is_attached(child) &&
3398 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3399 			pci_set_powerstate(child, dstate);
3400 	}
3401 }
3402 
3403 int
3404 pci_suspend(device_t dev)
3405 {
3406 	device_t child, *devlist;
3407 	struct pci_devinfo *dinfo;
3408 	int error, i, numdevs;
3409 
3410 	/*
3411 	 * Save the PCI configuration space for each child and set the
3412 	 * device in the appropriate power state for this sleep state.
3413 	 */
3414 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3415 		return (error);
3416 	for (i = 0; i < numdevs; i++) {
3417 		child = devlist[i];
3418 		dinfo = device_get_ivars(child);
3419 		pci_cfg_save(child, dinfo, 0);
3420 	}
3421 
3422 	/* Suspend devices before potentially powering them down. */
3423 	error = bus_generic_suspend(dev);
3424 	if (error) {
3425 		free(devlist, M_TEMP);
3426 		return (error);
3427 	}
3428 	if (pci_do_power_suspend)
3429 		pci_set_power_children(dev, devlist, numdevs,
3430 		    PCI_POWERSTATE_D3);
3431 	free(devlist, M_TEMP);
3432 	return (0);
3433 }
3434 
3435 int
3436 pci_resume(device_t dev)
3437 {
3438 	device_t child, *devlist;
3439 	struct pci_devinfo *dinfo;
3440 	int error, i, numdevs;
3441 
3442 	/*
3443 	 * Set each child to D0 and restore its PCI configuration space.
3444 	 */
3445 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3446 		return (error);
3447 	if (pci_do_power_resume)
3448 		pci_set_power_children(dev, devlist, numdevs,
3449 		    PCI_POWERSTATE_D0);
3450 
3451 	/* Now that the device is powered up, restore its config space. */
3452 	for (i = 0; i < numdevs; i++) {
3453 		child = devlist[i];
3454 		dinfo = device_get_ivars(child);
3455 
3456 		pci_cfg_restore(child, dinfo);
3457 		if (!device_is_attached(child))
3458 			pci_cfg_save(child, dinfo, 1);
3459 	}
3460 
3461 	/*
3462 	 * Resume critical devices first, then everything else later.
3463 	 */
3464 	for (i = 0; i < numdevs; i++) {
3465 		child = devlist[i];
3466 		switch (pci_get_class(child)) {
3467 		case PCIC_DISPLAY:
3468 		case PCIC_MEMORY:
3469 		case PCIC_BRIDGE:
3470 		case PCIC_BASEPERIPH:
3471 			DEVICE_RESUME(child);
3472 			break;
3473 		}
3474 	}
3475 	for (i = 0; i < numdevs; i++) {
3476 		child = devlist[i];
3477 		switch (pci_get_class(child)) {
3478 		case PCIC_DISPLAY:
3479 		case PCIC_MEMORY:
3480 		case PCIC_BRIDGE:
3481 		case PCIC_BASEPERIPH:
3482 			break;
3483 		default:
3484 			DEVICE_RESUME(child);
3485 		}
3486 	}
3487 	free(devlist, M_TEMP);
3488 	return (0);
3489 }
3490 
3491 static void
3492 pci_load_vendor_data(void)
3493 {
3494 	caddr_t data;
3495 	void *ptr;
3496 	size_t sz;
3497 
3498 	data = preload_search_by_type("pci_vendor_data");
3499 	if (data != NULL) {
3500 		ptr = preload_fetch_addr(data);
3501 		sz = preload_fetch_size(data);
3502 		if (ptr != NULL && sz != 0) {
3503 			pci_vendordata = ptr;
3504 			pci_vendordata_size = sz;
3505 			/* terminate the database */
3506 			pci_vendordata[pci_vendordata_size] = '\n';
3507 		}
3508 	}
3509 }
3510 
3511 void
3512 pci_driver_added(device_t dev, driver_t *driver)
3513 {
3514 	int numdevs;
3515 	device_t *devlist;
3516 	device_t child;
3517 	struct pci_devinfo *dinfo;
3518 	int i;
3519 
3520 	if (bootverbose)
3521 		device_printf(dev, "driver added\n");
3522 	DEVICE_IDENTIFY(driver, dev);
3523 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3524 		return;
3525 	for (i = 0; i < numdevs; i++) {
3526 		child = devlist[i];
3527 		if (device_get_state(child) != DS_NOTPRESENT)
3528 			continue;
3529 		dinfo = device_get_ivars(child);
3530 		pci_print_verbose(dinfo);
3531 		if (bootverbose)
3532 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3533 		pci_cfg_restore(child, dinfo);
3534 		if (device_probe_and_attach(child) != 0)
3535 			pci_child_detached(dev, child);
3536 	}
3537 	free(devlist, M_TEMP);
3538 }
3539 
3540 int
3541 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3542     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3543 {
3544 	struct pci_devinfo *dinfo;
3545 	struct msix_table_entry *mte;
3546 	struct msix_vector *mv;
3547 	uint64_t addr;
3548 	uint32_t data;
3549 	void *cookie;
3550 	int error, rid;
3551 
3552 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3553 	    arg, &cookie);
3554 	if (error)
3555 		return (error);
3556 
3557 	/* If this is not a direct child, just bail out. */
3558 	if (device_get_parent(child) != dev) {
3559 		*cookiep = cookie;
3560 		return(0);
3561 	}
3562 
3563 	rid = rman_get_rid(irq);
3564 	if (rid == 0) {
3565 		/* Make sure that INTx is enabled */
3566 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3567 	} else {
3568 		/*
3569 		 * Check to see if the interrupt is MSI or MSI-X.
3570 		 * Ask our parent to map the MSI and give
3571 		 * us the address and data register values.
3572 		 * If we fail for some reason, teardown the
3573 		 * interrupt handler.
3574 		 */
3575 		dinfo = device_get_ivars(child);
3576 		if (dinfo->cfg.msi.msi_alloc > 0) {
3577 			if (dinfo->cfg.msi.msi_addr == 0) {
3578 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3579 			    ("MSI has handlers, but vectors not mapped"));
3580 				error = PCIB_MAP_MSI(device_get_parent(dev),
3581 				    child, rman_get_start(irq), &addr, &data);
3582 				if (error)
3583 					goto bad;
3584 				dinfo->cfg.msi.msi_addr = addr;
3585 				dinfo->cfg.msi.msi_data = data;
3586 			}
3587 			if (dinfo->cfg.msi.msi_handlers == 0)
3588 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3589 				    dinfo->cfg.msi.msi_data);
3590 			dinfo->cfg.msi.msi_handlers++;
3591 		} else {
3592 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3593 			    ("No MSI or MSI-X interrupts allocated"));
3594 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3595 			    ("MSI-X index too high"));
3596 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3597 			KASSERT(mte->mte_vector != 0, ("no message vector"));
3598 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3599 			KASSERT(mv->mv_irq == rman_get_start(irq),
3600 			    ("IRQ mismatch"));
3601 			if (mv->mv_address == 0) {
3602 				KASSERT(mte->mte_handlers == 0,
3603 		    ("MSI-X table entry has handlers, but vector not mapped"));
3604 				error = PCIB_MAP_MSI(device_get_parent(dev),
3605 				    child, rman_get_start(irq), &addr, &data);
3606 				if (error)
3607 					goto bad;
3608 				mv->mv_address = addr;
3609 				mv->mv_data = data;
3610 			}
3611 			if (mte->mte_handlers == 0) {
3612 				pci_enable_msix(child, rid - 1, mv->mv_address,
3613 				    mv->mv_data);
3614 				pci_unmask_msix(child, rid - 1);
3615 			}
3616 			mte->mte_handlers++;
3617 		}
3618 
3619 		/* Make sure that INTx is disabled if we are using MSI/MSI-X */
3620 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3621 	bad:
3622 		if (error) {
3623 			(void)bus_generic_teardown_intr(dev, child, irq,
3624 			    cookie);
3625 			return (error);
3626 		}
3627 	}
3628 	*cookiep = cookie;
3629 	return (0);
3630 }
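
/*
 * Example: the usual driver-side path into pci_setup_intr() above, via
 * bus_setup_intr().  An illustrative sketch only; "sc", "foo_intr" and
 * the softc fields are hypothetical driver names.  Rid 0 is the legacy
 * INTx interrupt; MSI rids start at 1 once pci_alloc_msi() succeeds.
 *
 *	count = 1;
 *	rid = 0;
 *	if (pci_alloc_msi(dev, &count) == 0)
 *		rid = 1;
 *	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 *	    RF_ACTIVE);
 *	error = bus_setup_intr(dev, sc->irq_res,
 *	    INTR_TYPE_NET | INTR_MPSAFE, NULL, foo_intr, sc,
 *	    &sc->irq_cookie);
 */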
3631 
3632 int
3633 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3634     void *cookie)
3635 {
3636 	struct msix_table_entry *mte;
3637 	struct resource_list_entry *rle;
3638 	struct pci_devinfo *dinfo;
3639 	int error, rid;
3640 
3641 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3642 		return (EINVAL);
3643 
3644 	/* If this isn't a direct child, just bail out */
3645 	if (device_get_parent(child) != dev)
3646 		return (bus_generic_teardown_intr(dev, child, irq, cookie));
3647 
3648 	rid = rman_get_rid(irq);
3649 	if (rid == 0) {
3650 		/* Mask INTx */
3651 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3652 	} else {
3653 		/*
3654 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3655 		 * decrement the appropriate handlers count and mask the
3656 		 * MSI-X message, or disable MSI messages if the count
3657 		 * drops to 0.
3658 		 */
3659 		dinfo = device_get_ivars(child);
3660 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3661 		if (rle->res != irq)
3662 			return (EINVAL);
3663 		if (dinfo->cfg.msi.msi_alloc > 0) {
3664 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3665 			    ("MSI-X index too high"));
3666 			if (dinfo->cfg.msi.msi_handlers == 0)
3667 				return (EINVAL);
3668 			dinfo->cfg.msi.msi_handlers--;
3669 			if (dinfo->cfg.msi.msi_handlers == 0)
3670 				pci_disable_msi(child);
3671 		} else {
3672 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3673 			    ("No MSI or MSI-X interrupts allocated"));
3674 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3675 			    ("MSI-X index too high"));
3676 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3677 			if (mte->mte_handlers == 0)
3678 				return (EINVAL);
3679 			mte->mte_handlers--;
3680 			if (mte->mte_handlers == 0)
3681 				pci_mask_msix(child, rid - 1);
3682 		}
3683 	}
3684 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3685 	if (rid > 0)
3686 		KASSERT(error == 0,
3687 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3688 	return (error);
3689 }
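
/*
 * Example: the matching driver-side teardown, using the same
 * hypothetical softc fields as the sketch above; the final call only
 * applies when MSI/MSI-X messages were allocated.
 *
 *	bus_teardown_intr(dev, sc->irq_res, sc->irq_cookie);
 *	bus_release_resource(dev, SYS_RES_IRQ, rid, sc->irq_res);
 *	pci_release_msi(dev);
 */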
3690 
3691 int
3692 pci_print_child(device_t dev, device_t child)
3693 {
3694 	struct pci_devinfo *dinfo;
3695 	struct resource_list *rl;
3696 	int retval = 0;
3697 
3698 	dinfo = device_get_ivars(child);
3699 	rl = &dinfo->resources;
3700 
3701 	retval += bus_print_child_header(dev, child);
3702 
3703 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3704 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3705 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3706 	if (device_get_flags(dev))
3707 		retval += printf(" flags %#x", device_get_flags(dev));
3708 
3709 	retval += printf(" at device %d.%d", pci_get_slot(child),
3710 	    pci_get_function(child));
3711 
3712 	retval += bus_print_child_footer(dev, child);
3713 
3714 	return (retval);
3715 }
3716 
3717 static const struct
3718 {
3719 	int		class;
3720 	int		subclass;
3721 	const char	*desc;
3722 } pci_nomatch_tab[] = {
3723 	{PCIC_OLD,		-1,			"old"},
3724 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3725 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3726 	{PCIC_STORAGE,		-1,			"mass storage"},
3727 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3728 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3729 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3730 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3731 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3732 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3733 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3734 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3735 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
3736 	{PCIC_NETWORK,		-1,			"network"},
3737 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3738 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3739 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"FDDI"},
3740 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3741 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3742 	{PCIC_DISPLAY,		-1,			"display"},
3743 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3744 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3745 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3746 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3747 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3748 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3749 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3750 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3751 	{PCIC_MEMORY,		-1,			"memory"},
3752 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3753 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3754 	{PCIC_BRIDGE,		-1,			"bridge"},
3755 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3756 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3757 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3758 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3759 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3760 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3761 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3762 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3763 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3764 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3765 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3766 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3767 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3768 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3769 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3770 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3771 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3772 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3773 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3774 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3775 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3776 	{PCIC_INPUTDEV,		-1,			"input device"},
3777 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3778 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3779 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3780 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3781 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3782 	{PCIC_DOCKING,		-1,			"docking station"},
3783 	{PCIC_PROCESSOR,	-1,			"processor"},
3784 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3785 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3786 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3787 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3788 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3789 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3790 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3791 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3792 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"IrDA"},
3793 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3794 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3795 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3796 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3797 	{PCIC_SATCOM,		-1,			"satellite communication"},
3798 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3799 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3800 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3801 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3802 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3803 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3804 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3805 	{PCIC_DASP,		-1,			"dasp"},
3806 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3807 	{0, 0,		NULL}
3808 };
3809 
3810 void
3811 pci_probe_nomatch(device_t dev, device_t child)
3812 {
3813 	int i;
3814 	const char *cp, *scp;
3815 	char *device;
3816 
3817 	/*
3818 	 * Look for a listing for this device in a loaded device database.
3819 	 */
3820 	if ((device = pci_describe_device(child)) != NULL) {
3821 		device_printf(dev, "<%s>", device);
3822 		free(device, M_DEVBUF);
3823 	} else {
3824 		/*
3825 		 * Scan the class/subclass descriptions for a general
3826 		 * description.
3827 		 */
3828 		cp = "unknown";
3829 		scp = NULL;
3830 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3831 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3832 				if (pci_nomatch_tab[i].subclass == -1) {
3833 					cp = pci_nomatch_tab[i].desc;
3834 				} else if (pci_nomatch_tab[i].subclass ==
3835 				    pci_get_subclass(child)) {
3836 					scp = pci_nomatch_tab[i].desc;
3837 				}
3838 			}
3839 		}
3840 		device_printf(dev, "<%s%s%s>",
3841 		    cp ? cp : "",
3842 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3843 		    scp ? scp : "");
3844 	}
3845 	printf(" at device %d.%d (no driver attached)\n",
3846 	    pci_get_slot(child), pci_get_function(child));
3847 	pci_cfg_save(child, device_get_ivars(child), 1);
3848 }
3849 
3850 void
3851 pci_child_detached(device_t dev, device_t child)
3852 {
3853 	struct pci_devinfo *dinfo;
3854 	struct resource_list *rl;
3855 
3856 	dinfo = device_get_ivars(child);
3857 	rl = &dinfo->resources;
3858 
3859 	/*
3860 	 * Have to deallocate IRQs before releasing any MSI messages and
3861 	 * have to release MSI messages before deallocating any memory
3862 	 * BARs.
3863 	 */
3864 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
3865 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
3866 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
3867 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
3868 		(void)pci_release_msi(child);
3869 	}
3870 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
3871 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
3872 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
3873 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
3874 
3875 	pci_cfg_save(child, dinfo, 1);
3876 }
3877 
3878 /*
3879  * Parse the PCI device database, if loaded, and return a pointer to a
3880  * description of the device.
3881  *
3882  * The database is flat text formatted as follows:
3883  *
3884  * Any line not in a valid format is ignored.
3885  * Lines are terminated with newline '\n' characters.
3886  *
3887  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3888  * the vendor name.
3889  *
3890  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3891  * - devices cannot be listed without a corresponding VENDOR line.
3892  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3893  * another TAB, then the device name.
3894  */
3895 
3896 /*
3897  * Assuming (ptr) points to the beginning of a line in the database,
3898  * return the vendor or device and description of the next entry.
3899  * Whichever of (vendor) or (device) does not apply to the entry type
3900  * is set to -1.  Returns nonzero at the end of the database.
3901  *
3902  * Note that this is not fully robust in the face of corrupt data;
3903  * we guard against that by appending a newline to the end of the
3904  * database when it is loaded.
3905  */
3906 static int
3907 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3908 {
3909 	char	*cp = *ptr;
3910 	int	left;
3911 
3912 	*device = -1;
3913 	*vendor = -1;
3914 	**desc = '\0';
3915 	for (;;) {
3916 		left = pci_vendordata_size - (cp - pci_vendordata);
3917 		if (left <= 0) {
3918 			*ptr = cp;
3919 			return (1);
3920 		}
3921 
3922 		/* vendor entry? */
3923 		if (*cp != '\t' &&
3924 		    sscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
3925 			break;
3926 		/* device entry? */
3927 		if (*cp == '\t' &&
3928 		    sscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
3929 			break;
3930 
3931 		/* skip to next line */
3932 		while (*cp != '\n' && left > 0) {
3933 			cp++;
3934 			left--;
3935 		}
3936 		if (*cp == '\n') {
3937 			cp++;
3938 			left--;
3939 		}
3940 	}
3941 	/* skip to next line */
3942 	while (*cp != '\n' && left > 0) {
3943 		cp++;
3944 		left--;
3945 	}
3946 	if (*cp == '\n' && left > 0)
3947 		cp++;
3948 	*ptr = cp;
3949 	return (0);
3950 }
3951 
3952 static char *
3953 pci_describe_device(device_t dev)
3954 {
3955 	int	vendor, device;
3956 	char	*desc, *vp, *dp, *line;
3957 
3958 	desc = vp = dp = NULL;
3959 
3960 	/*
3961 	 * If we have no vendor data, we can't do anything.
3962 	 */
3963 	if (pci_vendordata == NULL)
3964 		goto out;
3965 
3966 	/*
3967 	 * Scan the vendor data looking for this device
3968 	 */
3969 	line = pci_vendordata;
3970 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3971 		goto out;
3972 	for (;;) {
3973 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3974 			goto out;
3975 		if (vendor == pci_get_vendor(dev))
3976 			break;
3977 	}
3978 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3979 		goto out;
3980 	for (;;) {
3981 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3982 			*dp = 0;
3983 			break;
3984 		}
3985 		if (vendor != -1) {
3986 			*dp = 0;
3987 			break;
3988 		}
3989 		if (device == pci_get_device(dev))
3990 			break;
3991 	}
3992 	if (dp[0] == '\0')
3993 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3994 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3995 	    NULL)
3996 		sprintf(desc, "%s, %s", vp, dp);
3997 out:
3998 	if (vp != NULL)
3999 		free(vp, M_DEVBUF);
4000 	if (dp != NULL)
4001 		free(dp, M_DEVBUF);
4002 	return (desc);
4003 }
4004 
4005 int
4006 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4007 {
4008 	struct pci_devinfo *dinfo;
4009 	pcicfgregs *cfg;
4010 
4011 	dinfo = device_get_ivars(child);
4012 	cfg = &dinfo->cfg;
4013 
4014 	switch (which) {
4015 	case PCI_IVAR_ETHADDR:
4016 		/*
4017 		 * The generic accessor doesn't deal with failure, so
4018 		 * we set the return value, then return an error.
4019 		 */
4020 		*((uint8_t **) result) = NULL;
4021 		return (EINVAL);
4022 	case PCI_IVAR_SUBVENDOR:
4023 		*result = cfg->subvendor;
4024 		break;
4025 	case PCI_IVAR_SUBDEVICE:
4026 		*result = cfg->subdevice;
4027 		break;
4028 	case PCI_IVAR_VENDOR:
4029 		*result = cfg->vendor;
4030 		break;
4031 	case PCI_IVAR_DEVICE:
4032 		*result = cfg->device;
4033 		break;
4034 	case PCI_IVAR_DEVID:
4035 		*result = (cfg->device << 16) | cfg->vendor;
4036 		break;
4037 	case PCI_IVAR_CLASS:
4038 		*result = cfg->baseclass;
4039 		break;
4040 	case PCI_IVAR_SUBCLASS:
4041 		*result = cfg->subclass;
4042 		break;
4043 	case PCI_IVAR_PROGIF:
4044 		*result = cfg->progif;
4045 		break;
4046 	case PCI_IVAR_REVID:
4047 		*result = cfg->revid;
4048 		break;
4049 	case PCI_IVAR_INTPIN:
4050 		*result = cfg->intpin;
4051 		break;
4052 	case PCI_IVAR_IRQ:
4053 		*result = cfg->intline;
4054 		break;
4055 	case PCI_IVAR_DOMAIN:
4056 		*result = cfg->domain;
4057 		break;
4058 	case PCI_IVAR_BUS:
4059 		*result = cfg->bus;
4060 		break;
4061 	case PCI_IVAR_SLOT:
4062 		*result = cfg->slot;
4063 		break;
4064 	case PCI_IVAR_FUNCTION:
4065 		*result = cfg->func;
4066 		break;
4067 	case PCI_IVAR_CMDREG:
4068 		*result = cfg->cmdreg;
4069 		break;
4070 	case PCI_IVAR_CACHELNSZ:
4071 		*result = cfg->cachelnsz;
4072 		break;
4073 	case PCI_IVAR_MINGNT:
4074 		*result = cfg->mingnt;
4075 		break;
4076 	case PCI_IVAR_MAXLAT:
4077 		*result = cfg->maxlat;
4078 		break;
4079 	case PCI_IVAR_LATTIMER:
4080 		*result = cfg->lattimer;
4081 		break;
4082 	default:
4083 		return (ENOENT);
4084 	}
4085 	return (0);
4086 }
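
/*
 * Example: drivers normally reach pci_read_ivar() through the
 * pci_get_*() accessors in <dev/pci/pcivar.h>.  A hypothetical probe
 * routine ("foo_probe" and the IDs are illustrative only):
 *
 *	static int
 *	foo_probe(device_t dev)
 *	{
 *
 *		if (pci_get_vendor(dev) == 0x8086 &&
 *		    pci_get_device(dev) == 0x1229) {
 *			device_set_desc(dev, "Foo PCI Ethernet");
 *			return (BUS_PROBE_DEFAULT);
 *		}
 *		return (ENXIO);
 *	}
 */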
4087 
4088 int
4089 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4090 {
4091 	struct pci_devinfo *dinfo;
4092 
4093 	dinfo = device_get_ivars(child);
4094 
4095 	switch (which) {
4096 	case PCI_IVAR_INTPIN:
4097 		dinfo->cfg.intpin = value;
4098 		return (0);
4099 	case PCI_IVAR_ETHADDR:
4100 	case PCI_IVAR_SUBVENDOR:
4101 	case PCI_IVAR_SUBDEVICE:
4102 	case PCI_IVAR_VENDOR:
4103 	case PCI_IVAR_DEVICE:
4104 	case PCI_IVAR_DEVID:
4105 	case PCI_IVAR_CLASS:
4106 	case PCI_IVAR_SUBCLASS:
4107 	case PCI_IVAR_PROGIF:
4108 	case PCI_IVAR_REVID:
4109 	case PCI_IVAR_IRQ:
4110 	case PCI_IVAR_DOMAIN:
4111 	case PCI_IVAR_BUS:
4112 	case PCI_IVAR_SLOT:
4113 	case PCI_IVAR_FUNCTION:
4114 		return (EINVAL);	/* disallow for now */
4115 
4116 	default:
4117 		return (ENOENT);
4118 	}
4119 }
4120 
4121 #include "opt_ddb.h"
4122 #ifdef DDB
4123 #include <ddb/ddb.h>
4124 #include <sys/cons.h>
4125 
4126 /*
4127  * List resources based on PCI map registers, for use within ddb.
4128  */
4129 
4130 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4131 {
4132 	struct pci_devinfo *dinfo;
4133 	struct devlist *devlist_head;
4134 	struct pci_conf *p;
4135 	const char *name;
4136 	int i, error, none_count;
4137 
4138 	none_count = 0;
4139 	/* get the head of the device queue */
4140 	devlist_head = &pci_devq;
4141 
4142 	/*
4143 	 * Go through the list of devices and print out devices
4144 	 */
4145 	for (error = 0, i = 0,
4146 	     dinfo = STAILQ_FIRST(devlist_head);
4147 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4148 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4149 
4150 		/* Populate pd_name and pd_unit */
4151 		name = NULL;
4152 		if (dinfo->cfg.dev)
4153 			name = device_get_name(dinfo->cfg.dev);
4154 
4155 		p = &dinfo->conf;
4156 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4157 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4158 			(name && *name) ? name : "none",
4159 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4160 			none_count++,
4161 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4162 			p->pc_sel.pc_func, (p->pc_class << 16) |
4163 			(p->pc_subclass << 8) | p->pc_progif,
4164 			(p->pc_subdevice << 16) | p->pc_subvendor,
4165 			(p->pc_device << 16) | p->pc_vendor,
4166 			p->pc_revid, p->pc_hdr);
4167 	}
4168 }
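
/*
 * Example: from the ddb prompt this is invoked as "show pciregs"; each
 * output line has the shape below (device name and values illustrative):
 *
 *	foo0@pci0:0:25:0:	class=0x020000 card=0x00008086
 *	chip=0x12298086 rev=0x02 hdr=0x00
 */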
4169 #endif /* DDB */
4170 
4171 static struct resource *
4172 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4173     u_long start, u_long end, u_long count, u_int flags)
4174 {
4175 	struct pci_devinfo *dinfo = device_get_ivars(child);
4176 	struct resource_list *rl = &dinfo->resources;
4177 	struct resource_list_entry *rle;
4178 	struct resource *res;
4179 	struct pci_map *pm;
4180 	pci_addr_t map, testval;
4181 	int mapsize;
4182 
4183 	res = NULL;
4184 	pm = pci_find_bar(child, *rid);
4185 	if (pm != NULL) {
4186 		/* This is a BAR that we failed to allocate earlier. */
4187 		mapsize = pm->pm_size;
4188 		map = pm->pm_value;
4189 	} else {
4190 		/*
4191 		 * Weed out the bogons, and figure out how large the
4192 		 * BAR/map is.  BARs that read back 0 here are bogus
4193 		 * and unimplemented.  Note: atapci devices in legacy
4194 		 * mode are special and handled elsewhere in the code.
4195 		 * If you have an atapci device in legacy mode and it
4196 		 * fails here, that other code is broken.
4197 		 */
4198 		pci_read_bar(child, *rid, &map, &testval);
4199 
4200 		/*
4201 		 * Determine the size of the BAR and ignore BARs with a size
4202 		 * of 0.  Device ROM BARs use a different mask value.
4203 		 */
4204 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4205 			mapsize = pci_romsize(testval);
4206 		else
4207 			mapsize = pci_mapsize(testval);
4208 		if (mapsize == 0)
4209 			goto out;
4210 		pm = pci_add_bar(child, *rid, map, mapsize);
4211 	}
4212 
4213 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4214 		if (type != SYS_RES_MEMORY) {
4215 			if (bootverbose)
4216 				device_printf(dev,
4217 				    "child %s requested type %d for rid %#x,"
4218 				    " but the BAR says it is an memio\n",
4219 				    device_get_nameunit(child), type, *rid);
4220 			goto out;
4221 		}
4222 	} else {
4223 		if (type != SYS_RES_IOPORT) {
4224 			if (bootverbose)
4225 				device_printf(dev,
4226 				    "child %s requested type %d for rid %#x,"
4227 				    " but the BAR says it is an ioport\n",
4228 				    device_get_nameunit(child), type, *rid);
4229 			goto out;
4230 		}
4231 	}
4232 
4233 	/*
4234 	 * For real BARs, we need to override the size that
4235 	 * the driver requests, because that's what the BAR
4236 	 * actually uses and we would otherwise have a
4237 	 * situation where we might allocate the excess to
4238 	 * another driver, which won't work.
4239 	 */
4240 	count = (pci_addr_t)1 << mapsize;
4241 	if (RF_ALIGNMENT(flags) < mapsize)
4242 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4243 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4244 		flags |= RF_PREFETCHABLE;
4245 
4246 	/*
4247 	 * Allocate enough resource, and then write back the
4248 	 * appropriate BAR for that resource.
4249 	 */
4250 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
4251 	    start, end, count, flags & ~RF_ACTIVE);
4252 	if (res == NULL) {
4253 		device_printf(child,
4254 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4255 		    count, *rid, type, start, end);
4256 		goto out;
4257 	}
4258 	resource_list_add(rl, type, *rid, start, end, count);
4259 	rle = resource_list_find(rl, type, *rid);
4260 	if (rle == NULL)
4261 		panic("pci_reserve_map: unexpectedly can't find resource.");
4262 	rle->res = res;
4263 	rle->start = rman_get_start(res);
4264 	rle->end = rman_get_end(res);
4265 	rle->count = count;
4266 	rle->flags = RLE_RESERVED;
4267 	if (bootverbose)
4268 		device_printf(child,
4269 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4270 		    count, *rid, type, rman_get_start(res));
4271 	map = rman_get_start(res);
4272 	pci_write_bar(child, pm, map);
4273 out:
4274 	return (res);
4275 }
4276 
4277 struct resource *
4278 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4279 		   u_long start, u_long end, u_long count, u_int flags)
4280 {
4281 	struct pci_devinfo *dinfo;
4282 	struct resource_list *rl;
4283 	struct resource_list_entry *rle;
4284 	struct resource *res;
4285 	pcicfgregs *cfg;
4286 
4287 	if (device_get_parent(child) != dev)
4288 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4289 		    type, rid, start, end, count, flags));
4290 
4291 	/*
4292 	 * Perform lazy resource allocation
4293 	 */
4294 	dinfo = device_get_ivars(child);
4295 	rl = &dinfo->resources;
4296 	cfg = &dinfo->cfg;
4297 	switch (type) {
4298 	case SYS_RES_IRQ:
4299 		/*
4300 		 * Can't alloc legacy interrupt once MSI messages have
4301 		 * been allocated.
4302 		 */
4303 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4304 		    cfg->msix.msix_alloc > 0))
4305 			return (NULL);
4306 
4307 		/*
4308 		 * If the child device doesn't have an interrupt
4309 		 * routed and is deserving of an interrupt, try to
4310 		 * assign it one.
4311 		 */
4312 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4313 		    (cfg->intpin != 0))
4314 			pci_assign_interrupt(dev, child, 0);
4315 		break;
4316 	case SYS_RES_IOPORT:
4317 	case SYS_RES_MEMORY:
4318 #ifdef NEW_PCIB
4319 		/*
4320 		 * PCI-PCI bridge I/O window resources are not BARs.
4321 		 * For those allocations just pass the request up the
4322 		 * tree.
4323 		 */
4324 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4325 			switch (*rid) {
4326 			case PCIR_IOBASEL_1:
4327 			case PCIR_MEMBASE_1:
4328 			case PCIR_PMBASEL_1:
4329 				/*
4330 				 * XXX: Should we bother creating a resource
4331 				 * list entry?
4332 				 */
4333 				return (bus_generic_alloc_resource(dev, child,
4334 				    type, rid, start, end, count, flags));
4335 			}
4336 		}
4337 #endif
4338 		/* Reserve resources for this BAR if needed. */
4339 		rle = resource_list_find(rl, type, *rid);
4340 		if (rle == NULL) {
4341 			res = pci_reserve_map(dev, child, type, rid, start, end,
4342 			    count, flags);
4343 			if (res == NULL)
4344 				return (NULL);
4345 		}
4346 	}
4347 	return (resource_list_alloc(rl, dev, child, type, rid,
4348 	    start, end, count, flags));
4349 }
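
/*
 * Example: a driver triggers the lazy BAR reservation above simply by
 * allocating its BAR; the rid is the config-space BAR offset.  A sketch
 * with hypothetical softc fields:
 *
 *	sc->mem_rid = PCIR_BAR(0);
 *	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 *	    &sc->mem_rid, RF_ACTIVE);
 *	if (sc->mem_res == NULL)
 *		return (ENXIO);
 */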
4350 
4351 int
4352 pci_release_resource(device_t dev, device_t child, int type, int rid,
4353     struct resource *r)
4354 {
4355 	struct pci_devinfo *dinfo;
4356 	struct resource_list *rl;
4357 	pcicfgregs *cfg;
4358 
4359 	if (device_get_parent(child) != dev)
4360 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4361 		    type, rid, r));
4362 
4363 	dinfo = device_get_ivars(child);
4364 	cfg = &dinfo->cfg;
4365 #ifdef NEW_PCIB
4366 	/*
4367 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4368 	 * those allocations just pass the request up the tree.
4369 	 */
4370 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4371 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4372 		switch (rid) {
4373 		case PCIR_IOBASEL_1:
4374 		case PCIR_MEMBASE_1:
4375 		case PCIR_PMBASEL_1:
4376 			return (bus_generic_release_resource(dev, child, type,
4377 			    rid, r));
4378 		}
4379 	}
4380 #endif
4381 
4382 	rl = &dinfo->resources;
4383 	return (resource_list_release(rl, dev, child, type, rid, r));
4384 }
4385 
4386 int
4387 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4388     struct resource *r)
4389 {
4390 	struct pci_devinfo *dinfo;
4391 	int error;
4392 
4393 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4394 	if (error)
4395 		return (error);
4396 
4397 	/* Enable decoding in the command register when activating BARs. */
4398 	if (device_get_parent(child) == dev) {
4399 		/* Device ROMs need their decoding explicitly enabled. */
4400 		dinfo = device_get_ivars(child);
4401 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4402 			pci_write_bar(child, pci_find_bar(child, rid),
4403 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4404 		switch (type) {
4405 		case SYS_RES_IOPORT:
4406 		case SYS_RES_MEMORY:
4407 			error = PCI_ENABLE_IO(dev, child, type);
4408 			break;
4409 		}
4410 	}
4411 	return (error);
4412 }
4413 
4414 int
4415 pci_deactivate_resource(device_t dev, device_t child, int type,
4416     int rid, struct resource *r)
4417 {
4418 	struct pci_devinfo *dinfo;
4419 	int error;
4420 
4421 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4422 	if (error)
4423 		return (error);
4424 
4425 	/* Disable decoding for device ROMs. */
4426 	if (device_get_parent(child) == dev) {
4427 		dinfo = device_get_ivars(child);
4428 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4429 			pci_write_bar(child, pci_find_bar(child, rid),
4430 			    rman_get_start(r));
4431 	}
4432 	return (0);
4433 }
4434 
4435 void
4436 pci_delete_child(device_t dev, device_t child)
4437 {
4438 	struct resource_list_entry *rle;
4439 	struct resource_list *rl;
4440 	struct pci_devinfo *dinfo;
4441 
4442 	dinfo = device_get_ivars(child);
4443 	rl = &dinfo->resources;
4444 
4445 	if (device_is_attached(child))
4446 		device_detach(child);
4447 
4448 	/* Turn off access to resources we're about to free */
4449 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4450 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4451 
4452 	/* Free all allocated resources */
4453 	STAILQ_FOREACH(rle, rl, link) {
4454 		if (rle->res) {
4455 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4456 			    resource_list_busy(rl, rle->type, rle->rid)) {
4457 				pci_printf(&dinfo->cfg,
4458 				    "Resource still owned, oops. "
4459 				    "(type=%d, rid=%d, addr=%lx)\n",
4460 				    rle->type, rle->rid,
4461 				    rman_get_start(rle->res));
4462 				bus_release_resource(child, rle->type, rle->rid,
4463 				    rle->res);
4464 			}
4465 			resource_list_unreserve(rl, dev, child, rle->type,
4466 			    rle->rid);
4467 		}
4468 	}
4469 	resource_list_free(rl);
4470 
4471 	device_delete_child(dev, child);
4472 	pci_freecfg(dinfo);
4473 }
4474 
4475 void
4476 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4477 {
4478 	struct pci_devinfo *dinfo;
4479 	struct resource_list *rl;
4480 	struct resource_list_entry *rle;
4481 
4482 	if (device_get_parent(child) != dev)
4483 		return;
4484 
4485 	dinfo = device_get_ivars(child);
4486 	rl = &dinfo->resources;
4487 	rle = resource_list_find(rl, type, rid);
4488 	if (rle == NULL)
4489 		return;
4490 
4491 	if (rle->res) {
4492 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4493 		    resource_list_busy(rl, type, rid)) {
4494 			device_printf(dev, "delete_resource: "
4495 			    "Resource still owned by child, oops. "
4496 			    "(type=%d, rid=%d, addr=%lx)\n",
4497 			    type, rid, rman_get_start(rle->res));
4498 			return;
4499 		}
4500 		resource_list_unreserve(rl, dev, child, type, rid);
4501 	}
4502 	resource_list_delete(rl, type, rid);
4503 }
4504 
4505 struct resource_list *
4506 pci_get_resource_list(device_t dev, device_t child)
4507 {
4508 	struct pci_devinfo *dinfo = device_get_ivars(child);
4509 
4510 	return (&dinfo->resources);
4511 }
4512 
4513 bus_dma_tag_t
4514 pci_get_dma_tag(device_t bus, device_t dev)
4515 {
4516 	struct pci_softc *sc = device_get_softc(bus);
4517 
4518 	return (sc->sc_dma_tag);
4519 }
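
/*
 * Example: a child obtains this tag via bus_get_dma_tag() and typically
 * derives its own, more constrained tag from it.  The limits below are
 * illustrative and the softc field is hypothetical; the parenthesized
 * notes name each parameter:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev),
 *	    1, 0,			(alignment, boundary)
 *	    BUS_SPACE_MAXADDR_32BIT,	(lowaddr)
 *	    BUS_SPACE_MAXADDR,		(highaddr)
 *	    NULL, NULL,			(filter, filterarg)
 *	    MCLBYTES, 1, MCLBYTES,	(maxsize, nsegments, maxsegsz)
 *	    0, NULL, NULL,		(flags, lockfunc, lockarg)
 *	    &sc->dma_tag);
 */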
4520 
4521 uint32_t
4522 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4523 {
4524 	struct pci_devinfo *dinfo = device_get_ivars(child);
4525 	pcicfgregs *cfg = &dinfo->cfg;
4526 
4527 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4528 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4529 }
4530 
4531 void
4532 pci_write_config_method(device_t dev, device_t child, int reg,
4533     uint32_t val, int width)
4534 {
4535 	struct pci_devinfo *dinfo = device_get_ivars(child);
4536 	pcicfgregs *cfg = &dinfo->cfg;
4537 
4538 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4539 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4540 }
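
/*
 * Example: these methods back the pci_read_config()/pci_write_config()
 * wrappers that drivers use, e.g. setting the bus master bit by hand
 * (pci_enable_busmaster() is the preferred interface for this):
 *
 *	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
 *	pci_write_config(dev, PCIR_COMMAND, cmd | PCIM_CMD_BUSMASTEREN, 2);
 */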
4541 
4542 int
4543 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4544     size_t buflen)
4545 {
4546 
4547 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4548 	    pci_get_function(child));
4549 	return (0);
4550 }
4551 
4552 int
4553 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4554     size_t buflen)
4555 {
4556 	struct pci_devinfo *dinfo;
4557 	pcicfgregs *cfg;
4558 
4559 	dinfo = device_get_ivars(child);
4560 	cfg = &dinfo->cfg;
4561 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4562 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4563 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4564 	    cfg->progif);
4565 	return (0);
4566 }
4567 
4568 int
4569 pci_assign_interrupt_method(device_t dev, device_t child)
4570 {
4571 	struct pci_devinfo *dinfo = device_get_ivars(child);
4572 	pcicfgregs *cfg = &dinfo->cfg;
4573 
4574 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4575 	    cfg->intpin));
4576 }
4577 
4578 static int
4579 pci_modevent(module_t mod, int what, void *arg)
4580 {
4581 	static struct cdev *pci_cdev;
4582 
4583 	switch (what) {
4584 	case MOD_LOAD:
4585 		STAILQ_INIT(&pci_devq);
4586 		pci_generation = 0;
4587 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4588 		    "pci");
4589 		pci_load_vendor_data();
4590 		break;
4591 
4592 	case MOD_UNLOAD:
4593 		destroy_dev(pci_cdev);
4594 		break;
4595 	}
4596 
4597 	return (0);
4598 }
4599 
4600 static void
4601 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
4602 {
4603 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
4604 	struct pcicfg_pcie *cfg;
4605 	int version, pos;
4606 
4607 	cfg = &dinfo->cfg.pcie;
4608 	pos = cfg->pcie_location;
4609 
4610 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
4611 
4612 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
4613 
4614 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
4615 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
4616 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
4617 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
4618 
4619 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
4620 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
4621 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
4622 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
4623 
4624 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
4625 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
4626 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
4627 
4628 	if (version > 1) {
4629 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
4630 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
4631 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
4632 	}
4633 #undef WREG
4634 }
4635 
4636 static void
4637 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4638 {
4639 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4640 	    dinfo->cfg.pcix.pcix_command, 2);
4641 }
4642 
4643 void
4644 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4645 {
4646 
4647 	/*
4648 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4649 	 * which we know need special treatment.  Type 2 devices are
4650 	 * cardbus bridges which also require special treatment.
4651 	 * Other types are unknown, and we err on the side of safety
4652 	 * by ignoring them.
4653 	 */
4654 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4655 		return;
4656 
4657 	/*
4658 	 * Restore the device to full power mode.  We must do this
4659 	 * before we restore the registers because moving from D3 to
4660 	 * D0 will cause the chip's BARs and some other registers to
4661 	 * be reset to some unknown power on reset values.  Cut down
4662 	 * the noise on boot by doing nothing if we are already in
4663 	 * state D0.
4664 	 */
4665 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
4666 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4667 	pci_restore_bars(dev);
4668 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4669 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4670 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4671 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4672 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4673 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4674 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4675 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4676 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4677 
4678 	/*
4679 	 * Restore extended capabilities for PCI-Express and PCI-X
4680 	 */
4681 	if (dinfo->cfg.pcie.pcie_location != 0)
4682 		pci_cfg_restore_pcie(dev, dinfo);
4683 	if (dinfo->cfg.pcix.pcix_location != 0)
4684 		pci_cfg_restore_pcix(dev, dinfo);
4685 
4686 	/* Restore MSI and MSI-X configurations if they are present. */
4687 	if (dinfo->cfg.msi.msi_location != 0)
4688 		pci_resume_msi(dev);
4689 	if (dinfo->cfg.msix.msix_location != 0)
4690 		pci_resume_msix(dev);
4691 }
4692 
4693 static void
4694 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
4695 {
4696 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
4697 	struct pcicfg_pcie *cfg;
4698 	int version, pos;
4699 
4700 	cfg = &dinfo->cfg.pcie;
4701 	pos = cfg->pcie_location;
4702 
4703 	cfg->pcie_flags = RREG(PCIER_FLAGS);
4704 
4705 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
4706 
4707 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
4708 
4709 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
4710 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
4711 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
4712 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
4713 
4714 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
4715 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
4716 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
4717 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
4718 
4719 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
4720 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
4721 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
4722 
4723 	if (version > 1) {
4724 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
4725 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
4726 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
4727 	}
4728 #undef RREG
4729 }
4730 
4731 static void
4732 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4733 {
4734 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4735 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4736 }
4737 
4738 void
4739 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4740 {
4741 	uint32_t cls;
4742 	int ps;
4743 
4744 	/*
4745 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4746 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4747 	 * which also require special treatment.  Other types are unknown, and
4748 	 * we err on the side of safety by ignoring them.  Powering down
4749 	 * bridges should not be undertaken lightly.
4750 	 */
4751 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4752 		return;
4753 
4754 	/*
4755 	 * Some drivers apparently write to these registers w/o updating our
4756 	 * cached copy.  No harm happens if we update the copy, so do so here
4757 	 * so we can restore them.  The COMMAND register is modified by the
4758 	 * bus w/o updating the cache.  This should represent the normally
4759 	 * writable portion of the 'defined' part of type 0 headers.  In
4760 	 * theory we also need to save/restore the PCI capability structures
4761 	 * we know about, but apart from power we don't know any that are
4762 	 * writable.
4763 	 */
4764 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4765 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4766 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4767 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4768 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4769 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4770 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4771 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4772 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4773 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4774 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4775 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4776 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4777 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4778 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4779 
4780 	if (dinfo->cfg.pcie.pcie_location != 0)
4781 		pci_cfg_save_pcie(dev, dinfo);
4782 
4783 	if (dinfo->cfg.pcix.pcix_location != 0)
4784 		pci_cfg_save_pcix(dev, dinfo);
4785 
4786 	/*
4787 	 * Don't set the state for display devices, base peripherals and
4788 	 * memory devices since bad things happen when they are powered down.
4789 	 * We should (a) have drivers that can easily detach and (b) use
4790 	 * generic drivers for these devices so that some device actually
4791 	 * attaches.  We need to make sure that when we implement (a) we don't
4792 	 * power the device down on a reattach.
4793 	 */
4794 	cls = pci_get_class(dev);
4795 	if (!setstate)
4796 		return;
4797 	switch (pci_do_power_nodriver) {
4798 	case 0:		/* NO powerdown at all */
4799 		return;
4800 	case 1:		/* Conservative about what to power down */
4801 		if (cls == PCIC_STORAGE)
4802 			return;
4803 		/* FALLTHROUGH */
4804 	case 2:		/* Aggressive about what to power down */
4805 		if (cls == PCIC_DISPLAY ||
4806 		    cls == PCIC_MEMORY ||
4807 		    cls == PCIC_BASEPERIPH)
4808 			return;
4809 		/* FALLTHROUGH */
4810 	case 3:		/* Power down everything */
4811 		break;
4812 	}
4813 	/*
4814 	 * PCI spec says we can only go into D3 state from D0 state.
4815 	 * Transition from D[12] into D0 before going to D3 state.
4816 	 */
4817 	ps = pci_get_powerstate(dev);
4818 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4819 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4820 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4821 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4822 }
4823 
4824 /* Wrapper APIs suitable for device driver use. */
4825 void
4826 pci_save_state(device_t dev)
4827 {
4828 	struct pci_devinfo *dinfo;
4829 
4830 	dinfo = device_get_ivars(dev);
4831 	pci_cfg_save(dev, dinfo, 0);
4832 }
4833 
4834 void
4835 pci_restore_state(device_t dev)
4836 {
4837 	struct pci_devinfo *dinfo;
4838 
4839 	dinfo = device_get_ivars(dev);
4840 	pci_cfg_restore(dev, dinfo);
4841 }
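
/*
 * Example: a hypothetical driver pairing these wrappers in its own
 * suspend and resume methods ("foo_*" names are illustrative):
 *
 *	static int
 *	foo_suspend(device_t dev)
 *	{
 *
 *		pci_save_state(dev);
 *		return (bus_generic_suspend(dev));
 *	}
 *
 *	static int
 *	foo_resume(device_t dev)
 *	{
 *
 *		pci_restore_state(dev);
 *		return (bus_generic_resume(dev));
 *	}
 */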
4842