xref: /freebsd/sys/dev/pci/pci.c (revision a18eacbefdfa1085ca3db829e86ece78cd416493)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #define	PCIR_IS_BIOS(cfg, reg)						\
74 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 static int		pci_has_quirk(uint32_t devid, int quirk);
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 static void		pci_load_vendor_data(void);
96 static int		pci_describe_parse_line(char **ptr, int *vendor,
97 			    int *device, char **desc);
98 static char		*pci_describe_device(device_t dev);
99 static int		pci_modevent(module_t mod, int what, void *arg);
100 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101 			    pcicfgregs *cfg);
102 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
103 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t *data);
105 #if 0
106 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t data);
108 #endif
109 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110 static void		pci_disable_msi(device_t dev);
111 static void		pci_enable_msi(device_t dev, uint64_t address,
112 			    uint16_t data);
113 static void		pci_enable_msix(device_t dev, u_int index,
114 			    uint64_t address, uint32_t data);
115 static void		pci_mask_msix(device_t dev, u_int index);
116 static void		pci_unmask_msix(device_t dev, u_int index);
117 static int		pci_msi_blacklisted(void);
118 static int		pci_msix_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pci_remap_intr_method(device_t bus, device_t dev,
122 			    u_int irq);
123 
124 static device_method_t pci_methods[] = {
125 	/* Device interface */
126 	DEVMETHOD(device_probe,		pci_probe),
127 	DEVMETHOD(device_attach,	pci_attach),
128 	DEVMETHOD(device_detach,	bus_generic_detach),
129 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
130 	DEVMETHOD(device_suspend,	pci_suspend),
131 	DEVMETHOD(device_resume,	pci_resume),
132 
133 	/* Bus interface */
134 	DEVMETHOD(bus_print_child,	pci_print_child),
135 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
136 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
137 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
138 	DEVMETHOD(bus_driver_added,	pci_driver_added),
139 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
140 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
141 
142 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
143 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
144 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
145 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
146 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
147 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
148 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
149 	DEVMETHOD(bus_release_resource,	pci_release_resource),
150 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
151 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
152 	DEVMETHOD(bus_child_detached,	pci_child_detached),
153 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
154 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
155 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
156 
157 	/* PCI interface */
158 	DEVMETHOD(pci_read_config,	pci_read_config_method),
159 	DEVMETHOD(pci_write_config,	pci_write_config_method),
160 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
161 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
162 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
163 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
164 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
165 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
166 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
167 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
168 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
169 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
170 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
171 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
172 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
173 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
174 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
175 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
176 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
177 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
178 
179 	DEVMETHOD_END
180 };
181 
182 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
183 
184 static devclass_t pci_devclass;
185 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
186 MODULE_VERSION(pci, 1);
187 
188 static char	*pci_vendordata;
189 static size_t	pci_vendordata_size;
190 
191 struct pci_quirk {
192 	uint32_t devid;	/* Vendor/device of the card */
193 	int	type;
194 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
195 #define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
196 #define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
197 #define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
198 #define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
199 	int	arg1;
200 	int	arg2;
201 };
202 
203 static const struct pci_quirk pci_quirks[] = {
204 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
205 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
206 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
207 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
208 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
209 
210 	/*
211 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
212 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
213 	 */
214 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
215 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216 
217 	/*
218 	 * MSI doesn't work on earlier Intel chipsets including
219 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
220 	 */
221 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
222 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
223 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
224 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
225 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228 
229 	/*
230 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
231 	 * bridge.
232 	 */
233 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 
235 	/*
236 	 * MSI-X allocation doesn't work properly for devices passed through
237 	 * by VMware up to at least ESXi 5.1.
238 	 */
239 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
240 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
241 
242 	/*
243 	 * Some virtualization environments emulate an older chipset
244 	 * but support MSI just fine.  QEMU uses the Intel 82440.
245 	 */
246 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
247 
248 	/*
249 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
250 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
251 	 * It prevents us from attaching hpet(4) when the bit is unset.
252 	 * Note this quirk only affects SB600 revision A13 and earlier.
253 	 * For SB600 A21 and later, firmware must set the bit to hide it.
254 	 * For SB700 and later, it is unused and hardcoded to zero.
255 	 */
256 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
257 
258 	{ 0 }
259 };
260 
261 /* map register information */
262 #define	PCI_MAPMEM	0x01	/* memory map */
263 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264 #define	PCI_MAPPORT	0x04	/* port map */
265 
266 struct devlist pci_devq;
267 uint32_t pci_generation;
268 uint32_t pci_numdevs = 0;
269 static int pcie_chipset, pcix_chipset;
270 
271 /* sysctl vars */
272 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273 
274 static int pci_enable_io_modes = 1;
275 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277     &pci_enable_io_modes, 1,
278     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279 enable these bits correctly.  We'd like to do this all the time, but there\n\
280 are some peripherals that this causes problems with.");
281 
282 static int pci_do_realloc_bars = 0;
283 TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
284 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
285     &pci_do_realloc_bars, 0,
286     "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
287 
288 static int pci_do_power_nodriver = 0;
289 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
290 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
291     &pci_do_power_nodriver, 0,
292   "Place a function into D3 state when no driver attaches to it.  0 means\n\
293 disable.  1 means conservatively place devices into D3 state.  2 means\n\
294 agressively place devices into D3 state.  3 means put absolutely everything\n\
295 in D3 state.");
296 
297 int pci_do_power_resume = 1;
298 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
299 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
300     &pci_do_power_resume, 1,
301   "Transition from D3 -> D0 on resume.");
302 
303 int pci_do_power_suspend = 1;
304 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
305 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
306     &pci_do_power_suspend, 1,
307   "Transition from D0 -> D3 on suspend.");
308 
309 static int pci_do_msi = 1;
310 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
311 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
312     "Enable support for MSI interrupts");
313 
314 static int pci_do_msix = 1;
315 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
316 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
317     "Enable support for MSI-X interrupts");
318 
319 static int pci_honor_msi_blacklist = 1;
320 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
321 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
322     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
323 
324 #if defined(__i386__) || defined(__amd64__)
325 static int pci_usb_takeover = 1;
326 #else
327 static int pci_usb_takeover = 0;
328 #endif
329 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
330 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
331     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
332 Disable this if you depend on BIOS emulation of USB devices, that is\n\
333 you use USB devices (like keyboard or mouse) but do not load USB drivers");
334 
335 static int
336 pci_has_quirk(uint32_t devid, int quirk)
337 {
338 	const struct pci_quirk *q;
339 
340 	for (q = &pci_quirks[0]; q->devid; q++) {
341 		if (q->devid == devid && q->type == quirk)
342 			return (1);
343 	}
344 	return (0);
345 }
346 
347 /* Find a device_t by bus/slot/function in domain 0 */
348 
349 device_t
350 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
351 {
352 
353 	return (pci_find_dbsf(0, bus, slot, func));
354 }
355 
356 /* Find a device_t by domain/bus/slot/function */
357 
358 device_t
359 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
360 {
361 	struct pci_devinfo *dinfo;
362 
363 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
364 		if ((dinfo->cfg.domain == domain) &&
365 		    (dinfo->cfg.bus == bus) &&
366 		    (dinfo->cfg.slot == slot) &&
367 		    (dinfo->cfg.func == func)) {
368 			return (dinfo->cfg.dev);
369 		}
370 	}
371 
372 	return (NULL);
373 }
374 
375 /* Find a device_t by vendor/device ID */
376 
377 device_t
378 pci_find_device(uint16_t vendor, uint16_t device)
379 {
380 	struct pci_devinfo *dinfo;
381 
382 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
383 		if ((dinfo->cfg.vendor == vendor) &&
384 		    (dinfo->cfg.device == device)) {
385 			return (dinfo->cfg.dev);
386 		}
387 	}
388 
389 	return (NULL);
390 }
391 
392 device_t
393 pci_find_class(uint8_t class, uint8_t subclass)
394 {
395 	struct pci_devinfo *dinfo;
396 
397 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
398 		if (dinfo->cfg.baseclass == class &&
399 		    dinfo->cfg.subclass == subclass) {
400 			return (dinfo->cfg.dev);
401 		}
402 	}
403 
404 	return (NULL);
405 }
406 
407 static int
408 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
409 {
410 	va_list ap;
411 	int retval;
412 
413 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
414 	    cfg->func);
415 	va_start(ap, fmt);
416 	retval += vprintf(fmt, ap);
417 	va_end(ap);
418 	return (retval);
419 }
420 
421 /* return base address of memory or port map */
422 
423 static pci_addr_t
424 pci_mapbase(uint64_t mapreg)
425 {
426 
427 	if (PCI_BAR_MEM(mapreg))
428 		return (mapreg & PCIM_BAR_MEM_BASE);
429 	else
430 		return (mapreg & PCIM_BAR_IO_BASE);
431 }
432 
433 /* return map type of memory or port map */
434 
435 static const char *
436 pci_maptype(uint64_t mapreg)
437 {
438 
439 	if (PCI_BAR_IO(mapreg))
440 		return ("I/O Port");
441 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
442 		return ("Prefetchable Memory");
443 	return ("Memory");
444 }
445 
446 /* return log2 of map size decoded for memory or port map */
447 
448 static int
449 pci_mapsize(uint64_t testval)
450 {
451 	int ln2size;
452 
453 	testval = pci_mapbase(testval);
454 	ln2size = 0;
455 	if (testval != 0) {
456 		while ((testval & 1) == 0)
457 		{
458 			ln2size++;
459 			testval >>= 1;
460 		}
461 	}
462 	return (ln2size);
463 }
464 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
473 
474 /* return log2 of map size decided for device ROM */
475 
476 static int
477 pci_romsize(uint64_t testval)
478 {
479 	int ln2size;
480 
481 	testval = pci_rombase(testval);
482 	ln2size = 0;
483 	if (testval != 0) {
484 		while ((testval & 1) == 0)
485 		{
486 			ln2size++;
487 			testval >>= 1;
488 		}
489 	}
490 	return (ln2size);
491 }
492 
493 /* return log2 of address range supported by map register */
494 
495 static int
496 pci_maprange(uint64_t mapreg)
497 {
498 	int ln2range = 0;
499 
500 	if (PCI_BAR_IO(mapreg))
501 		ln2range = 32;
502 	else
503 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
504 		case PCIM_BAR_MEM_32:
505 			ln2range = 32;
506 			break;
507 		case PCIM_BAR_MEM_1MB:
508 			ln2range = 20;
509 			break;
510 		case PCIM_BAR_MEM_64:
511 			ln2range = 64;
512 			break;
513 		}
514 	return (ln2range);
515 }
516 
517 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
518 
519 static void
520 pci_fixancient(pcicfgregs *cfg)
521 {
522 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
523 		return;
524 
525 	/* PCI to PCI bridges use header type 1 */
526 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
527 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
528 }
529 
/*
 * Extract header-type specific config data: the subsystem IDs (where
 * the header layout defines them) and the number of BARs this header
 * type provides.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: ordinary function. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge; no subsystem ID registers here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
553 
/*
 * Read the configuration header of the function at domain d, bus b,
 * slot s, function f into a freshly allocated pci_devinfo of 'size'
 * bytes (callers may request a larger, derived structure).  Returns
 * NULL if no device responds at that address; otherwise links the new
 * entry onto the global device list and returns it.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device read means nothing decodes here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Capture the type-independent header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		/* Fix up PCI 1.0 oddities, then read type-specific data. */
		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
629 
/*
 * Walk the device's PCI capability list and record the location and
 * salient contents of each capability this driver knows about (power
 * management, HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X and
 * PCI-express).  Also updates the global pcix_chipset/pcie_chipset
 * hints used by the MSI blacklist logic.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability seen. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Each register packs a BAR index with an offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG definitions carry through for use by the next functions. */
}
790 
791 /*
792  * PCI Vital Product Data
793  */
794 
795 #define	PCI_VPD_TIMEOUT		1000000
796 
797 static int
798 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
799 {
800 	int count = PCI_VPD_TIMEOUT;
801 
802 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
803 
804 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
805 
806 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
807 		if (--count < 0)
808 			return (ENXIO);
809 		DELAY(1);	/* limit looping */
810 	}
811 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
812 
813 	return (0);
814 }
815 
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD data (currently unused,
 * hence compiled out).  Returns 0 on success, or ENXIO if the device
 * fails to clear the completion flag within PCI_VPD_TIMEOUT polls.
 *
 * Fix: correct the KASSERT panic message typo ("must by" -> "must be").
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Setting bit 15 in the address register starts a write cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Poll until the device clears bit 15 to signal completion. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
835 
836 #undef PCI_VPD_TIMEOUT
837 
/*
 * Streaming reader state for parsing VPD one byte at a time.  VPD is
 * fetched 32 bits at a time; 'val' buffers the current word and
 * 'bytesinval' counts how many of its bytes remain unconsumed.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current buffered 32-bit word */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
846 
847 static int
848 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
849 {
850 	uint32_t reg;
851 	uint8_t byte;
852 
853 	if (vrs->bytesinval == 0) {
854 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
855 			return (ENXIO);
856 		vrs->val = le32toh(reg);
857 		vrs->off += 4;
858 		byte = vrs->val & 0xff;
859 		vrs->bytesinval = 3;
860 	} else {
861 		vrs->val = vrs->val >> 8;
862 		byte = vrs->val & 0xff;
863 		vrs->bytesinval--;
864 	}
865 
866 	vrs->cksum += byte;
867 	*data = byte;
868 	return (0);
869 }
870 
871 static void
872 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
873 {
874 	struct vpd_readstate vrs;
875 	int state;
876 	int name;
877 	int remain;
878 	int i;
879 	int alloc, off;		/* alloc/off for RO/W arrays */
880 	int cksumvalid;
881 	int dflen;
882 	uint8_t byte;
883 	uint8_t byte2;
884 
885 	/* init vpd reader */
886 	vrs.bytesinval = 0;
887 	vrs.off = 0;
888 	vrs.pcib = pcib;
889 	vrs.cfg = cfg;
890 	vrs.cksum = 0;
891 
892 	state = 0;
893 	name = remain = i = 0;	/* shut up stupid gcc */
894 	alloc = off = 0;	/* shut up stupid gcc */
895 	dflen = 0;		/* shut up stupid gcc */
896 	cksumvalid = -1;
897 	while (state >= 0) {
898 		if (vpd_nextbyte(&vrs, &byte)) {
899 			state = -2;
900 			break;
901 		}
902 #if 0
903 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
904 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
905 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
906 #endif
907 		switch (state) {
908 		case 0:		/* item name */
909 			if (byte & 0x80) {
910 				if (vpd_nextbyte(&vrs, &byte2)) {
911 					state = -2;
912 					break;
913 				}
914 				remain = byte2;
915 				if (vpd_nextbyte(&vrs, &byte2)) {
916 					state = -2;
917 					break;
918 				}
919 				remain |= byte2 << 8;
920 				if (remain > (0x7f*4 - vrs.off)) {
921 					state = -1;
922 					pci_printf(cfg,
923 					    "invalid VPD data, remain %#x\n",
924 					    remain);
925 				}
926 				name = byte & 0x7f;
927 			} else {
928 				remain = byte & 0x7;
929 				name = (byte >> 3) & 0xf;
930 			}
931 			switch (name) {
932 			case 0x2:	/* String */
933 				cfg->vpd.vpd_ident = malloc(remain + 1,
934 				    M_DEVBUF, M_WAITOK);
935 				i = 0;
936 				state = 1;
937 				break;
938 			case 0xf:	/* End */
939 				state = -1;
940 				break;
941 			case 0x10:	/* VPD-R */
942 				alloc = 8;
943 				off = 0;
944 				cfg->vpd.vpd_ros = malloc(alloc *
945 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
946 				    M_WAITOK | M_ZERO);
947 				state = 2;
948 				break;
949 			case 0x11:	/* VPD-W */
950 				alloc = 8;
951 				off = 0;
952 				cfg->vpd.vpd_w = malloc(alloc *
953 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
954 				    M_WAITOK | M_ZERO);
955 				state = 5;
956 				break;
957 			default:	/* Invalid data, abort */
958 				state = -1;
959 				break;
960 			}
961 			break;
962 
963 		case 1:	/* Identifier String */
964 			cfg->vpd.vpd_ident[i++] = byte;
965 			remain--;
966 			if (remain == 0)  {
967 				cfg->vpd.vpd_ident[i] = '\0';
968 				state = 0;
969 			}
970 			break;
971 
972 		case 2:	/* VPD-R Keyword Header */
973 			if (off == alloc) {
974 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
975 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
976 				    M_DEVBUF, M_WAITOK | M_ZERO);
977 			}
978 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
979 			if (vpd_nextbyte(&vrs, &byte2)) {
980 				state = -2;
981 				break;
982 			}
983 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
984 			if (vpd_nextbyte(&vrs, &byte2)) {
985 				state = -2;
986 				break;
987 			}
988 			dflen = byte2;
989 			if (dflen == 0 &&
990 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
991 			    2) == 0) {
992 				/*
993 				 * if this happens, we can't trust the rest
994 				 * of the VPD.
995 				 */
996 				pci_printf(cfg, "bad keyword length: %d\n",
997 				    dflen);
998 				cksumvalid = 0;
999 				state = -1;
1000 				break;
1001 			} else if (dflen == 0) {
1002 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1003 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1004 				    M_DEVBUF, M_WAITOK);
1005 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1006 			} else
1007 				cfg->vpd.vpd_ros[off].value = malloc(
1008 				    (dflen + 1) *
1009 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1010 				    M_DEVBUF, M_WAITOK);
1011 			remain -= 3;
1012 			i = 0;
			/* keep in sync w/ state 3's transitions */
1014 			if (dflen == 0 && remain == 0)
1015 				state = 0;
1016 			else if (dflen == 0)
1017 				state = 2;
1018 			else
1019 				state = 3;
1020 			break;
1021 
1022 		case 3:	/* VPD-R Keyword Value */
1023 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1024 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1025 			    "RV", 2) == 0 && cksumvalid == -1) {
1026 				if (vrs.cksum == 0)
1027 					cksumvalid = 1;
1028 				else {
1029 					if (bootverbose)
1030 						pci_printf(cfg,
1031 					    "bad VPD cksum, remain %hhu\n",
1032 						    vrs.cksum);
1033 					cksumvalid = 0;
1034 					state = -1;
1035 					break;
1036 				}
1037 			}
1038 			dflen--;
1039 			remain--;
			/* keep in sync w/ state 2's transitions */
1041 			if (dflen == 0)
1042 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1043 			if (dflen == 0 && remain == 0) {
1044 				cfg->vpd.vpd_rocnt = off;
1045 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1046 				    off * sizeof(*cfg->vpd.vpd_ros),
1047 				    M_DEVBUF, M_WAITOK | M_ZERO);
1048 				state = 0;
1049 			} else if (dflen == 0)
1050 				state = 2;
1051 			break;
1052 
1053 		case 4:
1054 			remain--;
1055 			if (remain == 0)
1056 				state = 0;
1057 			break;
1058 
1059 		case 5:	/* VPD-W Keyword Header */
1060 			if (off == alloc) {
1061 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1062 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1063 				    M_DEVBUF, M_WAITOK | M_ZERO);
1064 			}
1065 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1066 			if (vpd_nextbyte(&vrs, &byte2)) {
1067 				state = -2;
1068 				break;
1069 			}
1070 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1071 			if (vpd_nextbyte(&vrs, &byte2)) {
1072 				state = -2;
1073 				break;
1074 			}
1075 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1076 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1077 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1078 			    sizeof(*cfg->vpd.vpd_w[off].value),
1079 			    M_DEVBUF, M_WAITOK);
1080 			remain -= 3;
1081 			i = 0;
			/* keep in sync w/ state 6's transitions */
1083 			if (dflen == 0 && remain == 0)
1084 				state = 0;
1085 			else if (dflen == 0)
1086 				state = 5;
1087 			else
1088 				state = 6;
1089 			break;
1090 
1091 		case 6:	/* VPD-W Keyword Value */
1092 			cfg->vpd.vpd_w[off].value[i++] = byte;
1093 			dflen--;
1094 			remain--;
			/* keep in sync w/ state 5's transitions */
1096 			if (dflen == 0)
1097 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1098 			if (dflen == 0 && remain == 0) {
1099 				cfg->vpd.vpd_wcnt = off;
1100 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1101 				    off * sizeof(*cfg->vpd.vpd_w),
1102 				    M_DEVBUF, M_WAITOK | M_ZERO);
1103 				state = 0;
1104 			} else if (dflen == 0)
1105 				state = 5;
1106 			break;
1107 
1108 		default:
1109 			pci_printf(cfg, "invalid state: %d\n", state);
1110 			state = -1;
1111 			break;
1112 		}
1113 	}
1114 
1115 	if (cksumvalid == 0 || state < -1) {
1116 		/* read-only data bad, clean up */
1117 		if (cfg->vpd.vpd_ros != NULL) {
1118 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1119 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1120 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1121 			cfg->vpd.vpd_ros = NULL;
1122 		}
1123 	}
1124 	if (state < -1) {
1125 		/* I/O error, clean up */
1126 		pci_printf(cfg, "failed to read VPD data.\n");
1127 		if (cfg->vpd.vpd_ident != NULL) {
1128 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1129 			cfg->vpd.vpd_ident = NULL;
1130 		}
1131 		if (cfg->vpd.vpd_w != NULL) {
1132 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1133 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1134 			free(cfg->vpd.vpd_w, M_DEVBUF);
1135 			cfg->vpd.vpd_w = NULL;
1136 		}
1137 	}
1138 	cfg->vpd.vpd_cached = 1;
1139 #undef REG
1140 #undef WREG
1141 }
1142 
1143 int
1144 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1145 {
1146 	struct pci_devinfo *dinfo = device_get_ivars(child);
1147 	pcicfgregs *cfg = &dinfo->cfg;
1148 
1149 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1150 		pci_read_vpd(device_get_parent(dev), cfg);
1151 
1152 	*identptr = cfg->vpd.vpd_ident;
1153 
1154 	if (*identptr == NULL)
1155 		return (ENXIO);
1156 
1157 	return (0);
1158 }
1159 
1160 int
1161 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1162 	const char **vptr)
1163 {
1164 	struct pci_devinfo *dinfo = device_get_ivars(child);
1165 	pcicfgregs *cfg = &dinfo->cfg;
1166 	int i;
1167 
1168 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1169 		pci_read_vpd(device_get_parent(dev), cfg);
1170 
1171 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1172 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1173 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1174 			*vptr = cfg->vpd.vpd_ros[i].value;
1175 			return (0);
1176 		}
1177 
1178 	*vptr = NULL;
1179 	return (ENXIO);
1180 }
1181 
1182 /*
1183  * Find the requested HyperTransport capability and return the offset
1184  * in configuration space via the pointer provided.  The function
1185  * returns 0 on success and an error code otherwise.
1186  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HT capability in the standard list. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			/* Host/slave use only the upper 3 bits of the type. */
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/*
		 * Skip to the next HT capability.  When the next pointer
		 * is zero the walk stops and the outer loop exits too.
		 */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1224 
1225 /*
1226  * Find the requested capability and return the offset in
1227  * configuration space via the pointer provided.  The function returns
1228  * 0 on success and an error code otherwise.
1229  */
1230 int
1231 pci_find_cap_method(device_t dev, device_t child, int capability,
1232     int *capreg)
1233 {
1234 	struct pci_devinfo *dinfo = device_get_ivars(child);
1235 	pcicfgregs *cfg = &dinfo->cfg;
1236 	u_int32_t status;
1237 	u_int8_t ptr;
1238 
1239 	/*
1240 	 * Check the CAP_LIST bit of the PCI status register first.
1241 	 */
1242 	status = pci_read_config(child, PCIR_STATUS, 2);
1243 	if (!(status & PCIM_STATUS_CAPPRESENT))
1244 		return (ENXIO);
1245 
1246 	/*
1247 	 * Determine the start pointer of the capabilities list.
1248 	 */
1249 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1250 	case PCIM_HDRTYPE_NORMAL:
1251 	case PCIM_HDRTYPE_BRIDGE:
1252 		ptr = PCIR_CAP_PTR;
1253 		break;
1254 	case PCIM_HDRTYPE_CARDBUS:
1255 		ptr = PCIR_CAP_PTR_2;
1256 		break;
1257 	default:
1258 		/* XXX: panic? */
1259 		return (ENXIO);		/* no extended capabilities support */
1260 	}
1261 	ptr = pci_read_config(child, ptr, 1);
1262 
1263 	/*
1264 	 * Traverse the capabilities list.
1265 	 */
1266 	while (ptr != 0) {
1267 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1268 			if (capreg != NULL)
1269 				*capreg = ptr;
1270 			return (0);
1271 		}
1272 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1273 	}
1274 
1275 	return (ENOENT);
1276 }
1277 
1278 /*
1279  * Find the requested extended capability and return the offset in
1280  * configuration space via the pointer provided.  The function returns
1281  * 0 on success and an error code otherwise.
1282  */
1283 int
1284 pci_find_extcap_method(device_t dev, device_t child, int capability,
1285     int *capreg)
1286 {
1287 	struct pci_devinfo *dinfo = device_get_ivars(child);
1288 	pcicfgregs *cfg = &dinfo->cfg;
1289 	uint32_t ecap;
1290 	uint16_t ptr;
1291 
1292 	/* Only supported for PCI-express devices. */
1293 	if (cfg->pcie.pcie_location == 0)
1294 		return (ENXIO);
1295 
1296 	ptr = PCIR_EXTCAP;
1297 	ecap = pci_read_config(child, ptr, 4);
1298 	if (ecap == 0xffffffff || ecap == 0)
1299 		return (ENOENT);
1300 	for (;;) {
1301 		if (PCI_EXTCAP_ID(ecap) == capability) {
1302 			if (capreg != NULL)
1303 				*capreg = ptr;
1304 			return (0);
1305 		}
1306 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1307 		if (ptr == 0)
1308 			break;
1309 		ecap = pci_read_config(child, ptr, 4);
1310 	}
1311 
1312 	return (ENOENT);
1313 }
1314 
1315 /*
1316  * Support for MSI-X message interrupts.
1317  */
/*
 * Program the address/data pair for MSI-X message 'index' into the
 * device's MSI-X table.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Table entries are 16 bytes: addr lo, addr hi, data, vector ctrl. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1334 
1335 void
1336 pci_mask_msix(device_t dev, u_int index)
1337 {
1338 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1339 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1340 	uint32_t offset, val;
1341 
1342 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1343 	offset = msix->msix_table_offset + index * 16 + 12;
1344 	val = bus_read_4(msix->msix_table_res, offset);
1345 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1346 		val |= PCIM_MSIX_VCTRL_MASK;
1347 		bus_write_4(msix->msix_table_res, offset, val);
1348 	}
1349 }
1350 
1351 void
1352 pci_unmask_msix(device_t dev, u_int index)
1353 {
1354 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1355 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1356 	uint32_t offset, val;
1357 
1358 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1359 	offset = msix->msix_table_offset + index * 16 + 12;
1360 	val = bus_read_4(msix->msix_table_res, offset);
1361 	if (val & PCIM_MSIX_VCTRL_MASK) {
1362 		val &= ~PCIM_MSIX_VCTRL_MASK;
1363 		bus_write_4(msix->msix_table_res, offset, val);
1364 	}
1365 }
1366 
1367 int
1368 pci_pending_msix(device_t dev, u_int index)
1369 {
1370 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1371 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1372 	uint32_t offset, bit;
1373 
1374 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1375 	offset = msix->msix_pba_offset + (index / 32) * 4;
1376 	bit = 1 << index % 32;
1377 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1378 }
1379 
1380 /*
1381  * Restore MSI-X registers and table during resume.  If MSI-X is
1382  * enabled then walk the virtual table to restore the actual MSI-X
1383  * table.
1384  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control word. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1412 
1413 /*
1414  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1415  * returned in *count.  After this function returns, each message will be
1416  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1417  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The MSI-X table
	 * and the pending bit array may share a BAR or live in two
	 * different ones; both must already be active.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		/* Messages map to SYS_RES_IRQ rids starting at 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 means "no vector assigned". */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1552 
1553 /*
1554  * By default, pci_alloc_msix() will assign the allocated IRQ
1555  * resources consecutively to the first N messages in the MSI-X table.
1556  * However, device drivers may want to use different layouts if they
1557  * either receive fewer messages than they asked for, or they wish to
1558  * populate the MSI-X table sparsely.  This method allows the driver
1559  * to specify what layout it wants.  It must be called after a
1560  * successful pci_alloc_msix() but before any of the associated
1561  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1562  *
1563  * The 'vectors' array contains 'count' message vectors.  The array
1564  * maps directly to the MSI-X table in that index 0 in the array
1565  * specifies the vector for the first message in the MSI-X table, etc.
1566  * The vector value in each array index can either be 0 to indicate
1567  * that no vector should be assigned to a message slot, or it can be a
1568  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1570  * vector (IRQ) to be used for the corresponding message.
1571  *
1572  * On successful return, each message with a non-zero vector will have
1573  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1574  * 1.  Additionally, if any of the IRQs allocated via the previous
1575  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1576  * will be freed back to the system automatically.
1577  *
1578  * For example, suppose a driver has a MSI-X table with 6 messages and
1579  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1580  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1581  * C.  After the call to pci_alloc_msix(), the device will be setup to
1582  * have an MSI-X table of ABC--- (where - means no vector assigned).
1583  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1584  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1585  * be freed back to the system.  This device will also have valid
1586  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1587  *
1588  * In any case, the SYS_RES_IRQ rid X will always map to the message
1589  * at MSI-X table index X - 1 and will only be valid if a vector is
1590  * assigned to that table entry.
1591  */
1592 int
1593 pci_remap_msix_method(device_t dev, device_t child, int count,
1594     const u_int *vectors)
1595 {
1596 	struct pci_devinfo *dinfo = device_get_ivars(child);
1597 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1598 	struct resource_list_entry *rle;
1599 	int i, irq, j, *used;
1600 
1601 	/*
1602 	 * Have to have at least one message in the table but the
1603 	 * table can't be bigger than the actual MSI-X table in the
1604 	 * device.
1605 	 */
1606 	if (count == 0 || count > msix->msix_msgnum)
1607 		return (EINVAL);
1608 
1609 	/* Sanity check the vectors. */
1610 	for (i = 0; i < count; i++)
1611 		if (vectors[i] > msix->msix_alloc)
1612 			return (EINVAL);
1613 
1614 	/*
1615 	 * Make sure there aren't any holes in the vectors to be used.
1616 	 * It's a big pain to support it, and it doesn't really make
1617 	 * sense anyway.  Also, at least one vector must be used.
1618 	 */
1619 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1620 	    M_ZERO);
1621 	for (i = 0; i < count; i++)
1622 		if (vectors[i] != 0)
1623 			used[vectors[i] - 1] = 1;
1624 	for (i = 0; i < msix->msix_alloc - 1; i++)
1625 		if (used[i] == 0 && used[i + 1] == 1) {
1626 			free(used, M_DEVBUF);
1627 			return (EINVAL);
1628 		}
1629 	if (used[0] != 1) {
1630 		free(used, M_DEVBUF);
1631 		return (EINVAL);
1632 	}
1633 
1634 	/* Make sure none of the resources are allocated. */
1635 	for (i = 0; i < msix->msix_table_len; i++) {
1636 		if (msix->msix_table[i].mte_vector == 0)
1637 			continue;
1638 		if (msix->msix_table[i].mte_handlers > 0)
1639 			return (EBUSY);
1640 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1641 		KASSERT(rle != NULL, ("missing resource"));
1642 		if (rle->res != NULL)
1643 			return (EBUSY);
1644 	}
1645 
1646 	/* Free the existing resource list entries. */
1647 	for (i = 0; i < msix->msix_table_len; i++) {
1648 		if (msix->msix_table[i].mte_vector == 0)
1649 			continue;
1650 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1651 	}
1652 
1653 	/*
1654 	 * Build the new virtual table keeping track of which vectors are
1655 	 * used.
1656 	 */
1657 	free(msix->msix_table, M_DEVBUF);
1658 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1659 	    M_DEVBUF, M_WAITOK | M_ZERO);
1660 	for (i = 0; i < count; i++)
1661 		msix->msix_table[i].mte_vector = vectors[i];
1662 	msix->msix_table_len = count;
1663 
1664 	/* Free any unused IRQs and resize the vectors array if necessary. */
1665 	j = msix->msix_alloc - 1;
1666 	if (used[j] == 0) {
1667 		struct msix_vector *vec;
1668 
1669 		while (used[j] == 0) {
1670 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1671 			    msix->msix_vectors[j].mv_irq);
1672 			j--;
1673 		}
1674 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1675 		    M_WAITOK);
1676 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1677 		    (j + 1));
1678 		free(msix->msix_vectors, M_DEVBUF);
1679 		msix->msix_vectors = vec;
1680 		msix->msix_alloc = j + 1;
1681 	}
1682 	free(used, M_DEVBUF);
1683 
1684 	/* Map the IRQs onto the rids. */
1685 	for (i = 0; i < count; i++) {
1686 		if (vectors[i] == 0)
1687 			continue;
1688 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1689 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1690 		    irq, 1);
1691 	}
1692 
1693 	if (bootverbose) {
1694 		device_printf(child, "Remapped MSI-X IRQs as: ");
1695 		for (i = 0; i < count; i++) {
1696 			if (i != 0)
1697 				printf(", ");
1698 			if (vectors[i] == 0)
1699 				printf("---");
1700 			else
1701 				printf("%d",
1702 				    msix->msix_vectors[vectors[i]].mv_irq);
1703 		}
1704 		printf("\n");
1705 	}
1706 
1707 	return (0);
1708 }
1709 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated: refuse to tear
	 * down while any message still has an interrupt handler or an
	 * allocated SYS_RES_IRQ resource.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1756 
1757 /*
1758  * Return the max supported MSI-X messages this device supports.
1759  * Basically, assuming the MD code can alloc messages, this function
1760  * should return the maximum value that pci_alloc_msix() can return.
1761  * Thus, it is subject to the tunables, etc.
1762  */
1763 int
1764 pci_msix_count_method(device_t dev, device_t child)
1765 {
1766 	struct pci_devinfo *dinfo = device_get_ivars(child);
1767 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1768 
1769 	if (pci_do_msix && msix->msix_location != 0)
1770 		return (msix->msix_msgnum);
1771 	return (0);
1772 }
1773 
1774 /*
1775  * HyperTransport MSI mapping control
1776  */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do if the device has no HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Enable the mapping if an MSI address is being programmed, the
	 * mapping is currently off, and the address falls in the same
	 * 1 MB-aligned window the capability translates (compared via
	 * the >> 20 shift).
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	/* A zero address means MSI is being torn down. */
	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1801 
1802 int
1803 pci_get_max_read_req(device_t dev)
1804 {
1805 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1806 	int cap;
1807 	uint16_t val;
1808 
1809 	cap = dinfo->cfg.pcie.pcie_location;
1810 	if (cap == 0)
1811 		return (0);
1812 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1813 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1814 	val >>= 12;
1815 	return (1 << (val + 7));
1816 }
1817 
1818 int
1819 pci_set_max_read_req(device_t dev, int size)
1820 {
1821 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1822 	int cap;
1823 	uint16_t val;
1824 
1825 	cap = dinfo->cfg.pcie.pcie_location;
1826 	if (cap == 0)
1827 		return (0);
1828 	if (size < 128)
1829 		size = 128;
1830 	if (size > 4096)
1831 		size = 4096;
1832 	size = (1 << (fls(size) - 1));
1833 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1834 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1835 	val |= (fls(size) - 8) << 12;
1836 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1837 	return (size);
1838 }
1839 
1840 /*
1841  * Support for MSI message signalled interrupts.
1842  */
/*
 * Program the MSI address/data pair and enable MSI for the device.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/* 64-bit capable functions place the data register further out. */
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1869 
/*
 * Disable MSI delivery: tear down any MSI -> HT mapping first, then
 * clear the enable bit in the MSI control register.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1884 
1885 /*
1886  * Restore MSI registers during resume.  If MSI is enabled then
1887  * restore the data and address registers in addition to the control
1888  * register.
1889  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the address/data pair saved before suspend. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* 64-bit capable functions place the data register further out. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control word, enabled or not. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1915 
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram the device with the new pair. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Rewrite every table slot using vector i+1. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while the entry is rewritten. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): ENOENT is returned even after a successful
		 * MSI-X remap above — confirm callers tolerate this.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1988 
1989 /*
1990  * Returns true if the specified device is blacklisted because MSI
1991  * doesn't work.
1992  */
1993 int
1994 pci_msi_device_blacklisted(device_t dev)
1995 {
1996 
1997 	if (!pci_honor_msi_blacklist)
1998 		return (0);
1999 
2000 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2001 }
2002 
2003 /*
2004  * Determine if MSI is blacklisted globally on this system.  Currently,
2005  * we just check for blacklisted chipsets as represented by the
2006  * host-PCI bridge at device 0:0:0.  In the future, it may become
2007  * necessary to check other system attributes, such as the kenv values
2008  * that give the motherboard manufacturer and model number.
2009  */
2010 static int
2011 pci_msi_blacklisted(void)
2012 {
2013 	device_t dev;
2014 
2015 	if (!pci_honor_msi_blacklist)
2016 		return (0);
2017 
2018 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2019 	if (!(pcie_chipset || pcix_chipset)) {
2020 		if (vm_guest != VM_GUEST_NO) {
2021 			/*
2022 			 * Whitelist older chipsets in virtual
2023 			 * machines known to support MSI.
2024 			 */
2025 			dev = pci_find_bsf(0, 0, 0);
2026 			if (dev != NULL)
2027 				return (!pci_has_quirk(pci_get_devid(dev),
2028 					PCI_QUIRK_ENABLE_MSI_VM));
2029 		}
2030 		return (1);
2031 	}
2032 
2033 	dev = pci_find_bsf(0, 0, 0);
2034 	if (dev != NULL)
2035 		return (pci_msi_device_blacklisted(dev));
2036 	return (0);
2037 }
2038 
2039 /*
2040  * Returns true if the specified device is blacklisted because MSI-X
2041  * doesn't work.  Note that this assumes that if MSI doesn't work,
2042  * MSI-X doesn't either.
2043  */
2044 int
2045 pci_msix_device_blacklisted(device_t dev)
2046 {
2047 
2048 	if (!pci_honor_msi_blacklist)
2049 		return (0);
2050 
2051 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2052 		return (1);
2053 
2054 	return (pci_msi_device_blacklisted(dev));
2055 }
2056 
2057 /*
2058  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2059  * is blacklisted, assume that MSI-X is as well.  Check for additional
2060  * chipsets where MSI works but MSI-X does not.
2061  */
2062 static int
2063 pci_msix_blacklisted(void)
2064 {
2065 	device_t dev;
2066 
2067 	if (!pci_honor_msi_blacklist)
2068 		return (0);
2069 
2070 	dev = pci_find_bsf(0, 0, 0);
2071 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2072 	    PCI_QUIRK_DISABLE_MSIX))
2073 		return (1);
2074 
2075 	return (pci_msi_blacklisted());
2076 }
2077 
2078 /*
2079  * Attempt to allocate *count MSI messages.  The actual number allocated is
2080  * returned in *count.  After this function returns, each message will be
2081  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2082  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Keep halving the request until the parent can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;	/* log2(actual) into the MME field */
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2201 
2202 /* Release the MSI messages associated with this device. */
/*
 * Returns 0 on success, ENODEV if no MSI (or MSI-X) messages are
 * allocated, and EBUSY if any message still has an active handler or
 * an allocated resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQs so they can all be handed back at once. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2250 
2251 /*
2252  * Return the max supported MSI messages this device supports.
2253  * Basically, assuming the MD code can alloc messages, this function
2254  * should return the maximum value that pci_alloc_msi() can return.
2255  * Thus, it is subject to the tunables, etc.
2256  */
2257 int
2258 pci_msi_count_method(device_t dev, device_t child)
2259 {
2260 	struct pci_devinfo *dinfo = device_get_ivars(child);
2261 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2262 
2263 	if (pci_do_msi && msi->msi_location != 0)
2264 		return (msi->msi_msgnum);
2265 	return (0);
2266 }
2267 
2268 /* free pcicfgregs structure and all depending data structures */
2269 
2270 int
2271 pci_freecfg(struct pci_devinfo *dinfo)
2272 {
2273 	struct devlist *devlist_head;
2274 	struct pci_map *pm, *next;
2275 	int i;
2276 
2277 	devlist_head = &pci_devq;
2278 
2279 	if (dinfo->cfg.vpd.vpd_reg) {
2280 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2281 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2282 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2283 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2284 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2285 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2286 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2287 	}
2288 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2289 		free(pm, M_DEVBUF);
2290 	}
2291 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2292 	free(dinfo, M_DEVBUF);
2293 
2294 	/* increment the generation count */
2295 	pci_generation++;
2296 
2297 	/* we're losing one device */
2298 	pci_numdevs--;
2299 	return (0);
2300 }
2301 
2302 /*
2303  * PCI power manangement
2304  */
2305 int
2306 pci_set_powerstate_method(device_t dev, device_t child, int state)
2307 {
2308 	struct pci_devinfo *dinfo = device_get_ivars(child);
2309 	pcicfgregs *cfg = &dinfo->cfg;
2310 	uint16_t status;
2311 	int result, oldstate, highest, delay;
2312 
2313 	if (cfg->pp.pp_cap == 0)
2314 		return (EOPNOTSUPP);
2315 
2316 	/*
2317 	 * Optimize a no state change request away.  While it would be OK to
2318 	 * write to the hardware in theory, some devices have shown odd
2319 	 * behavior when going from D3 -> D3.
2320 	 */
2321 	oldstate = pci_get_powerstate(child);
2322 	if (oldstate == state)
2323 		return (0);
2324 
2325 	/*
2326 	 * The PCI power management specification states that after a state
2327 	 * transition between PCI power states, system software must
2328 	 * guarantee a minimal delay before the function accesses the device.
2329 	 * Compute the worst case delay that we need to guarantee before we
2330 	 * access the device.  Many devices will be responsive much more
2331 	 * quickly than this delay, but there are some that don't respond
2332 	 * instantly to state changes.  Transitions to/from D3 state require
2333 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2334 	 * is done below with DELAY rather than a sleeper function because
2335 	 * this function can be called from contexts where we cannot sleep.
2336 	 */
2337 	highest = (oldstate > state) ? oldstate : state;
2338 	if (highest == PCI_POWERSTATE_D3)
2339 	    delay = 10000;
2340 	else if (highest == PCI_POWERSTATE_D2)
2341 	    delay = 200;
2342 	else
2343 	    delay = 0;
2344 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2345 	    & ~PCIM_PSTAT_DMASK;
2346 	result = 0;
2347 	switch (state) {
2348 	case PCI_POWERSTATE_D0:
2349 		status |= PCIM_PSTAT_D0;
2350 		break;
2351 	case PCI_POWERSTATE_D1:
2352 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2353 			return (EOPNOTSUPP);
2354 		status |= PCIM_PSTAT_D1;
2355 		break;
2356 	case PCI_POWERSTATE_D2:
2357 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2358 			return (EOPNOTSUPP);
2359 		status |= PCIM_PSTAT_D2;
2360 		break;
2361 	case PCI_POWERSTATE_D3:
2362 		status |= PCIM_PSTAT_D3;
2363 		break;
2364 	default:
2365 		return (EINVAL);
2366 	}
2367 
2368 	if (bootverbose)
2369 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2370 		    state);
2371 
2372 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2373 	if (delay)
2374 		DELAY(delay);
2375 	return (0);
2376 }
2377 
2378 int
2379 pci_get_powerstate_method(device_t dev, device_t child)
2380 {
2381 	struct pci_devinfo *dinfo = device_get_ivars(child);
2382 	pcicfgregs *cfg = &dinfo->cfg;
2383 	uint16_t status;
2384 	int result;
2385 
2386 	if (cfg->pp.pp_cap != 0) {
2387 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2388 		switch (status & PCIM_PSTAT_DMASK) {
2389 		case PCIM_PSTAT_D0:
2390 			result = PCI_POWERSTATE_D0;
2391 			break;
2392 		case PCIM_PSTAT_D1:
2393 			result = PCI_POWERSTATE_D1;
2394 			break;
2395 		case PCIM_PSTAT_D2:
2396 			result = PCI_POWERSTATE_D2;
2397 			break;
2398 		case PCIM_PSTAT_D3:
2399 			result = PCI_POWERSTATE_D3;
2400 			break;
2401 		default:
2402 			result = PCI_POWERSTATE_UNKNOWN;
2403 			break;
2404 		}
2405 	} else {
2406 		/* No support, device is always at D0 */
2407 		result = PCI_POWERSTATE_D0;
2408 	}
2409 	return (result);
2410 }
2411 
2412 /*
2413  * Some convenience functions for PCI device drivers.
2414  */
2415 
2416 static __inline void
2417 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2418 {
2419 	uint16_t	command;
2420 
2421 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2422 	command |= bit;
2423 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2424 }
2425 
2426 static __inline void
2427 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2428 {
2429 	uint16_t	command;
2430 
2431 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2432 	command &= ~bit;
2433 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2434 }
2435 
2436 int
2437 pci_enable_busmaster_method(device_t dev, device_t child)
2438 {
2439 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2440 	return (0);
2441 }
2442 
2443 int
2444 pci_disable_busmaster_method(device_t dev, device_t child)
2445 {
2446 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2447 	return (0);
2448 }
2449 
2450 int
2451 pci_enable_io_method(device_t dev, device_t child, int space)
2452 {
2453 	uint16_t bit;
2454 
2455 	switch(space) {
2456 	case SYS_RES_IOPORT:
2457 		bit = PCIM_CMD_PORTEN;
2458 		break;
2459 	case SYS_RES_MEMORY:
2460 		bit = PCIM_CMD_MEMEN;
2461 		break;
2462 	default:
2463 		return (EINVAL);
2464 	}
2465 	pci_set_command_bit(dev, child, bit);
2466 	return (0);
2467 }
2468 
2469 int
2470 pci_disable_io_method(device_t dev, device_t child, int space)
2471 {
2472 	uint16_t bit;
2473 
2474 	switch(space) {
2475 	case SYS_RES_IOPORT:
2476 		bit = PCIM_CMD_PORTEN;
2477 		break;
2478 	case SYS_RES_MEMORY:
2479 		bit = PCIM_CMD_MEMEN;
2480 		break;
2481 	default:
2482 		return (EINVAL);
2483 	}
2484 	pci_clear_command_bit(dev, child, bit);
2485 	return (0);
2486 }
2487 
2488 /*
2489  * New style pci driver.  Parent device is either a pci-host-bridge or a
2490  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2491  */
2492 
/*
 * Dump the standard config header fields plus a summary of the power
 * management, MSI, and MSI-X capabilities.  Only prints when booting
 * verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live PM status for the current D-state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2549 
2550 static int
2551 pci_porten(device_t dev)
2552 {
2553 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2554 }
2555 
2556 static int
2557 pci_memen(device_t dev)
2558 {
2559 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2560 }
2561 
/*
 * Probe a BAR: return its current raw (possibly 64-bit) contents in
 * *mapp and the value read back after writing all 1's in *testvalp.
 * Decoding is disabled around the sizing writes and the original BAR
 * value is restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2625 
/*
 * Program a BAR with a new base address and refresh the cached
 * pm_value from what the device actually latched.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects the bits the device implements. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2646 
2647 struct pci_map *
2648 pci_find_bar(device_t dev, int reg)
2649 {
2650 	struct pci_devinfo *dinfo;
2651 	struct pci_map *pm;
2652 
2653 	dinfo = device_get_ivars(dev);
2654 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2655 		if (pm->pm_reg == reg)
2656 			return (pm);
2657 	}
2658 	return (NULL);
2659 }
2660 
2661 int
2662 pci_bar_enabled(device_t dev, struct pci_map *pm)
2663 {
2664 	struct pci_devinfo *dinfo;
2665 	uint16_t cmd;
2666 
2667 	dinfo = device_get_ivars(dev);
2668 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2669 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2670 		return (0);
2671 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2672 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2673 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2674 	else
2675 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2676 }
2677 
/*
 * Allocate a pci_map entry recording a BAR's register offset, raw
 * value, and log2 size, and link it into the device's map list.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Stop at the entry whose successor is missing or has a larger
	 * register offset, and insert after it.  NOTE(review): this keeps
	 * the list ordered only if BARs are added in ascending register
	 * order — confirm callers do so.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2702 
2703 static void
2704 pci_restore_bars(device_t dev)
2705 {
2706 	struct pci_devinfo *dinfo;
2707 	struct pci_map *pm;
2708 	int ln2range;
2709 
2710 	dinfo = device_get_ivars(dev);
2711 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2712 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2713 			ln2range = 32;
2714 		else
2715 			ln2range = pci_maprange(pm->pm_value);
2716 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2717 		if (ln2range == 64)
2718 			pci_write_config(dev, pm->pm_reg + 4,
2719 			    pm->pm_value >> 32, 4);
2720 	}
2721 }
2722 
2723 /*
2724  * Add a resource based on a pci map register. Return 1 if the map
2725  * register is a 32bit map register or 2 if it is a 64bit register.
2726  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Probe the BAR's current value and its size mask. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);	/* log2 of the BAR's size */
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject a base that doesn't fit in this platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, prefetch ? RF_PREFETCHABLE : 0);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with the address we actually got. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2891 
2892 /*
2893  * For ATA devices we need to decide early what addressing mode to use.
2894  * Legacy demands that the primary and secondary ATA ports sits on the
2895  * same addresses that old ISA hardware did. This dictates that we use
2896  * those addresses and ignore the BAR's if we cannot set PCI native
2897  * addressing mode.
2898  */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: use its BARs. */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/*
		 * Compatibility mode: hardwire the legacy ISA ranges.
		 * NOTE(review): reservation failures (r == NULL) are ignored.
		 */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: use its BARs. */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Secondary channel legacy ISA ranges. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BAR(4) and BAR(5) are probed normally in either mode. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2953 
/*
 * Establish the INTx IRQ for a device: a tunable of the form
 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq may override it; otherwise
 * use the intline register or ask the bus to route an interrupt.  The
 * chosen IRQ is written back to PCIR_INTLINE and added as rid 0.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only IRQs in (0, 255) are accepted from the tunable. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3001 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* The OHCI operational registers are memory-mapped via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM/BIOS owns the controller; request a hand-over. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll for up to 100ms for the BIOS to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* BIOS did not respond; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3038 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	/* Unlike the other host controllers, UHCI registers are I/O mapped. */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3062 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* The EHCI capability/operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the extended capability list (in PCI config space). */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A nonzero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Give the BIOS up to 100ms to release the controller. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			/* Proceed anyway; only warn about the stuck BIOS. */
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3118 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Prime eec so XHCI_XECP_NEXT(eec) is nonzero on loop entry. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * XECP offsets are expressed in 32-bit words, hence the << 2.
	 * NOTE(review): eecp is only 8 bits, but the byte offset of an
	 * extended capability can exceed 255 on some controllers --
	 * confirm whether this should be widened.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* A nonzero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			/* Proceed anyway; only warn about the stuck BIOS. */
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3180 
/*
 * Populate a device's resource list: add/reserve its BARs (honoring
 * per-device quirks), assign its interrupt line, and, if enabled,
 * perform early takeover of USB host controllers from the BIOS/SMM.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* i advances by however many BAR slots pci_add_map() used. */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			/* Non-zero devid here means the scan hit a quirk. */
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from the BIOS before attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3254 
/*
 * Enumerate all slots/functions on a bus and add a child device for
 * every PCI function found.  REG() reads the config space of slot s,
 * function f through the parent bridge.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	/* dinfo_size lets subclasses embed pci_devinfo in a larger struct. */
	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Scan only function 0 unless the header says multi-function. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots that return an invalid header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3287 
3288 void
3289 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3290 {
3291 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3292 	device_set_ivars(dinfo->cfg.dev, dinfo);
3293 	resource_list_init(&dinfo->resources);
3294 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3295 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3296 	pci_print_verbose(dinfo);
3297 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3298 }
3299 
3300 static int
3301 pci_probe(device_t dev)
3302 {
3303 
3304 	device_set_desc(dev, "PCI bus");
3305 
3306 	/* Allow other subclasses to override this driver. */
3307 	return (BUS_PROBE_GENERIC);
3308 }
3309 
/*
 * Attach logic shared by pci(4) and its subclasses: report the
 * domain/bus mapping and set up the bus's DMA tag.  When
 * PCI_DMA_BOUNDARY is defined, a boundary-constrained tag is created
 * for busses not nested below another PCI bus; otherwise (or on
 * tag-creation failure) the parent's DMA tag is inherited.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Only create a constrained tag when our grandparent is not
	 * itself a PCI bus (i.e. this is a top-level PCI bus).
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* Fall back to the parent's tag if no boundary tag was created. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3344 
/*
 * Attach method for the generic PCI bus driver: perform the common
 * setup, then enumerate and attach all children.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3365 
3366 static void
3367 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3368     int state)
3369 {
3370 	device_t child, pcib;
3371 	struct pci_devinfo *dinfo;
3372 	int dstate, i;
3373 
3374 	/*
3375 	 * Set the device to the given state.  If the firmware suggests
3376 	 * a different power state, use it instead.  If power management
3377 	 * is not present, the firmware is responsible for managing
3378 	 * device power.  Skip children who aren't attached since they
3379 	 * are handled separately.
3380 	 */
3381 	pcib = device_get_parent(dev);
3382 	for (i = 0; i < numdevs; i++) {
3383 		child = devlist[i];
3384 		dinfo = device_get_ivars(child);
3385 		dstate = state;
3386 		if (device_is_attached(child) &&
3387 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3388 			pci_set_powerstate(child, dstate);
3389 	}
3390 }
3391 
/*
 * Bus suspend method: save each child's config space, suspend the
 * child drivers, then (policy permitting) place the children in D3.
 * Returns 0 on success or the first error from the child list or
 * generic suspend.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	/* pci_do_power_suspend is a global policy knob set elsewhere. */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3423 
/*
 * Bus resume method: power children back up to D0 (policy
 * permitting), restore their saved config space, then resume their
 * drivers in two passes so that critical device classes come back
 * first.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save the state of devices without an attached driver. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not handled above. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3479 
/*
 * Locate the preloaded "pci_vendor_data" module (the vendor/device
 * description database) and record its address and size for use by
 * pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this writes one byte past the
			 * reported size; it presumably relies on the
			 * preload area having at least one spare byte --
			 * confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3499 
/*
 * Called when a new driver is added to the pci devclass: run the
 * driver's identify routine, then re-probe every child that has no
 * driver attached, restoring its saved config state first.  Children
 * that still fail to attach are cleaned up via pci_child_detached().
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reconsider children without an attached driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Restore config state saved when no driver attached. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3528 
/*
 * Bus setup_intr method.  Beyond the generic setup, this enables
 * INTx for legacy (rid 0) interrupts, or programs and enables the
 * MSI/MSI-X message for rid > 0, asking the parent bridge to map the
 * vector the first time a handler is attached to it.  INTx is
 * disabled whenever MSI/MSI-X is in use.  On mapping failure the
 * just-established handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector lazily, on the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Not MSI, so it must be MSI-X. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based; table entries 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3620 
3621 int
3622 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3623     void *cookie)
3624 {
3625 	struct msix_table_entry *mte;
3626 	struct resource_list_entry *rle;
3627 	struct pci_devinfo *dinfo;
3628 	int error, rid;
3629 
3630 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3631 		return (EINVAL);
3632 
3633 	/* If this isn't a direct child, just bail out */
3634 	if (device_get_parent(child) != dev)
3635 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3636 
3637 	rid = rman_get_rid(irq);
3638 	if (rid == 0) {
3639 		/* Mask INTx */
3640 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3641 	} else {
3642 		/*
3643 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3644 		 * decrement the appropriate handlers count and mask the
3645 		 * MSI-X message, or disable MSI messages if the count
3646 		 * drops to 0.
3647 		 */
3648 		dinfo = device_get_ivars(child);
3649 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3650 		if (rle->res != irq)
3651 			return (EINVAL);
3652 		if (dinfo->cfg.msi.msi_alloc > 0) {
3653 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3654 			    ("MSI-X index too high"));
3655 			if (dinfo->cfg.msi.msi_handlers == 0)
3656 				return (EINVAL);
3657 			dinfo->cfg.msi.msi_handlers--;
3658 			if (dinfo->cfg.msi.msi_handlers == 0)
3659 				pci_disable_msi(child);
3660 		} else {
3661 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3662 			    ("No MSI or MSI-X interrupts allocated"));
3663 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3664 			    ("MSI-X index too high"));
3665 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3666 			if (mte->mte_handlers == 0)
3667 				return (EINVAL);
3668 			mte->mte_handlers--;
3669 			if (mte->mte_handlers == 0)
3670 				pci_mask_msix(child, rid - 1);
3671 		}
3672 	}
3673 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3674 	if (rid > 0)
3675 		KASSERT(error == 0,
3676 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3677 	return (error);
3678 }
3679 
/*
 * Print the standard one-line description of a child device: its
 * reserved I/O-port, memory, and IRQ resources plus its
 * slot/function.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/*
	 * NOTE(review): this prints the bus's flags, not the child's --
	 * confirm whether device_get_flags(child) was intended.
	 */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3705 
/*
 * Table of generic class/subclass descriptions used by
 * pci_probe_nomatch() when a device has no driver and is not listed
 * in the vendor database.  An entry with subclass -1 supplies the
 * fallback description for its whole class; the table is terminated
 * by a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3798 
3799 void
3800 pci_probe_nomatch(device_t dev, device_t child)
3801 {
3802 	int i;
3803 	const char *cp, *scp;
3804 	char *device;
3805 
3806 	/*
3807 	 * Look for a listing for this device in a loaded device database.
3808 	 */
3809 	if ((device = pci_describe_device(child)) != NULL) {
3810 		device_printf(dev, "<%s>", device);
3811 		free(device, M_DEVBUF);
3812 	} else {
3813 		/*
3814 		 * Scan the class/subclass descriptions for a general
3815 		 * description.
3816 		 */
3817 		cp = "unknown";
3818 		scp = NULL;
3819 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3820 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3821 				if (pci_nomatch_tab[i].subclass == -1) {
3822 					cp = pci_nomatch_tab[i].desc;
3823 				} else if (pci_nomatch_tab[i].subclass ==
3824 				    pci_get_subclass(child)) {
3825 					scp = pci_nomatch_tab[i].desc;
3826 				}
3827 			}
3828 		}
3829 		device_printf(dev, "<%s%s%s>",
3830 		    cp ? cp : "",
3831 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3832 		    scp ? scp : "");
3833 	}
3834 	printf(" at device %d.%d (no driver attached)\n",
3835 	    pci_get_slot(child), pci_get_function(child));
3836 	pci_cfg_save(child, device_get_ivars(child), 1);
3837 }
3838 
/*
 * Clean up after a detached child: release any resources its driver
 * leaked (complaining when it did), release leaked MSI/MSI-X
 * vectors, then save the device's config state.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");

	pci_cfg_save(child, dinfo, 1);
}
3866 
3867 /*
3868  * Parse the PCI device database, if loaded, and return a pointer to a
3869  * description of the device.
3870  *
3871  * The database is flat text formatted as follows:
3872  *
3873  * Any line not in a valid format is ignored.
3874  * Lines are terminated with newline '\n' characters.
3875  *
3876  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3877  * the vendor name.
3878  *
3879  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3880  * - devices cannot be listed without a corresponding VENDOR line.
3881  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3882  * another TAB, then the device name.
3883  */
3884 
3885 /*
3886  * Assuming (ptr) points to the beginning of a line in the database,
3887  * return the vendor or device and description of the next entry.
3888  * The value of (vendor) or (device) inappropriate for the entry type
3889  * is set to -1.  Returns nonzero at the end of the database.
3890  *
 * Note that this is somewhat fragile in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to
 * the end of the database when we initialise it.
3894  */
3895 static int
3896 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3897 {
3898 	char	*cp = *ptr;
3899 	int	left;
3900 
3901 	*device = -1;
3902 	*vendor = -1;
3903 	**desc = '\0';
3904 	for (;;) {
3905 		left = pci_vendordata_size - (cp - pci_vendordata);
3906 		if (left <= 0) {
3907 			*ptr = cp;
3908 			return(1);
3909 		}
3910 
3911 		/* vendor entry? */
3912 		if (*cp != '\t' &&
3913 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3914 			break;
3915 		/* device entry? */
3916 		if (*cp == '\t' &&
3917 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3918 			break;
3919 
3920 		/* skip to next line */
3921 		while (*cp != '\n' && left > 0) {
3922 			cp++;
3923 			left--;
3924 		}
3925 		if (*cp == '\n') {
3926 			cp++;
3927 			left--;
3928 		}
3929 	}
3930 	/* skip to next line */
3931 	while (*cp != '\n' && left > 0) {
3932 		cp++;
3933 		left--;
3934 	}
3935 	if (*cp == '\n' && left > 0)
3936 		cp++;
3937 	*ptr = cp;
3938 	return(0);
3939 }
3940 
3941 static char *
3942 pci_describe_device(device_t dev)
3943 {
3944 	int	vendor, device;
3945 	char	*desc, *vp, *dp, *line;
3946 
3947 	desc = vp = dp = NULL;
3948 
3949 	/*
3950 	 * If we have no vendor data, we can't do anything.
3951 	 */
3952 	if (pci_vendordata == NULL)
3953 		goto out;
3954 
3955 	/*
3956 	 * Scan the vendor data looking for this device
3957 	 */
3958 	line = pci_vendordata;
3959 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3960 		goto out;
3961 	for (;;) {
3962 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3963 			goto out;
3964 		if (vendor == pci_get_vendor(dev))
3965 			break;
3966 	}
3967 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3968 		goto out;
3969 	for (;;) {
3970 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3971 			*dp = 0;
3972 			break;
3973 		}
3974 		if (vendor != -1) {
3975 			*dp = 0;
3976 			break;
3977 		}
3978 		if (device == pci_get_device(dev))
3979 			break;
3980 	}
3981 	if (dp[0] == '\0')
3982 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3983 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3984 	    NULL)
3985 		sprintf(desc, "%s, %s", vp, dp);
3986 out:
3987 	if (vp != NULL)
3988 		free(vp, M_DEVBUF);
3989 	if (dp != NULL)
3990 		free(dp, M_DEVBUF);
3991 	return(desc);
3992 }
3993 
3994 int
3995 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3996 {
3997 	struct pci_devinfo *dinfo;
3998 	pcicfgregs *cfg;
3999 
4000 	dinfo = device_get_ivars(child);
4001 	cfg = &dinfo->cfg;
4002 
4003 	switch (which) {
4004 	case PCI_IVAR_ETHADDR:
4005 		/*
4006 		 * The generic accessor doesn't deal with failure, so
4007 		 * we set the return value, then return an error.
4008 		 */
4009 		*((uint8_t **) result) = NULL;
4010 		return (EINVAL);
4011 	case PCI_IVAR_SUBVENDOR:
4012 		*result = cfg->subvendor;
4013 		break;
4014 	case PCI_IVAR_SUBDEVICE:
4015 		*result = cfg->subdevice;
4016 		break;
4017 	case PCI_IVAR_VENDOR:
4018 		*result = cfg->vendor;
4019 		break;
4020 	case PCI_IVAR_DEVICE:
4021 		*result = cfg->device;
4022 		break;
4023 	case PCI_IVAR_DEVID:
4024 		*result = (cfg->device << 16) | cfg->vendor;
4025 		break;
4026 	case PCI_IVAR_CLASS:
4027 		*result = cfg->baseclass;
4028 		break;
4029 	case PCI_IVAR_SUBCLASS:
4030 		*result = cfg->subclass;
4031 		break;
4032 	case PCI_IVAR_PROGIF:
4033 		*result = cfg->progif;
4034 		break;
4035 	case PCI_IVAR_REVID:
4036 		*result = cfg->revid;
4037 		break;
4038 	case PCI_IVAR_INTPIN:
4039 		*result = cfg->intpin;
4040 		break;
4041 	case PCI_IVAR_IRQ:
4042 		*result = cfg->intline;
4043 		break;
4044 	case PCI_IVAR_DOMAIN:
4045 		*result = cfg->domain;
4046 		break;
4047 	case PCI_IVAR_BUS:
4048 		*result = cfg->bus;
4049 		break;
4050 	case PCI_IVAR_SLOT:
4051 		*result = cfg->slot;
4052 		break;
4053 	case PCI_IVAR_FUNCTION:
4054 		*result = cfg->func;
4055 		break;
4056 	case PCI_IVAR_CMDREG:
4057 		*result = cfg->cmdreg;
4058 		break;
4059 	case PCI_IVAR_CACHELNSZ:
4060 		*result = cfg->cachelnsz;
4061 		break;
4062 	case PCI_IVAR_MINGNT:
4063 		*result = cfg->mingnt;
4064 		break;
4065 	case PCI_IVAR_MAXLAT:
4066 		*result = cfg->maxlat;
4067 		break;
4068 	case PCI_IVAR_LATTIMER:
4069 		*result = cfg->lattimer;
4070 		break;
4071 	default:
4072 		return (ENOENT);
4073 	}
4074 	return (0);
4075 }
4076 
4077 int
4078 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4079 {
4080 	struct pci_devinfo *dinfo;
4081 
4082 	dinfo = device_get_ivars(child);
4083 
4084 	switch (which) {
4085 	case PCI_IVAR_INTPIN:
4086 		dinfo->cfg.intpin = value;
4087 		return (0);
4088 	case PCI_IVAR_ETHADDR:
4089 	case PCI_IVAR_SUBVENDOR:
4090 	case PCI_IVAR_SUBDEVICE:
4091 	case PCI_IVAR_VENDOR:
4092 	case PCI_IVAR_DEVICE:
4093 	case PCI_IVAR_DEVID:
4094 	case PCI_IVAR_CLASS:
4095 	case PCI_IVAR_SUBCLASS:
4096 	case PCI_IVAR_PROGIF:
4097 	case PCI_IVAR_REVID:
4098 	case PCI_IVAR_IRQ:
4099 	case PCI_IVAR_DOMAIN:
4100 	case PCI_IVAR_BUS:
4101 	case PCI_IVAR_SLOT:
4102 	case PCI_IVAR_FUNCTION:
4103 		return (EINVAL);	/* disallow for now */
4104 
4105 	default:
4106 		return (ENOENT);
4107 	}
4108 }
4109 
4110 #include "opt_ddb.h"
4111 #ifdef DDB
4112 #include <ddb/ddb.h>
4113 #include <sys/cons.h>
4114 
4115 /*
4116  * List resources based on pci map registers, used for within ddb
4117  */
4118 
/*
 * DDB "show pciregs" command: walk the global PCI device list and print
 * one line per device with its bus address, class, subsystem/device IDs,
 * revision and header type.  Runs in the debugger context, so it only
 * reads the cached pci_devq / pci_conf data.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices.  Stop
	 * early if the user quits the DDB pager (db_pager_quit) or we
	 * somehow exceed the recorded device count.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/* Devices without a driver print as "noneN". */
		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4158 #endif /* DDB */
4159 
/*
 * Lazily reserve the resource backing the BAR identified by *rid for
 * 'child'.  Sizes the BAR (or reuses a previously recorded size), checks
 * that the requested resource type matches the BAR type, allocates the
 * resource from our parent, records it in the child's resource list as
 * RLE_RESERVED, and programs the BAR with the assigned address.
 * Returns the reserved resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so later failures can be retried. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR claims. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * BARs are naturally aligned to their size, so bump the
	 * requested alignment up to at least that.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.  RF_ACTIVE is stripped:
	 * reservation does not imply activation.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4265 
/*
 * Bus method: allocate a resource for a child device.  For direct
 * children this performs lazy BAR reservation (and, for rid 0 IRQs,
 * lazy interrupt routing) before handing the request to the child's
 * resource list; requests from grandchildren are passed straight up.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/* Not one of ours: delegate to our own parent. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All paths end up allocating from the child's resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4339 
4340 int
4341 pci_release_resource(device_t dev, device_t child, int type, int rid,
4342     struct resource *r)
4343 {
4344 	struct pci_devinfo *dinfo;
4345 	struct resource_list *rl;
4346 	pcicfgregs *cfg;
4347 
4348 	if (device_get_parent(child) != dev)
4349 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4350 		    type, rid, r));
4351 
4352 	dinfo = device_get_ivars(child);
4353 	cfg = &dinfo->cfg;
4354 #ifdef NEW_PCIB
4355 	/*
4356 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4357 	 * those allocations just pass the request up the tree.
4358 	 */
4359 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4360 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4361 		switch (rid) {
4362 		case PCIR_IOBASEL_1:
4363 		case PCIR_MEMBASE_1:
4364 		case PCIR_PMBASEL_1:
4365 			return (bus_generic_release_resource(dev, child, type,
4366 			    rid, r));
4367 		}
4368 	}
4369 #endif
4370 
4371 	rl = &dinfo->resources;
4372 	return (resource_list_release(rl, dev, child, type, rid, r));
4373 }
4374 
4375 int
4376 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4377     struct resource *r)
4378 {
4379 	struct pci_devinfo *dinfo;
4380 	int error;
4381 
4382 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4383 	if (error)
4384 		return (error);
4385 
4386 	/* Enable decoding in the command register when activating BARs. */
4387 	if (device_get_parent(child) == dev) {
4388 		/* Device ROMs need their decoding explicitly enabled. */
4389 		dinfo = device_get_ivars(child);
4390 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4391 			pci_write_bar(child, pci_find_bar(child, rid),
4392 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4393 		switch (type) {
4394 		case SYS_RES_IOPORT:
4395 		case SYS_RES_MEMORY:
4396 			error = PCI_ENABLE_IO(dev, child, type);
4397 			break;
4398 		}
4399 	}
4400 	return (error);
4401 }
4402 
4403 int
4404 pci_deactivate_resource(device_t dev, device_t child, int type,
4405     int rid, struct resource *r)
4406 {
4407 	struct pci_devinfo *dinfo;
4408 	int error;
4409 
4410 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4411 	if (error)
4412 		return (error);
4413 
4414 	/* Disable decoding for device ROMs. */
4415 	if (device_get_parent(child) == dev) {
4416 		dinfo = device_get_ivars(child);
4417 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4418 			pci_write_bar(child, pci_find_bar(child, rid),
4419 			    rman_get_start(r));
4420 	}
4421 	return (0);
4422 }
4423 
/*
 * Detach and destroy a PCI child device: detach its driver, turn off
 * its memory/port decoding, release every resource recorded in its
 * resource list, then delete the device and free its config data.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/*
	 * Free all allocated resources.  Entries still active or busy
	 * at this point indicate a bug elsewhere; complain and force a
	 * release on the child's behalf before unreserving.
	 */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4463 
4464 void
4465 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4466 {
4467 	struct pci_devinfo *dinfo;
4468 	struct resource_list *rl;
4469 	struct resource_list_entry *rle;
4470 
4471 	if (device_get_parent(child) != dev)
4472 		return;
4473 
4474 	dinfo = device_get_ivars(child);
4475 	rl = &dinfo->resources;
4476 	rle = resource_list_find(rl, type, rid);
4477 	if (rle == NULL)
4478 		return;
4479 
4480 	if (rle->res) {
4481 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4482 		    resource_list_busy(rl, type, rid)) {
4483 			device_printf(dev, "delete_resource: "
4484 			    "Resource still owned by child, oops. "
4485 			    "(type=%d, rid=%d, addr=%lx)\n",
4486 			    type, rid, rman_get_start(rle->res));
4487 			return;
4488 		}
4489 		resource_list_unreserve(rl, dev, child, type, rid);
4490 	}
4491 	resource_list_delete(rl, type, rid);
4492 }
4493 
4494 struct resource_list *
4495 pci_get_resource_list (device_t dev, device_t child)
4496 {
4497 	struct pci_devinfo *dinfo = device_get_ivars(child);
4498 
4499 	return (&dinfo->resources);
4500 }
4501 
4502 bus_dma_tag_t
4503 pci_get_dma_tag(device_t bus, device_t dev)
4504 {
4505 	struct pci_softc *sc = device_get_softc(bus);
4506 
4507 	return (sc->sc_dma_tag);
4508 }
4509 
4510 uint32_t
4511 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4512 {
4513 	struct pci_devinfo *dinfo = device_get_ivars(child);
4514 	pcicfgregs *cfg = &dinfo->cfg;
4515 
4516 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4517 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4518 }
4519 
4520 void
4521 pci_write_config_method(device_t dev, device_t child, int reg,
4522     uint32_t val, int width)
4523 {
4524 	struct pci_devinfo *dinfo = device_get_ivars(child);
4525 	pcicfgregs *cfg = &dinfo->cfg;
4526 
4527 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4528 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4529 }
4530 
4531 int
4532 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4533     size_t buflen)
4534 {
4535 
4536 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4537 	    pci_get_function(child));
4538 	return (0);
4539 }
4540 
4541 int
4542 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4543     size_t buflen)
4544 {
4545 	struct pci_devinfo *dinfo;
4546 	pcicfgregs *cfg;
4547 
4548 	dinfo = device_get_ivars(child);
4549 	cfg = &dinfo->cfg;
4550 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4551 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4552 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4553 	    cfg->progif);
4554 	return (0);
4555 }
4556 
4557 int
4558 pci_assign_interrupt_method(device_t dev, device_t child)
4559 {
4560 	struct pci_devinfo *dinfo = device_get_ivars(child);
4561 	pcicfgregs *cfg = &dinfo->cfg;
4562 
4563 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4564 	    cfg->intpin));
4565 }
4566 
4567 static int
4568 pci_modevent(module_t mod, int what, void *arg)
4569 {
4570 	static struct cdev *pci_cdev;
4571 
4572 	switch (what) {
4573 	case MOD_LOAD:
4574 		STAILQ_INIT(&pci_devq);
4575 		pci_generation = 0;
4576 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4577 		    "pci");
4578 		pci_load_vendor_data();
4579 		break;
4580 
4581 	case MOD_UNLOAD:
4582 		destroy_dev(pci_cdev);
4583 		break;
4584 	}
4585 
4586 	return (0);
4587 }
4588 
/*
 * Write the saved PCI Express capability control registers (captured by
 * pci_cfg_save_pcie()) back to the device.  The set of registers
 * restored mirrors exactly the set saved: which ones exist depends on
 * the capability version and port type per the PCIe capability layout.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* Capability version, from the saved flags register. */
	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every version and port type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: v2+, or v1 root ports and endpoints. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: v2+, or v1 root/downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: v2+, or v1 root ports and root event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register block only exists from version 2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4624 
4625 static void
4626 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4627 {
4628 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4629 	    dinfo->cfg.pcix.pcix_command,  2);
4630 }
4631 
/*
 * Restore a device's configuration registers from the copy cached in
 * 'dinfo' (see pci_cfg_save()).  The power state is raised to D0 first,
 * since leaving D3 resets BARs and other registers; only then are the
 * BARs, standard header registers and capability state written back.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* BARs first, then the writable standard header registers. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4681 
/*
 * Capture the device's PCI Express capability control registers into
 * 'dinfo' so pci_cfg_restore_pcie() can replay them.  Which registers
 * are read depends on the capability version and port type; the gating
 * here must stay in lock-step with the restore side.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* The flags register tells us the capability version. */
	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every version and port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: v2+, or v1 root ports and endpoints. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: v2+, or v1 root/downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: v2+, or v1 root ports and root event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register block only exists from version 2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4719 
4720 static void
4721 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4722 {
4723 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4724 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4725 }
4726 
/*
 * Snapshot a device's writable configuration registers into 'dinfo'
 * for later replay by pci_cfg_restore().  If 'setstate' is non-zero,
 * additionally power the device down to D3 subject to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/* Save PCI Express and PCI-X capability state, if present. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4812 
4813 /* Wrapper APIs suitable for device driver use. */
4814 void
4815 pci_save_state(device_t dev)
4816 {
4817 	struct pci_devinfo *dinfo;
4818 
4819 	dinfo = device_get_ivars(dev);
4820 	pci_cfg_save(dev, dinfo, 0);
4821 }
4822 
4823 void
4824 pci_restore_state(device_t dev)
4825 {
4826 	struct pci_devinfo *dinfo;
4827 
4828 	dinfo = device_get_ivars(dev);
4829 	pci_cfg_restore(dev, dinfo);
4830 }
4831