xref: /freebsd/sys/dev/pci/pci.c (revision f0157ce528a128e2abb181a5c766033a2ce49a5f)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
/* 4GB boundary; only defined when bus addresses can exceed 32 bits. */
#define	PCI_DMA_BOUNDARY	0x100000000
#endif
76 
/*
 * True when config register 'reg' is the expansion ROM BAR for the header
 * type recorded in 'cfg' (PCIR_BIOS for type 0 headers, PCIR_BIOS_1 for
 * bridges).  'reg' is parenthesized so expression arguments expand safely.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
80 
/* BAR / expansion-ROM register decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

/* Resource and newbus plumbing. */
static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
/* Config-space parsing (header, capabilities, VPD). */
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI/MSI-X management. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
126 
static device_method_t pci_methods[] = {
	/* Method dispatch table for the pci bus driver. */
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminates the method table. */
	DEVMETHOD_END
};
184 
/* Declare the "pci" kobj class backed by the method table above. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
/* Attach the pci bus driver beneath PCI bridge (pcib) devices. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor description text; presumably filled by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
193 
/* A quirk entry keyed on the combined vendor/device ID of a part. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;	/* type-specific argument (config offset for map regs) */
	int	arg2;	/* second argument; 0 for all current entries */
};
204 
/* Known-device quirk table; scanned by devid, ended by the zero entry. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* terminator */
};
262 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions (appended in pci_read_device). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped each time pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCIe / PCI-X capability is seen during capability scans. */
static int pcie_chipset, pcix_chipset;
272 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Tunable: turn on I/O and memory decode bits when activating BARs. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* Tunable: retry with a fresh range when a firmware-assigned BAR fails. */
static int pci_do_realloc_bars = 0;
TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
289 
290 static int pci_do_power_nodriver = 0;
291 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
292 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
293     &pci_do_power_nodriver, 0,
294   "Place a function into D3 state when no driver attaches to it.  0 means\n\
295 disable.  1 means conservatively place devices into D3 state.  2 means\n\
296 agressively place devices into D3 state.  3 means put absolutely everything\n\
297 in D3 state.");
298 
/* Tunable: restore devices to D0 on system resume. */
int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* Tunable: place devices into D3 on system suspend. */
int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

/* Tunables gating MSI and MSI-X interrupt support. */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Tunable: honor the pci_quirks MSI blacklist (see pci_msi_blacklisted). */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
336 
337 /* Find a device_t by bus/slot/function in domain 0 */
338 
339 device_t
340 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
341 {
342 
343 	return (pci_find_dbsf(0, bus, slot, func));
344 }
345 
346 /* Find a device_t by domain/bus/slot/function */
347 
348 device_t
349 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
350 {
351 	struct pci_devinfo *dinfo;
352 
353 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
354 		if ((dinfo->cfg.domain == domain) &&
355 		    (dinfo->cfg.bus == bus) &&
356 		    (dinfo->cfg.slot == slot) &&
357 		    (dinfo->cfg.func == func)) {
358 			return (dinfo->cfg.dev);
359 		}
360 	}
361 
362 	return (NULL);
363 }
364 
365 /* Find a device_t by vendor/device ID */
366 
367 device_t
368 pci_find_device(uint16_t vendor, uint16_t device)
369 {
370 	struct pci_devinfo *dinfo;
371 
372 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
373 		if ((dinfo->cfg.vendor == vendor) &&
374 		    (dinfo->cfg.device == device)) {
375 			return (dinfo->cfg.dev);
376 		}
377 	}
378 
379 	return (NULL);
380 }
381 
382 device_t
383 pci_find_class(uint8_t class, uint8_t subclass)
384 {
385 	struct pci_devinfo *dinfo;
386 
387 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
388 		if (dinfo->cfg.baseclass == class &&
389 		    dinfo->cfg.subclass == subclass) {
390 			return (dinfo->cfg.dev);
391 		}
392 	}
393 
394 	return (NULL);
395 }
396 
397 static int
398 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
399 {
400 	va_list ap;
401 	int retval;
402 
403 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
404 	    cfg->func);
405 	va_start(ap, fmt);
406 	retval += vprintf(fmt, ap);
407 	va_end(ap);
408 	return (retval);
409 }
410 
411 /* return base address of memory or port map */
412 
413 static pci_addr_t
414 pci_mapbase(uint64_t mapreg)
415 {
416 
417 	if (PCI_BAR_MEM(mapreg))
418 		return (mapreg & PCIM_BAR_MEM_BASE);
419 	else
420 		return (mapreg & PCIM_BAR_IO_BASE);
421 }
422 
423 /* return map type of memory or port map */
424 
425 static const char *
426 pci_maptype(uint64_t mapreg)
427 {
428 
429 	if (PCI_BAR_IO(mapreg))
430 		return ("I/O Port");
431 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
432 		return ("Prefetchable Memory");
433 	return ("Memory");
434 }
435 
436 /* return log2 of map size decoded for memory or port map */
437 
438 static int
439 pci_mapsize(uint64_t testval)
440 {
441 	int ln2size;
442 
443 	testval = pci_mapbase(testval);
444 	ln2size = 0;
445 	if (testval != 0) {
446 		while ((testval & 1) == 0)
447 		{
448 			ln2size++;
449 			testval >>= 1;
450 		}
451 	}
452 	return (ln2size);
453 }
454 
455 /* return base address of device ROM */
456 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Strip the enable/reserved low bits from the ROM BAR value. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
463 
/* return log2 of map size decoded for device ROM */
465 
466 static int
467 pci_romsize(uint64_t testval)
468 {
469 	int ln2size;
470 
471 	testval = pci_rombase(testval);
472 	ln2size = 0;
473 	if (testval != 0) {
474 		while ((testval & 1) == 0)
475 		{
476 			ln2size++;
477 			testval >>= 1;
478 		}
479 	}
480 	return (ln2size);
481 }
482 
483 /* return log2 of address range supported by map register */
484 
485 static int
486 pci_maprange(uint64_t mapreg)
487 {
488 	int ln2range = 0;
489 
490 	if (PCI_BAR_IO(mapreg))
491 		ln2range = 32;
492 	else
493 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
494 		case PCIM_BAR_MEM_32:
495 			ln2range = 32;
496 			break;
497 		case PCIM_BAR_MEM_1MB:
498 			ln2range = 20;
499 			break;
500 		case PCIM_BAR_MEM_64:
501 			ln2range = 64;
502 			break;
503 		}
504 	return (ln2range);
505 }
506 
507 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
508 
509 static void
510 pci_fixancient(pcicfgregs *cfg)
511 {
512 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
513 		return;
514 
515 	/* PCI to PCI bridges use header type 1 */
516 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
517 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
518 }
519 
520 /* extract header type specific config data */
521 
522 static void
523 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
524 {
525 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
526 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
527 	case PCIM_HDRTYPE_NORMAL:
528 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
529 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
530 		cfg->nummaps	    = PCI_MAXMAPS_0;
531 		break;
532 	case PCIM_HDRTYPE_BRIDGE:
533 		cfg->nummaps	    = PCI_MAXMAPS_1;
534 		break;
535 	case PCIM_HDRTYPE_CARDBUS:
536 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
537 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
538 		cfg->nummaps	    = PCI_MAXMAPS_2;
539 		break;
540 	}
541 #undef REG
542 }
543 
544 /* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device word means no function responds. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* 'size' lets callers embed pci_devinfo in a larger struct. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): M_WAITOK allocations don't fail; check kept. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the header-type independent config registers. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the 'conf' record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		/* Keep global bookkeeping in step with the list insert. */
		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
619 
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability-pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Record only the first power-management capability. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations each encode BAR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
780 
781 /*
782  * PCI Vital Product Data
783  */
784 
785 #define	PCI_VPD_TIMEOUT		1000000
786 
787 static int
788 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
789 {
790 	int count = PCI_VPD_TIMEOUT;
791 
792 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
793 
794 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
795 
796 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
797 		if (--count < 0)
798 			return (ENXIO);
799 		DELAY(1);	/* limit looping */
800 	}
801 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
802 
803 	return (0);
804 }
805 
#if 0
/*
 * Write one 32-bit word of VPD: compiled out, kept as reference for the
 * mirror of pci_read_vpd_reg().  Bit 15 clears when the write completes.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
825 
826 #undef PCI_VPD_TIMEOUT
827 
/* Cursor state for streaming VPD bytes out of 32-bit register reads. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config cycles */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* bytes of 'val' not yet consumed */
	int		off;		/* next VPD address to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
836 
837 static int
838 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
839 {
840 	uint32_t reg;
841 	uint8_t byte;
842 
843 	if (vrs->bytesinval == 0) {
844 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
845 			return (ENXIO);
846 		vrs->val = le32toh(reg);
847 		vrs->off += 4;
848 		byte = vrs->val & 0xff;
849 		vrs->bytesinval = 3;
850 	} else {
851 		vrs->val = vrs->val >> 8;
852 		byte = vrs->val & 0xff;
853 		vrs->bytesinval--;
854 	}
855 
856 	vrs->cksum += byte;
857 	*data = byte;
858 	return (0);
859 }
860 
861 static void
862 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
863 {
864 	struct vpd_readstate vrs;
865 	int state;
866 	int name;
867 	int remain;
868 	int i;
869 	int alloc, off;		/* alloc/off for RO/W arrays */
870 	int cksumvalid;
871 	int dflen;
872 	uint8_t byte;
873 	uint8_t byte2;
874 
875 	/* init vpd reader */
876 	vrs.bytesinval = 0;
877 	vrs.off = 0;
878 	vrs.pcib = pcib;
879 	vrs.cfg = cfg;
880 	vrs.cksum = 0;
881 
882 	state = 0;
883 	name = remain = i = 0;	/* shut up stupid gcc */
884 	alloc = off = 0;	/* shut up stupid gcc */
885 	dflen = 0;		/* shut up stupid gcc */
886 	cksumvalid = -1;
887 	while (state >= 0) {
888 		if (vpd_nextbyte(&vrs, &byte)) {
889 			state = -2;
890 			break;
891 		}
892 #if 0
893 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
894 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
895 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
896 #endif
897 		switch (state) {
898 		case 0:		/* item name */
899 			if (byte & 0x80) {
900 				if (vpd_nextbyte(&vrs, &byte2)) {
901 					state = -2;
902 					break;
903 				}
904 				remain = byte2;
905 				if (vpd_nextbyte(&vrs, &byte2)) {
906 					state = -2;
907 					break;
908 				}
909 				remain |= byte2 << 8;
910 				if (remain > (0x7f*4 - vrs.off)) {
911 					state = -1;
912 					pci_printf(cfg,
913 					    "invalid VPD data, remain %#x\n",
914 					    remain);
915 				}
916 				name = byte & 0x7f;
917 			} else {
918 				remain = byte & 0x7;
919 				name = (byte >> 3) & 0xf;
920 			}
921 			switch (name) {
922 			case 0x2:	/* String */
923 				cfg->vpd.vpd_ident = malloc(remain + 1,
924 				    M_DEVBUF, M_WAITOK);
925 				i = 0;
926 				state = 1;
927 				break;
928 			case 0xf:	/* End */
929 				state = -1;
930 				break;
931 			case 0x10:	/* VPD-R */
932 				alloc = 8;
933 				off = 0;
934 				cfg->vpd.vpd_ros = malloc(alloc *
935 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
936 				    M_WAITOK | M_ZERO);
937 				state = 2;
938 				break;
939 			case 0x11:	/* VPD-W */
940 				alloc = 8;
941 				off = 0;
942 				cfg->vpd.vpd_w = malloc(alloc *
943 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
944 				    M_WAITOK | M_ZERO);
945 				state = 5;
946 				break;
947 			default:	/* Invalid data, abort */
948 				state = -1;
949 				break;
950 			}
951 			break;
952 
953 		case 1:	/* Identifier String */
954 			cfg->vpd.vpd_ident[i++] = byte;
955 			remain--;
956 			if (remain == 0)  {
957 				cfg->vpd.vpd_ident[i] = '\0';
958 				state = 0;
959 			}
960 			break;
961 
962 		case 2:	/* VPD-R Keyword Header */
963 			if (off == alloc) {
964 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
965 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
966 				    M_DEVBUF, M_WAITOK | M_ZERO);
967 			}
968 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
969 			if (vpd_nextbyte(&vrs, &byte2)) {
970 				state = -2;
971 				break;
972 			}
973 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
974 			if (vpd_nextbyte(&vrs, &byte2)) {
975 				state = -2;
976 				break;
977 			}
978 			dflen = byte2;
979 			if (dflen == 0 &&
980 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
981 			    2) == 0) {
982 				/*
983 				 * if this happens, we can't trust the rest
984 				 * of the VPD.
985 				 */
986 				pci_printf(cfg, "bad keyword length: %d\n",
987 				    dflen);
988 				cksumvalid = 0;
989 				state = -1;
990 				break;
991 			} else if (dflen == 0) {
992 				cfg->vpd.vpd_ros[off].value = malloc(1 *
993 				    sizeof(*cfg->vpd.vpd_ros[off].value),
994 				    M_DEVBUF, M_WAITOK);
995 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
996 			} else
997 				cfg->vpd.vpd_ros[off].value = malloc(
998 				    (dflen + 1) *
999 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1000 				    M_DEVBUF, M_WAITOK);
1001 			remain -= 3;
1002 			i = 0;
1003 			/* keep in sync w/ state 3's transistions */
1004 			if (dflen == 0 && remain == 0)
1005 				state = 0;
1006 			else if (dflen == 0)
1007 				state = 2;
1008 			else
1009 				state = 3;
1010 			break;
1011 
1012 		case 3:	/* VPD-R Keyword Value */
1013 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1014 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1015 			    "RV", 2) == 0 && cksumvalid == -1) {
1016 				if (vrs.cksum == 0)
1017 					cksumvalid = 1;
1018 				else {
1019 					if (bootverbose)
1020 						pci_printf(cfg,
1021 					    "bad VPD cksum, remain %hhu\n",
1022 						    vrs.cksum);
1023 					cksumvalid = 0;
1024 					state = -1;
1025 					break;
1026 				}
1027 			}
1028 			dflen--;
1029 			remain--;
1030 			/* keep in sync w/ state 2's transistions */
1031 			if (dflen == 0)
1032 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1033 			if (dflen == 0 && remain == 0) {
1034 				cfg->vpd.vpd_rocnt = off;
1035 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1036 				    off * sizeof(*cfg->vpd.vpd_ros),
1037 				    M_DEVBUF, M_WAITOK | M_ZERO);
1038 				state = 0;
1039 			} else if (dflen == 0)
1040 				state = 2;
1041 			break;
1042 
1043 		case 4:
1044 			remain--;
1045 			if (remain == 0)
1046 				state = 0;
1047 			break;
1048 
1049 		case 5:	/* VPD-W Keyword Header */
1050 			if (off == alloc) {
1051 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1052 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1053 				    M_DEVBUF, M_WAITOK | M_ZERO);
1054 			}
1055 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1056 			if (vpd_nextbyte(&vrs, &byte2)) {
1057 				state = -2;
1058 				break;
1059 			}
1060 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1061 			if (vpd_nextbyte(&vrs, &byte2)) {
1062 				state = -2;
1063 				break;
1064 			}
1065 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1066 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1067 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1068 			    sizeof(*cfg->vpd.vpd_w[off].value),
1069 			    M_DEVBUF, M_WAITOK);
1070 			remain -= 3;
1071 			i = 0;
1072 			/* keep in sync w/ state 6's transistions */
1073 			if (dflen == 0 && remain == 0)
1074 				state = 0;
1075 			else if (dflen == 0)
1076 				state = 5;
1077 			else
1078 				state = 6;
1079 			break;
1080 
1081 		case 6:	/* VPD-W Keyword Value */
1082 			cfg->vpd.vpd_w[off].value[i++] = byte;
1083 			dflen--;
1084 			remain--;
1085 			/* keep in sync w/ state 5's transistions */
1086 			if (dflen == 0)
1087 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1088 			if (dflen == 0 && remain == 0) {
1089 				cfg->vpd.vpd_wcnt = off;
1090 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1091 				    off * sizeof(*cfg->vpd.vpd_w),
1092 				    M_DEVBUF, M_WAITOK | M_ZERO);
1093 				state = 0;
1094 			} else if (dflen == 0)
1095 				state = 5;
1096 			break;
1097 
1098 		default:
1099 			pci_printf(cfg, "invalid state: %d\n", state);
1100 			state = -1;
1101 			break;
1102 		}
1103 	}
1104 
1105 	if (cksumvalid == 0 || state < -1) {
1106 		/* read-only data bad, clean up */
1107 		if (cfg->vpd.vpd_ros != NULL) {
1108 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1109 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1110 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1111 			cfg->vpd.vpd_ros = NULL;
1112 		}
1113 	}
1114 	if (state < -1) {
1115 		/* I/O error, clean up */
1116 		pci_printf(cfg, "failed to read VPD data.\n");
1117 		if (cfg->vpd.vpd_ident != NULL) {
1118 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1119 			cfg->vpd.vpd_ident = NULL;
1120 		}
1121 		if (cfg->vpd.vpd_w != NULL) {
1122 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1123 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1124 			free(cfg->vpd.vpd_w, M_DEVBUF);
1125 			cfg->vpd.vpd_w = NULL;
1126 		}
1127 	}
1128 	cfg->vpd.vpd_cached = 1;
1129 #undef REG
1130 #undef WREG
1131 }
1132 
1133 int
1134 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1135 {
1136 	struct pci_devinfo *dinfo = device_get_ivars(child);
1137 	pcicfgregs *cfg = &dinfo->cfg;
1138 
1139 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1140 		pci_read_vpd(device_get_parent(dev), cfg);
1141 
1142 	*identptr = cfg->vpd.vpd_ident;
1143 
1144 	if (*identptr == NULL)
1145 		return (ENXIO);
1146 
1147 	return (0);
1148 }
1149 
1150 int
1151 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1152 	const char **vptr)
1153 {
1154 	struct pci_devinfo *dinfo = device_get_ivars(child);
1155 	pcicfgregs *cfg = &dinfo->cfg;
1156 	int i;
1157 
1158 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1159 		pci_read_vpd(device_get_parent(dev), cfg);
1160 
1161 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1162 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1163 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1164 			*vptr = cfg->vpd.vpd_ros[i].value;
1165 			return (0);
1166 		}
1167 
1168 	*vptr = NULL;
1169 	return (ENXIO);
1170 }
1171 
1172 /*
1173  * Find the requested HyperTransport capability and return the offset
1174  * in configuration space via the pointer provided.  The function
1175  * returns 0 on success and an error code otherwise.
1176  */
1177 int
1178 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1179 {
1180 	int ptr, error;
1181 	uint16_t val;
1182 
1183 	error = pci_find_cap(child, PCIY_HT, &ptr);
1184 	if (error)
1185 		return (error);
1186 
1187 	/*
1188 	 * Traverse the capabilities list checking each HT capability
1189 	 * to see if it matches the requested HT capability.
1190 	 */
1191 	while (ptr != 0) {
1192 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1193 		if (capability == PCIM_HTCAP_SLAVE ||
1194 		    capability == PCIM_HTCAP_HOST)
1195 			val &= 0xe000;
1196 		else
1197 			val &= PCIM_HTCMD_CAP_MASK;
1198 		if (val == capability) {
1199 			if (capreg != NULL)
1200 				*capreg = ptr;
1201 			return (0);
1202 		}
1203 
1204 		/* Skip to the next HT capability. */
1205 		while (ptr != 0) {
1206 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1207 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1208 			    PCIY_HT)
1209 				break;
1210 		}
1211 	}
1212 	return (ENOENT);
1213 }
1214 
1215 /*
1216  * Find the requested capability and return the offset in
1217  * configuration space via the pointer provided.  The function returns
1218  * 0 on success and an error code otherwise.
1219  */
1220 int
1221 pci_find_cap_method(device_t dev, device_t child, int capability,
1222     int *capreg)
1223 {
1224 	struct pci_devinfo *dinfo = device_get_ivars(child);
1225 	pcicfgregs *cfg = &dinfo->cfg;
1226 	u_int32_t status;
1227 	u_int8_t ptr;
1228 
1229 	/*
1230 	 * Check the CAP_LIST bit of the PCI status register first.
1231 	 */
1232 	status = pci_read_config(child, PCIR_STATUS, 2);
1233 	if (!(status & PCIM_STATUS_CAPPRESENT))
1234 		return (ENXIO);
1235 
1236 	/*
1237 	 * Determine the start pointer of the capabilities list.
1238 	 */
1239 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1240 	case PCIM_HDRTYPE_NORMAL:
1241 	case PCIM_HDRTYPE_BRIDGE:
1242 		ptr = PCIR_CAP_PTR;
1243 		break;
1244 	case PCIM_HDRTYPE_CARDBUS:
1245 		ptr = PCIR_CAP_PTR_2;
1246 		break;
1247 	default:
1248 		/* XXX: panic? */
1249 		return (ENXIO);		/* no extended capabilities support */
1250 	}
1251 	ptr = pci_read_config(child, ptr, 1);
1252 
1253 	/*
1254 	 * Traverse the capabilities list.
1255 	 */
1256 	while (ptr != 0) {
1257 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1258 			if (capreg != NULL)
1259 				*capreg = ptr;
1260 			return (0);
1261 		}
1262 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1263 	}
1264 
1265 	return (ENOENT);
1266 }
1267 
1268 /*
1269  * Find the requested extended capability and return the offset in
1270  * configuration space via the pointer provided.  The function returns
1271  * 0 on success and an error code otherwise.
1272  */
1273 int
1274 pci_find_extcap_method(device_t dev, device_t child, int capability,
1275     int *capreg)
1276 {
1277 	struct pci_devinfo *dinfo = device_get_ivars(child);
1278 	pcicfgregs *cfg = &dinfo->cfg;
1279 	uint32_t ecap;
1280 	uint16_t ptr;
1281 
1282 	/* Only supported for PCI-express devices. */
1283 	if (cfg->pcie.pcie_location == 0)
1284 		return (ENXIO);
1285 
1286 	ptr = PCIR_EXTCAP;
1287 	ecap = pci_read_config(child, ptr, 4);
1288 	if (ecap == 0xffffffff || ecap == 0)
1289 		return (ENOENT);
1290 	for (;;) {
1291 		if (PCI_EXTCAP_ID(ecap) == capability) {
1292 			if (capreg != NULL)
1293 				*capreg = ptr;
1294 			return (0);
1295 		}
1296 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1297 		if (ptr == 0)
1298 			break;
1299 		ecap = pci_read_config(child, ptr, 4);
1300 	}
1301 
1302 	return (ENOENT);
1303 }
1304 
1305 /*
1306  * Support for MSI-X message interrupts.
1307  */
1308 void
1309 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1310 {
1311 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1312 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1313 	uint32_t offset;
1314 
1315 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1316 	offset = msix->msix_table_offset + index * 16;
1317 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1318 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1319 	bus_write_4(msix->msix_table_res, offset + 8, data);
1320 
1321 	/* Enable MSI -> HT mapping. */
1322 	pci_ht_map_msi(dev, address);
1323 }
1324 
1325 void
1326 pci_mask_msix(device_t dev, u_int index)
1327 {
1328 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1329 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1330 	uint32_t offset, val;
1331 
1332 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1333 	offset = msix->msix_table_offset + index * 16 + 12;
1334 	val = bus_read_4(msix->msix_table_res, offset);
1335 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1336 		val |= PCIM_MSIX_VCTRL_MASK;
1337 		bus_write_4(msix->msix_table_res, offset, val);
1338 	}
1339 }
1340 
1341 void
1342 pci_unmask_msix(device_t dev, u_int index)
1343 {
1344 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1345 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1346 	uint32_t offset, val;
1347 
1348 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1349 	offset = msix->msix_table_offset + index * 16 + 12;
1350 	val = bus_read_4(msix->msix_table_res, offset);
1351 	if (val & PCIM_MSIX_VCTRL_MASK) {
1352 		val &= ~PCIM_MSIX_VCTRL_MASK;
1353 		bus_write_4(msix->msix_table_res, offset, val);
1354 	}
1355 }
1356 
1357 int
1358 pci_pending_msix(device_t dev, u_int index)
1359 {
1360 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1361 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1362 	uint32_t offset, bit;
1363 
1364 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1365 	offset = msix->msix_pba_offset + (index / 32) * 4;
1366 	bit = 1 << index % 32;
1367 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1368 }
1369 
1370 /*
1371  * Restore MSI-X registers and table during resume.  If MSI-X is
1372  * enabled then walk the virtual table to restore the actual MSI-X
1373  * table.
1374  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; index the 0-based array. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved control register (including the enable bit). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1402 
1403 /*
1404  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1405  * returned in *count.  After this function returns, each message will be
1406  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1407  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, rle still refers to it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* A partial allocation is acceptable; none is not. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: table slot i uses vector i + 1. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1542 
1543 /*
1544  * By default, pci_alloc_msix() will assign the allocated IRQ
1545  * resources consecutively to the first N messages in the MSI-X table.
1546  * However, device drivers may want to use different layouts if they
1547  * either receive fewer messages than they asked for, or they wish to
1548  * populate the MSI-X table sparsely.  This method allows the driver
1549  * to specify what layout it wants.  It must be called after a
1550  * successful pci_alloc_msix() but before any of the associated
1551  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1552  *
1553  * The 'vectors' array contains 'count' message vectors.  The array
1554  * maps directly to the MSI-X table in that index 0 in the array
1555  * specifies the vector for the first message in the MSI-X table, etc.
1556  * The vector value in each array index can either be 0 to indicate
1557  * that no vector should be assigned to a message slot, or it can be a
1558  * number from 1 to N (where N is the count returned from a
1559  * succcessful call to pci_alloc_msix()) to indicate which message
1560  * vector (IRQ) to be used for the corresponding message.
1561  *
1562  * On successful return, each message with a non-zero vector will have
1563  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1564  * 1.  Additionally, if any of the IRQs allocated via the previous
1565  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1566  * will be freed back to the system automatically.
1567  *
1568  * For example, suppose a driver has a MSI-X table with 6 messages and
1569  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1570  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1571  * C.  After the call to pci_alloc_msix(), the device will be setup to
1572  * have an MSI-X table of ABC--- (where - means no vector assigned).
1573  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1574  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1575  * be freed back to the system.  This device will also have valid
1576  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1577  *
1578  * In any case, the SYS_RES_IRQ rid X will always map to the message
1579  * at MSI-X table index X - 1 and will only be valid if a vector is
1580  * assigned to that table entry.
1581  */
1582 int
1583 pci_remap_msix_method(device_t dev, device_t child, int count,
1584     const u_int *vectors)
1585 {
1586 	struct pci_devinfo *dinfo = device_get_ivars(child);
1587 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1588 	struct resource_list_entry *rle;
1589 	int i, irq, j, *used;
1590 
1591 	/*
1592 	 * Have to have at least one message in the table but the
1593 	 * table can't be bigger than the actual MSI-X table in the
1594 	 * device.
1595 	 */
1596 	if (count == 0 || count > msix->msix_msgnum)
1597 		return (EINVAL);
1598 
1599 	/* Sanity check the vectors. */
1600 	for (i = 0; i < count; i++)
1601 		if (vectors[i] > msix->msix_alloc)
1602 			return (EINVAL);
1603 
1604 	/*
1605 	 * Make sure there aren't any holes in the vectors to be used.
1606 	 * It's a big pain to support it, and it doesn't really make
1607 	 * sense anyway.  Also, at least one vector must be used.
1608 	 */
1609 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1610 	    M_ZERO);
1611 	for (i = 0; i < count; i++)
1612 		if (vectors[i] != 0)
1613 			used[vectors[i] - 1] = 1;
1614 	for (i = 0; i < msix->msix_alloc - 1; i++)
1615 		if (used[i] == 0 && used[i + 1] == 1) {
1616 			free(used, M_DEVBUF);
1617 			return (EINVAL);
1618 		}
1619 	if (used[0] != 1) {
1620 		free(used, M_DEVBUF);
1621 		return (EINVAL);
1622 	}
1623 
1624 	/* Make sure none of the resources are allocated. */
1625 	for (i = 0; i < msix->msix_table_len; i++) {
1626 		if (msix->msix_table[i].mte_vector == 0)
1627 			continue;
1628 		if (msix->msix_table[i].mte_handlers > 0)
1629 			return (EBUSY);
1630 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1631 		KASSERT(rle != NULL, ("missing resource"));
1632 		if (rle->res != NULL)
1633 			return (EBUSY);
1634 	}
1635 
1636 	/* Free the existing resource list entries. */
1637 	for (i = 0; i < msix->msix_table_len; i++) {
1638 		if (msix->msix_table[i].mte_vector == 0)
1639 			continue;
1640 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1641 	}
1642 
1643 	/*
1644 	 * Build the new virtual table keeping track of which vectors are
1645 	 * used.
1646 	 */
1647 	free(msix->msix_table, M_DEVBUF);
1648 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1649 	    M_DEVBUF, M_WAITOK | M_ZERO);
1650 	for (i = 0; i < count; i++)
1651 		msix->msix_table[i].mte_vector = vectors[i];
1652 	msix->msix_table_len = count;
1653 
1654 	/* Free any unused IRQs and resize the vectors array if necessary. */
1655 	j = msix->msix_alloc - 1;
1656 	if (used[j] == 0) {
1657 		struct msix_vector *vec;
1658 
1659 		while (used[j] == 0) {
1660 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1661 			    msix->msix_vectors[j].mv_irq);
1662 			j--;
1663 		}
1664 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1665 		    M_WAITOK);
1666 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1667 		    (j + 1));
1668 		free(msix->msix_vectors, M_DEVBUF);
1669 		msix->msix_vectors = vec;
1670 		msix->msix_alloc = j + 1;
1671 	}
1672 	free(used, M_DEVBUF);
1673 
1674 	/* Map the IRQs onto the rids. */
1675 	for (i = 0; i < count; i++) {
1676 		if (vectors[i] == 0)
1677 			continue;
1678 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1679 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1680 		    irq, 1);
1681 	}
1682 
1683 	if (bootverbose) {
1684 		device_printf(child, "Remapped MSI-X IRQs as: ");
1685 		for (i = 0; i < count; i++) {
1686 			if (i != 0)
1687 				printf(", ");
1688 			if (vectors[i] == 0)
1689 				printf("---");
1690 			else
1691 				printf("%d",
1692 				    msix->msix_vectors[vectors[i]].mv_irq);
1693 		}
1694 		printf("\n");
1695 	}
1696 
1697 	return (0);
1698 }
1699 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  A busy entry
	 * (active handler or an outstanding SYS_RES_IRQ allocation)
	 * means the caller must tear down its interrupts first.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1746 
1747 /*
1748  * Return the max supported MSI-X messages this device supports.
1749  * Basically, assuming the MD code can alloc messages, this function
1750  * should return the maximum value that pci_alloc_msix() can return.
1751  * Thus, it is subject to the tunables, etc.
1752  */
1753 int
1754 pci_msix_count_method(device_t dev, device_t child)
1755 {
1756 	struct pci_devinfo *dinfo = device_get_ivars(child);
1757 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1758 
1759 	if (pci_do_msix && msix->msix_location != 0)
1760 		return (msix->msix_msgnum);
1761 	return (0);
1762 }
1763 
1764 /*
1765  * HyperTransport MSI mapping control
1766  */
1767 void
1768 pci_ht_map_msi(device_t dev, uint64_t addr)
1769 {
1770 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1771 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1772 
1773 	if (!ht->ht_msimap)
1774 		return;
1775 
1776 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1777 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1778 		/* Enable MSI -> HT mapping. */
1779 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1780 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1781 		    ht->ht_msictrl, 2);
1782 	}
1783 
1784 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1785 		/* Disable MSI -> HT mapping. */
1786 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1787 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1788 		    ht->ht_msictrl, 2);
1789 	}
1790 }
1791 
1792 int
1793 pci_get_max_read_req(device_t dev)
1794 {
1795 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1796 	int cap;
1797 	uint16_t val;
1798 
1799 	cap = dinfo->cfg.pcie.pcie_location;
1800 	if (cap == 0)
1801 		return (0);
1802 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1803 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1804 	val >>= 12;
1805 	return (1 << (val + 7));
1806 }
1807 
1808 int
1809 pci_set_max_read_req(device_t dev, int size)
1810 {
1811 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1812 	int cap;
1813 	uint16_t val;
1814 
1815 	cap = dinfo->cfg.pcie.pcie_location;
1816 	if (cap == 0)
1817 		return (0);
1818 	if (size < 128)
1819 		size = 128;
1820 	if (size > 4096)
1821 		size = 4096;
1822 	size = (1 << (fls(size) - 1));
1823 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1824 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1825 	val |= (fls(size) - 8) << 12;
1826 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1827 	return (size);
1828 }
1829 
1830 /*
1831  * Support for MSI message signalled interrupts.
1832  */
1833 void
1834 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1835 {
1836 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1837 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1838 
1839 	/* Write data and address values. */
1840 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1841 	    address & 0xffffffff, 4);
1842 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1843 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1844 		    address >> 32, 4);
1845 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1846 		    data, 2);
1847 	} else
1848 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1849 		    2);
1850 
1851 	/* Enable MSI in the control register. */
1852 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1853 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1854 	    2);
1855 
1856 	/* Enable MSI -> HT mapping. */
1857 	pci_ht_map_msi(dev, address);
1858 }
1859 
1860 void
1861 pci_disable_msi(device_t dev)
1862 {
1863 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1864 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1865 
1866 	/* Disable MSI -> HT mapping. */
1867 	pci_ht_map_msi(dev, 0);
1868 
1869 	/* Disable MSI in the control register. */
1870 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1871 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1872 	    2);
1873 }
1874 
1875 /*
1876  * Restore MSI registers during resume.  If MSI is enabled then
1877  * restore the data and address registers in addition to the control
1878  * register.
1879  */
1880 static void
1881 pci_resume_msi(device_t dev)
1882 {
1883 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1884 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1885 	uint64_t address;
1886 	uint16_t data;
1887 
1888 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1889 		address = msi->msi_addr;
1890 		data = msi->msi_data;
1891 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1892 		    address & 0xffffffff, 4);
1893 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1894 			pci_write_config(dev, msi->msi_location +
1895 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1896 			pci_write_config(dev, msi->msi_location +
1897 			    PCIR_MSI_DATA_64BIT, data, 2);
1898 		} else
1899 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1900 			    data, 2);
1901 	}
1902 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1903 	    2);
1904 }
1905 
1906 static int
1907 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1908 {
1909 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1910 	pcicfgregs *cfg = &dinfo->cfg;
1911 	struct resource_list_entry *rle;
1912 	struct msix_table_entry *mte;
1913 	struct msix_vector *mv;
1914 	uint64_t addr;
1915 	uint32_t data;
1916 	int error, i, j;
1917 
1918 	/*
1919 	 * Handle MSI first.  We try to find this IRQ among our list
1920 	 * of MSI IRQs.  If we find it, we request updated address and
1921 	 * data registers and apply the results.
1922 	 */
1923 	if (cfg->msi.msi_alloc > 0) {
1924 
1925 		/* If we don't have any active handlers, nothing to do. */
1926 		if (cfg->msi.msi_handlers == 0)
1927 			return (0);
1928 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1929 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1930 			    i + 1);
1931 			if (rle->start == irq) {
1932 				error = PCIB_MAP_MSI(device_get_parent(bus),
1933 				    dev, irq, &addr, &data);
1934 				if (error)
1935 					return (error);
1936 				pci_disable_msi(dev);
1937 				dinfo->cfg.msi.msi_addr = addr;
1938 				dinfo->cfg.msi.msi_data = data;
1939 				pci_enable_msi(dev, addr, data);
1940 				return (0);
1941 			}
1942 		}
1943 		return (ENOENT);
1944 	}
1945 
1946 	/*
1947 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1948 	 * we request the updated mapping info.  If that works, we go
1949 	 * through all the slots that use this IRQ and update them.
1950 	 */
1951 	if (cfg->msix.msix_alloc > 0) {
1952 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1953 			mv = &cfg->msix.msix_vectors[i];
1954 			if (mv->mv_irq == irq) {
1955 				error = PCIB_MAP_MSI(device_get_parent(bus),
1956 				    dev, irq, &addr, &data);
1957 				if (error)
1958 					return (error);
1959 				mv->mv_address = addr;
1960 				mv->mv_data = data;
1961 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1962 					mte = &cfg->msix.msix_table[j];
1963 					if (mte->mte_vector != i + 1)
1964 						continue;
1965 					if (mte->mte_handlers == 0)
1966 						continue;
1967 					pci_mask_msix(dev, j);
1968 					pci_enable_msix(dev, j, addr, data);
1969 					pci_unmask_msix(dev, j);
1970 				}
1971 			}
1972 		}
1973 		return (ENOENT);
1974 	}
1975 
1976 	return (ENOENT);
1977 }
1978 
1979 /*
1980  * Returns true if the specified device is blacklisted because MSI
1981  * doesn't work.
1982  */
1983 int
1984 pci_msi_device_blacklisted(device_t dev)
1985 {
1986 	const struct pci_quirk *q;
1987 
1988 	if (!pci_honor_msi_blacklist)
1989 		return (0);
1990 
1991 	for (q = &pci_quirks[0]; q->devid; q++) {
1992 		if (q->devid == pci_get_devid(dev) &&
1993 		    q->type == PCI_QUIRK_DISABLE_MSI)
1994 			return (1);
1995 	}
1996 	return (0);
1997 }
1998 
1999 /*
2000  * Returns true if a specified chipset supports MSI when it is
2001  * emulated hardware in a virtual machine.
2002  */
2003 static int
2004 pci_msi_vm_chipset(device_t dev)
2005 {
2006 	const struct pci_quirk *q;
2007 
2008 	for (q = &pci_quirks[0]; q->devid; q++) {
2009 		if (q->devid == pci_get_devid(dev) &&
2010 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2011 			return (1);
2012 	}
2013 	return (0);
2014 }
2015 
2016 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2018  * we just check for blacklisted chipsets as represented by the
2019  * host-PCI bridge at device 0:0:0.  In the future, it may become
2020  * necessary to check other system attributes, such as the kenv values
2021  * that give the motherboard manufacturer and model number.
2022  */
2023 static int
2024 pci_msi_blacklisted(void)
2025 {
2026 	device_t dev;
2027 
2028 	if (!pci_honor_msi_blacklist)
2029 		return (0);
2030 
2031 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2032 	if (!(pcie_chipset || pcix_chipset)) {
2033 		if (vm_guest != VM_GUEST_NO) {
2034 			dev = pci_find_bsf(0, 0, 0);
2035 			if (dev != NULL)
2036 				return (pci_msi_vm_chipset(dev) == 0);
2037 		}
2038 		return (1);
2039 	}
2040 
2041 	dev = pci_find_bsf(0, 0, 0);
2042 	if (dev != NULL)
2043 		return (pci_msi_device_blacklisted(dev));
2044 	return (0);
2045 }
2046 
2047 /*
2048  * Attempt to allocate *count MSI messages.  The actual number allocated is
2049  * returned in *count.  After this function returns, each message will be
2050  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2051  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/*
	 * If rid 0 is allocated, then fail: the legacy INTx interrupt
	 * resource is still in use by a driver.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' vectors, halving the
	 * request (staying a power of 2) on each failure until a single
	 * message also fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2(actual) in bits 6:4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2170 
/*
 * Release the MSI (or, via pci_release_msix(), MSI-X) messages
 * associated with this device.  Fails with EBUSY while any handler is
 * still attached or any IRQ resource is still allocated.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means no MSI-X was allocated. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2219 
2220 /*
2221  * Return the max supported MSI messages this device supports.
2222  * Basically, assuming the MD code can alloc messages, this function
2223  * should return the maximum value that pci_alloc_msi() can return.
2224  * Thus, it is subject to the tunables, etc.
2225  */
2226 int
2227 pci_msi_count_method(device_t dev, device_t child)
2228 {
2229 	struct pci_devinfo *dinfo = device_get_ivars(child);
2230 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2231 
2232 	if (pci_do_msi && msi->msi_location != 0)
2233 		return (msi->msi_msgnum);
2234 	return (0);
2235 }
2236 
/*
 * Free a pci_devinfo and all dependent data structures: the cached
 * VPD strings, the saved BAR list, and the devinfo's entry on the
 * global device queue.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* VPD strings were only allocated if the device has a VPD register. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* The whole list dies with dinfo, so entries are not unlinked. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2270 
2271 /*
 * PCI power management
2273  */
2274 int
2275 pci_set_powerstate_method(device_t dev, device_t child, int state)
2276 {
2277 	struct pci_devinfo *dinfo = device_get_ivars(child);
2278 	pcicfgregs *cfg = &dinfo->cfg;
2279 	uint16_t status;
2280 	int result, oldstate, highest, delay;
2281 
2282 	if (cfg->pp.pp_cap == 0)
2283 		return (EOPNOTSUPP);
2284 
2285 	/*
2286 	 * Optimize a no state change request away.  While it would be OK to
2287 	 * write to the hardware in theory, some devices have shown odd
2288 	 * behavior when going from D3 -> D3.
2289 	 */
2290 	oldstate = pci_get_powerstate(child);
2291 	if (oldstate == state)
2292 		return (0);
2293 
2294 	/*
2295 	 * The PCI power management specification states that after a state
2296 	 * transition between PCI power states, system software must
2297 	 * guarantee a minimal delay before the function accesses the device.
2298 	 * Compute the worst case delay that we need to guarantee before we
2299 	 * access the device.  Many devices will be responsive much more
2300 	 * quickly than this delay, but there are some that don't respond
2301 	 * instantly to state changes.  Transitions to/from D3 state require
2302 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2303 	 * is done below with DELAY rather than a sleeper function because
2304 	 * this function can be called from contexts where we cannot sleep.
2305 	 */
2306 	highest = (oldstate > state) ? oldstate : state;
2307 	if (highest == PCI_POWERSTATE_D3)
2308 	    delay = 10000;
2309 	else if (highest == PCI_POWERSTATE_D2)
2310 	    delay = 200;
2311 	else
2312 	    delay = 0;
2313 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2314 	    & ~PCIM_PSTAT_DMASK;
2315 	result = 0;
2316 	switch (state) {
2317 	case PCI_POWERSTATE_D0:
2318 		status |= PCIM_PSTAT_D0;
2319 		break;
2320 	case PCI_POWERSTATE_D1:
2321 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2322 			return (EOPNOTSUPP);
2323 		status |= PCIM_PSTAT_D1;
2324 		break;
2325 	case PCI_POWERSTATE_D2:
2326 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2327 			return (EOPNOTSUPP);
2328 		status |= PCIM_PSTAT_D2;
2329 		break;
2330 	case PCI_POWERSTATE_D3:
2331 		status |= PCIM_PSTAT_D3;
2332 		break;
2333 	default:
2334 		return (EINVAL);
2335 	}
2336 
2337 	if (bootverbose)
2338 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2339 		    state);
2340 
2341 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2342 	if (delay)
2343 		DELAY(delay);
2344 	return (0);
2345 }
2346 
2347 int
2348 pci_get_powerstate_method(device_t dev, device_t child)
2349 {
2350 	struct pci_devinfo *dinfo = device_get_ivars(child);
2351 	pcicfgregs *cfg = &dinfo->cfg;
2352 	uint16_t status;
2353 	int result;
2354 
2355 	if (cfg->pp.pp_cap != 0) {
2356 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2357 		switch (status & PCIM_PSTAT_DMASK) {
2358 		case PCIM_PSTAT_D0:
2359 			result = PCI_POWERSTATE_D0;
2360 			break;
2361 		case PCIM_PSTAT_D1:
2362 			result = PCI_POWERSTATE_D1;
2363 			break;
2364 		case PCIM_PSTAT_D2:
2365 			result = PCI_POWERSTATE_D2;
2366 			break;
2367 		case PCIM_PSTAT_D3:
2368 			result = PCI_POWERSTATE_D3;
2369 			break;
2370 		default:
2371 			result = PCI_POWERSTATE_UNKNOWN;
2372 			break;
2373 		}
2374 	} else {
2375 		/* No support, device is always at D0 */
2376 		result = PCI_POWERSTATE_D0;
2377 	}
2378 	return (result);
2379 }
2380 
2381 /*
2382  * Some convenience functions for PCI device drivers.
2383  */
2384 
2385 static __inline void
2386 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2387 {
2388 	uint16_t	command;
2389 
2390 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2391 	command |= bit;
2392 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2393 }
2394 
2395 static __inline void
2396 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2397 {
2398 	uint16_t	command;
2399 
2400 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2401 	command &= ~bit;
2402 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2403 }
2404 
2405 int
2406 pci_enable_busmaster_method(device_t dev, device_t child)
2407 {
2408 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2409 	return (0);
2410 }
2411 
2412 int
2413 pci_disable_busmaster_method(device_t dev, device_t child)
2414 {
2415 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2416 	return (0);
2417 }
2418 
2419 int
2420 pci_enable_io_method(device_t dev, device_t child, int space)
2421 {
2422 	uint16_t bit;
2423 
2424 	switch(space) {
2425 	case SYS_RES_IOPORT:
2426 		bit = PCIM_CMD_PORTEN;
2427 		break;
2428 	case SYS_RES_MEMORY:
2429 		bit = PCIM_CMD_MEMEN;
2430 		break;
2431 	default:
2432 		return (EINVAL);
2433 	}
2434 	pci_set_command_bit(dev, child, bit);
2435 	return (0);
2436 }
2437 
2438 int
2439 pci_disable_io_method(device_t dev, device_t child, int space)
2440 {
2441 	uint16_t bit;
2442 
2443 	switch(space) {
2444 	case SYS_RES_IOPORT:
2445 		bit = PCIM_CMD_PORTEN;
2446 		break;
2447 	case SYS_RES_MEMORY:
2448 		bit = PCIM_CMD_MEMEN;
2449 		break;
2450 	default:
2451 		return (EINVAL);
2452 	}
2453 	pci_clear_command_bit(dev, child, bit);
2454 	return (0);
2455 }
2456 
2457 /*
2458  * New style pci driver.  Parent device is either a pci-host-bridge or a
2459  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2460  */
2461 
/*
 * Dump the interesting parts of a device's config header when booting
 * verbose: IDs, location, class, command/status, timers, interrupt
 * routing, and the power-management, MSI and MSI-X capabilities.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live status to report the current D-state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2518 
2519 static int
2520 pci_porten(device_t dev)
2521 {
2522 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2523 }
2524 
2525 static int
2526 pci_memen(device_t dev)
2527 {
2528 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2529 }
2530 
/*
 * Read a BAR's current value and its sizing mask.  On return, *mapp
 * holds the BAR's original contents (restored before returning) and
 * *testvalp holds the value read back after writing all 1's, from
 * which the BAR's size can be derived.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2594 
2595 static void
2596 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2597 {
2598 	struct pci_devinfo *dinfo;
2599 	int ln2range;
2600 
2601 	/* The device ROM BAR is always a 32-bit memory BAR. */
2602 	dinfo = device_get_ivars(dev);
2603 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2604 		ln2range = 32;
2605 	else
2606 		ln2range = pci_maprange(pm->pm_value);
2607 	pci_write_config(dev, pm->pm_reg, base, 4);
2608 	if (ln2range == 64)
2609 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2610 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2611 	if (ln2range == 64)
2612 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2613 		    pm->pm_reg + 4, 4) << 32;
2614 }
2615 
2616 struct pci_map *
2617 pci_find_bar(device_t dev, int reg)
2618 {
2619 	struct pci_devinfo *dinfo;
2620 	struct pci_map *pm;
2621 
2622 	dinfo = device_get_ivars(dev);
2623 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2624 		if (pm->pm_reg == reg)
2625 			return (pm);
2626 	}
2627 	return (NULL);
2628 }
2629 
2630 int
2631 pci_bar_enabled(device_t dev, struct pci_map *pm)
2632 {
2633 	struct pci_devinfo *dinfo;
2634 	uint16_t cmd;
2635 
2636 	dinfo = device_get_ivars(dev);
2637 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2638 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2639 		return (0);
2640 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2641 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2642 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2643 	else
2644 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2645 }
2646 
/*
 * Record a newly discovered BAR (register offset, raw value and
 * log2 size) on the device's BAR list, which is kept sorted by
 * config register offset.  Returns the new entry.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after, keeping the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev is NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2671 
2672 static void
2673 pci_restore_bars(device_t dev)
2674 {
2675 	struct pci_devinfo *dinfo;
2676 	struct pci_map *pm;
2677 	int ln2range;
2678 
2679 	dinfo = device_get_ivars(dev);
2680 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2681 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2682 			ln2range = 32;
2683 		else
2684 			ln2range = pci_maprange(pm->pm_value);
2685 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2686 		if (ln2range == 64)
2687 			pci_write_config(dev, pm->pm_reg + 4,
2688 			    pm->pm_value >> 32, 4);
2689 	}
2690 }
2691 
2692 /*
2693  * Add a resource based on a pci map register. Return 1 if the map
2694  * register is a 32bit map register or 2 if it is a 64bit register.
2695  */
/*
 * Probe the BAR at config offset 'reg', record it, add it to the
 * resource list 'rl' and try to reserve its range from the parent.
 * 'force' allows BARs with a zero/disabled base to be added anyway;
 * 'prefetch' requests RF_PREFETCHABLE on the reservation.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The BAR address does not fit in a u_long on this platform. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* A base equal to the sizing mask means the BAR was never programmed. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, prefetch ? RF_PREFETCHABLE : 0);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with the address the parent assigned. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2860 
2861 /*
2862  * For ATA devices we need to decide early what addressing mode to use.
2863  * Legacy demands that the primary and secondary ATA ports sits on the
2864  * same addresses that old ISA hardware did. This dictates that we use
2865  * those addresses and ignore the BAR's if we cannot set PCI native
2866  * addressing mode.
2867  */
/*
 * Add the BAR resources for an ATA controller.  Channels in native
 * PCI mode get their BARs probed normally; channels in legacy mode
 * get the fixed ISA-compatible port ranges instead.  BARs 4 and 5
 * (bus-master DMA and extra) are always probed.
 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/* Primary channel: native mode uses BARs 0/1, legacy uses 0x1f0/0x3f6. */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Reservation failures are tolerated; 'r' is not checked. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	/* Secondary channel: native mode uses BARs 2/3, legacy uses 0x170/0x376. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2922 
/*
 * Determine the INTx IRQ for a device and record it as the rid 0
 * SYS_RES_IRQ resource.  The IRQ comes, in order of preference, from
 * a user tunable, from the bus's interrupt routing, or from the
 * device's intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2970 
/*
 * Perform early OHCI takeover from SMM.
 *
 * If the BIOS/SMM owns the controller (OHCI_IR set in the control
 * register), request an ownership change and poll for up to ~100ms.
 * If SMM never releases the controller, reset it, and in either case
 * disable all controller interrupts before handing it to the driver.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* The operational registers live behind BAR(0) (memory space). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Set the ownership-change-request bit and wait. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3007 
3008 /* Perform early UHCI takeover from SMM. */
3009 static void
3010 uhci_early_takeover(device_t self)
3011 {
3012 	struct resource *res;
3013 	int rid;
3014 
3015 	/*
3016 	 * Set the PIRQD enable bit and switch off all the others. We don't
3017 	 * want legacy support to interfere with us XXX Does this also mean
3018 	 * that the BIOS won't touch the keyboard anymore if it is connected
3019 	 * to the ports of the root hub?
3020 	 */
3021 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3022 
3023 	/* Disable interrupts */
3024 	rid = PCI_UHCI_BASE_REG;
3025 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3026 	if (res != NULL) {
3027 		bus_write_2(res, UHCI_INTR, 0);
3028 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3029 	}
3030 }
3031 
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walk the extended-capability list (located via EHCI_HCCPARAMS, with
 * entries living in PCI config space).  For each legacy-support
 * capability whose BIOS semaphore is set, set the OS semaphore and poll
 * for up to ~100ms waiting for the BIOS to drop its claim, then disable
 * controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Capability/operational registers are behind BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Each capability entry is read from config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own the controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3087 
/*
 * Perform early XHCI takeover from SMM.
 *
 * Walk the extended-capability (xECP) list in the controller's MMIO
 * space (offsets are in dwords, hence the << 2 scaling).  For each
 * USB-legacy-support capability with the BIOS semaphore set, set the
 * OS semaphore and poll up to ~5 seconds for the BIOS to release the
 * controller, then stop the controller by clearing USBCMD.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Capability/operational registers are behind BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones so the first XHCI_XECP_NEXT(eec) test passes. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			/* BIOS does not own the controller. */
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status; presumably flushes the write -- verify. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3149 
/*
 * Populate the resource list for a new device: add its BAR-based
 * memory/port resources (with special handling for legacy-mode ATA
 * controllers and quirked devices), route its INTx interrupt, and
 * perform early takeover of USB controllers from the BIOS/SMM.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* device-in-high-word/vendor-in-low-word key used by quirk table. */
	devid = (cfg->device << 16) | cfg->vendor;

	/*
	 * ATA devices needs special map treatment: use it when the
	 * controller is in legacy (master-device) mode, or when its
	 * first two BARs are unimplemented (read as zero).
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * Advance by pci_add_map()'s return value --
			 * presumably the number of BAR slots consumed
			 * (e.g. 2 for a 64-bit BAR); confirm in its
			 * definition.
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from the BIOS/SMM if requested. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3223 
/*
 * Scan every slot/function on the given bus and add a child device for
 * each PCI function found.  Functions 1..7 of a slot are only probed
 * when function 0's header type has the multi-function bit set.
 * dinfo_size lets subclassed busses allocate a larger devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* Brief pause before the first config read of each slot. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with an unrecognized header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			/* Returns NULL when no device answers at s.f. */
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3256 
/*
 * Create a new-bus child for a probed PCI function and set up its
 * per-device state (ivars, resource list, config shadow, resources).
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Snapshot the device's config registers, then write them back.
	 * NOTE(review): the save/restore pair appears to normalize the
	 * device's power/config state before resource probing -- confirm
	 * against pci_cfg_save()/pci_cfg_restore().
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3268 
3269 static int
3270 pci_probe(device_t dev)
3271 {
3272 
3273 	device_set_desc(dev, "PCI bus");
3274 
3275 	/* Allow other subclasses to override this driver. */
3276 	return (BUS_PROBE_GENERIC);
3277 }
3278 
/*
 * Common attach work shared by pci(4) and its subclasses: report the
 * domain/bus numbers and set up the softc's DMA tag.  Always returns 0.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a boundary-restricted DMA tag, but only when this bus
	 * is not itself the child of another PCI bus (i.e. only at the
	 * top of a PCI hierarchy); nested busses inherit the parent's.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or default to) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3313 
/*
 * Attach method for the generic PCI bus: perform common setup, then
 * enumerate and attach all children on this bus.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3334 
3335 static void
3336 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3337     int state)
3338 {
3339 	device_t child, pcib;
3340 	struct pci_devinfo *dinfo;
3341 	int dstate, i;
3342 
3343 	/*
3344 	 * Set the device to the given state.  If the firmware suggests
3345 	 * a different power state, use it instead.  If power management
3346 	 * is not present, the firmware is responsible for managing
3347 	 * device power.  Skip children who aren't attached since they
3348 	 * are handled separately.
3349 	 */
3350 	pcib = device_get_parent(dev);
3351 	for (i = 0; i < numdevs; i++) {
3352 		child = devlist[i];
3353 		dinfo = device_get_ivars(child);
3354 		dstate = state;
3355 		if (device_is_attached(child) &&
3356 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3357 			pci_set_powerstate(child, dstate);
3358 	}
3359 }
3360 
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, then (optionally, per pci_do_power_suspend) place them in
 * D3.  Returns 0 on success or the first error from suspending.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		/* devlist must be freed on every exit path. */
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3392 
/*
 * Bus resume method: power the children back up (optionally, per
 * pci_do_power_resume), restore their config space, and resume them --
 * critical device classes (display, memory, bridges, base peripherals)
 * first, everything else in a second pass.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-snapshot devices that have no driver attached. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not resumed above. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3448 
/*
 * Locate the "pci_vendor_data" database preloaded by the boot loader
 * and point pci_vendordata/pci_vendordata_size at it.  If no module of
 * that type was loaded, the globals are left untouched.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this stores a byte at index
			 * 'size', i.e. one past the reported length --
			 * presumably the preload area has slack for it;
			 * verify against the loader's layout.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3468 
/*
 * Bus callback invoked when a new driver is registered: give the driver
 * a chance to identify new children, then re-probe every child that is
 * currently driverless (DS_NOTPRESENT), restoring its config state
 * first.  Children that still fail to attach are cleaned up via
 * pci_child_detached().
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children with no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3497 
/*
 * Bus setup_intr method.  After the generic interrupt hookup succeeds,
 * program the interrupt hardware for direct children: rid 0 is the
 * legacy INTx interrupt (ensure INTx decoding is enabled); a non-zero
 * rid is an MSI or MSI-X message, which must be mapped by the parent
 * bridge (PCIB_MAP_MSI) and written to the device, with INTx disabled
 * while MSI/MSI-X is active.  On a mapping failure the just-installed
 * handler is torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is added. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* error is 0 here on the success path; nonzero via goto. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3589 
/*
 * Bus teardown_intr method, the inverse of pci_setup_intr().  For
 * direct children: rid 0 masks INTx; a non-zero rid decrements the
 * MSI/MSI-X handler count and disables the message (or masks the
 * MSI-X table entry) when the count reaches zero, before performing
 * the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* NOTE(review): message says "MSI-X" in the MSI branch. */
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3648 
/*
 * Bus print_child method: print the child's resource usage (ports,
 * memory, IRQs), flags, and slot/function location.  Returns the
 * number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3674 
/*
 * Class/subclass description table used by pci_probe_nomatch() to print
 * a human-readable device type when no driver attaches.  A subclass of
 * -1 supplies the generic description for the whole class; the table is
 * terminated by an all-zero entry.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3767 
/*
 * Bus probe_nomatch method: report a device for which no driver
 * attached.  Prefer a name from the loaded vendor database; otherwise
 * fall back to the class/subclass table above.  The device's config
 * state is saved (and it may be powered down) afterwards.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		/* pci_describe_device() returns M_DEVBUF memory we own. */
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	pci_cfg_save(child, device_get_ivars(child), 1);
}
3807 
/*
 * Bus child_detached method: reclaim any resources the detached driver
 * leaked (IRQs, MSI vectors, memory, I/O ports), complaining about each
 * kind it finds, then save the child's config state.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");

	pci_cfg_save(child, dinfo, 1);
}
3835 
3836 /*
3837  * Parse the PCI device database, if loaded, and return a pointer to a
3838  * description of the device.
3839  *
3840  * The database is flat text formatted as follows:
3841  *
3842  * Any line not in a valid format is ignored.
3843  * Lines are terminated with newline '\n' characters.
3844  *
3845  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3846  * the vendor name.
3847  *
3848  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3849  * - devices cannot be listed without a corresponding VENDOR line.
3850  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3851  * another TAB, then the device name.
3852  */
3853 
3854 /*
3855  * Assuming (ptr) points to the beginning of a line in the database,
3856  * return the vendor or device and description of the next entry.
3857  * The value of (vendor) or (device) inappropriate for the entry type
3858  * is set to -1.  Returns nonzero at the end of the database.
3859  *
3860  * Note that this is slightly unrobust in the face of corrupt data;
3861  * we attempt to safeguard against this by spamming the end of the
3862  * database with a newline when we initialise.
3863  */
3864 static int
3865 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3866 {
3867 	char	*cp = *ptr;
3868 	int	left;
3869 
3870 	*device = -1;
3871 	*vendor = -1;
3872 	**desc = '\0';
3873 	for (;;) {
3874 		left = pci_vendordata_size - (cp - pci_vendordata);
3875 		if (left <= 0) {
3876 			*ptr = cp;
3877 			return(1);
3878 		}
3879 
3880 		/* vendor entry? */
3881 		if (*cp != '\t' &&
3882 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3883 			break;
3884 		/* device entry? */
3885 		if (*cp == '\t' &&
3886 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3887 			break;
3888 
3889 		/* skip to next line */
3890 		while (*cp != '\n' && left > 0) {
3891 			cp++;
3892 			left--;
3893 		}
3894 		if (*cp == '\n') {
3895 			cp++;
3896 			left--;
3897 		}
3898 	}
3899 	/* skip to next line */
3900 	while (*cp != '\n' && left > 0) {
3901 		cp++;
3902 		left--;
3903 	}
3904 	if (*cp == '\n' && left > 0)
3905 		cp++;
3906 	*ptr = cp;
3907 	return(0);
3908 }
3909 
/*
 * Look up the given device in the preloaded vendor database and return
 * a malloc'd "vendor, device" description string, or NULL if the
 * database is absent, allocation fails, or the vendor is not listed.
 * The caller owns the returned string and must free it with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte buffers match pci_describe_parse_line()'s contract. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* Stop at end of database or at the next vendor entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device under a known vendor: use the hex device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3962 
/*
 * Bus method: read an instance variable of 'child' into *result.
 * All values come from the cached config registers in the child's
 * pci_devinfo; returns ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined 32-bit ID: device in the high half, vendor low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4045 
/*
 * Bus method: write an instance variable of 'child'.  Only the
 * interrupt pin is writable; the identity/location ivars are
 * deliberately read-only and return EINVAL.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		/* Only updates the cached copy, not the hardware register. */
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4078 
4079 #include "opt_ddb.h"
4080 #ifdef DDB
4081 #include <ddb/ddb.h>
4082 #include <sys/cons.h>
4083 
4084 /*
4085  * List resources based on pci map registers, used for within ddb
4086  */
4087 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one identification line per device (location, class, card
 * and chip IDs, revision, header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counts devices with no attached driver, used as a pseudo-unit. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices.
	 * Stop early if the DDB pager was quit by the user.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4127 #endif /* DDB */
4128 
/*
 * Lazily reserve the resource backing a BAR for 'child'.  Sizes the
 * BAR (probing hardware if it was not recorded earlier), validates
 * that the requested resource type matches the BAR type, allocates
 * the resource from the parent, records it in the child's resource
 * list as RLE_RESERVED, and programs the BAR with the assigned base.
 * Returns the reserved resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so later attempts skip the probe. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4234 
/*
 * Bus method: allocate a resource for 'child'.  Requests from
 * grandchildren pass straight up the tree.  For direct children, IRQ
 * allocations may first route an interrupt, and I/O and memory
 * allocations lazily reserve the backing BAR via pci_reserve_map()
 * before the actual allocation from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out the (possibly just-reserved) resource list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4305 
4306 int
4307 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4308     struct resource *r)
4309 {
4310 	struct pci_devinfo *dinfo;
4311 	int error;
4312 
4313 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4314 	if (error)
4315 		return (error);
4316 
4317 	/* Enable decoding in the command register when activating BARs. */
4318 	if (device_get_parent(child) == dev) {
4319 		/* Device ROMs need their decoding explicitly enabled. */
4320 		dinfo = device_get_ivars(child);
4321 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4322 			pci_write_bar(child, pci_find_bar(child, rid),
4323 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4324 		switch (type) {
4325 		case SYS_RES_IOPORT:
4326 		case SYS_RES_MEMORY:
4327 			error = PCI_ENABLE_IO(dev, child, type);
4328 			break;
4329 		}
4330 	}
4331 	return (error);
4332 }
4333 
4334 int
4335 pci_deactivate_resource(device_t dev, device_t child, int type,
4336     int rid, struct resource *r)
4337 {
4338 	struct pci_devinfo *dinfo;
4339 	int error;
4340 
4341 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4342 	if (error)
4343 		return (error);
4344 
4345 	/* Disable decoding for device ROMs. */
4346 	if (device_get_parent(child) == dev) {
4347 		dinfo = device_get_ivars(child);
4348 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4349 			pci_write_bar(child, pci_find_bar(child, rid),
4350 			    rman_get_start(r));
4351 	}
4352 	return (0);
4353 }
4354 
/*
 * Detach and destroy a PCI child device: disable its memory and I/O
 * decoding, release every resource recorded in its resource list
 * (complaining about any still in use), and free its config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource still active or busy at this point is
			 * a bug in the detaching driver; log it and force
			 * the release so we can finish the teardown.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4394 
/*
 * Bus method: remove a resource list entry of a direct child.  A
 * reserved resource is unreserved first; entries still active or
 * busy are left alone (with a warning) rather than yanked away.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only resources of our own immediate children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4424 
4425 struct resource_list *
4426 pci_get_resource_list (device_t dev, device_t child)
4427 {
4428 	struct pci_devinfo *dinfo = device_get_ivars(child);
4429 
4430 	return (&dinfo->resources);
4431 }
4432 
4433 bus_dma_tag_t
4434 pci_get_dma_tag(device_t bus, device_t dev)
4435 {
4436 	struct pci_softc *sc = device_get_softc(bus);
4437 
4438 	return (sc->sc_dma_tag);
4439 }
4440 
4441 uint32_t
4442 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4443 {
4444 	struct pci_devinfo *dinfo = device_get_ivars(child);
4445 	pcicfgregs *cfg = &dinfo->cfg;
4446 
4447 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4448 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4449 }
4450 
4451 void
4452 pci_write_config_method(device_t dev, device_t child, int reg,
4453     uint32_t val, int width)
4454 {
4455 	struct pci_devinfo *dinfo = device_get_ivars(child);
4456 	pcicfgregs *cfg = &dinfo->cfg;
4457 
4458 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4459 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4460 }
4461 
4462 int
4463 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4464     size_t buflen)
4465 {
4466 
4467 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4468 	    pci_get_function(child));
4469 	return (0);
4470 }
4471 
4472 int
4473 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4474     size_t buflen)
4475 {
4476 	struct pci_devinfo *dinfo;
4477 	pcicfgregs *cfg;
4478 
4479 	dinfo = device_get_ivars(child);
4480 	cfg = &dinfo->cfg;
4481 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4482 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4483 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4484 	    cfg->progif);
4485 	return (0);
4486 }
4487 
4488 int
4489 pci_assign_interrupt_method(device_t dev, device_t child)
4490 {
4491 	struct pci_devinfo *dinfo = device_get_ivars(child);
4492 	pcicfgregs *cfg = &dinfo->cfg;
4493 
4494 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4495 	    cfg->intpin));
4496 }
4497 
4498 static int
4499 pci_modevent(module_t mod, int what, void *arg)
4500 {
4501 	static struct cdev *pci_cdev;
4502 
4503 	switch (what) {
4504 	case MOD_LOAD:
4505 		STAILQ_INIT(&pci_devq);
4506 		pci_generation = 0;
4507 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4508 		    "pci");
4509 		pci_load_vendor_data();
4510 		break;
4511 
4512 	case MOD_UNLOAD:
4513 		destroy_dev(pci_cdev);
4514 		break;
4515 	}
4516 
4517 	return (0);
4518 }
4519 
/*
 * Rewrite the saved PCI Express capability control registers.  Which
 * registers exist depends on the capability version and the port
 * type, so each write is gated on the same conditions used when the
 * state was saved in pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control exists for every PCIe function. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: ports with a slot, or any v2+ capability. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist in v2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4555 
4556 static void
4557 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4558 {
4559 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4560 	    dinfo->cfg.pcix.pcix_command,  2);
4561 }
4562 
/*
 * Restore a device's config space from the copy cached in 'dinfo':
 * power it up to D0, rewrite the BARs and type-0 header registers,
 * then the PCIe/PCI-X capability state and any MSI/MSI-X setup.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4612 
/*
 * Snapshot the PCI Express capability control registers into the
 * cached config.  Register availability depends on the capability
 * version and port type; pci_cfg_restore_pcie() uses the same
 * conditions when writing them back.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* Flags carry the capability version and the slot-implemented bit. */
	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist in v2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4650 
4651 static void
4652 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4653 {
4654 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4655 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4656 }
4657 
/*
 * Snapshot a device's writable type-0 config registers (and PCIe /
 * PCI-X capability state) into 'dinfo' so they can be restored
 * later.  If 'setstate' is nonzero the device may then be powered
 * down to D3, subject to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Policy knob: the cases intentionally fall through downward. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4743 
4744 /* Wrapper APIs suitable for device driver use. */
4745 void
4746 pci_save_state(device_t dev)
4747 {
4748 	struct pci_devinfo *dinfo;
4749 
4750 	dinfo = device_get_ivars(dev);
4751 	pci_cfg_save(dev, dinfo, 0);
4752 }
4753 
4754 void
4755 pci_restore_state(device_t dev)
4756 {
4757 	struct pci_devinfo *dinfo;
4758 
4759 	dinfo = device_get_ivars(dev);
4760 	pci_cfg_restore(dev, dinfo);
4761 }
4762