xref: /freebsd/sys/dev/pci/pci.c (revision 595e514d0df2bac5b813d35f83e32875dbf16a83)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
/*
 * On platforms whose bus space is wider than 32 bits, bounce DMA at the
 * 4GB mark so that 32-bit-only PCI devices are handed valid addresses.
 */
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
#define	PCI_DMA_BOUNDARY	0x100000000
#endif

/*
 * True if config register 'reg' is the expansion ROM BAR for the header
 * type of 'cfg' (the ROM BAR sits at a different offset in type 0
 * vs. type 1 headers).
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
80 
/* BAR and ROM decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

/* Device enumeration, resource assignment, and description. */
static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
/* Vital Product Data access. */
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI/MSI-X management. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
/*
 * Method table for the generic PCI bus driver: device lifecycle hooks,
 * newbus resource/interrupt plumbing, and the PCI-specific kobj
 * interface used by child device drivers.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
183 
/* Declare the "pci" driver class and attach it below pcib (PCI bridges). */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* In-memory copy of the pci_vendors database, loaded on first use. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
192 
/* One entry in the table of known-broken (or known-good) devices. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* codes below. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;	/* Quirk-type-specific argument (e.g. register offset). */
	int	arg2;
};
203 
/*
 * Table of device-specific quirks, matched on the 32-bit device/vendor
 * ID read from config space.  Terminated by an all-zero entry.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* terminator */
};
261 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of all discovered PCI devices */
uint32_t pci_generation;	/* bumped whenever the device list changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCIe / PCI-X capability is seen during capability scan. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
274 
/* Loader tunables / sysctls controlling bus-wide policy. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 1;
TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB legacy takeover defaults on only where BIOS USB emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
335 
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience
 * wrapper around pci_find_dbsf() for the common single-domain case.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
344 
345 /* Find a device_t by domain/bus/slot/function */
346 
347 device_t
348 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
349 {
350 	struct pci_devinfo *dinfo;
351 
352 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
353 		if ((dinfo->cfg.domain == domain) &&
354 		    (dinfo->cfg.bus == bus) &&
355 		    (dinfo->cfg.slot == slot) &&
356 		    (dinfo->cfg.func == func)) {
357 			return (dinfo->cfg.dev);
358 		}
359 	}
360 
361 	return (NULL);
362 }
363 
364 /* Find a device_t by vendor/device ID */
365 
366 device_t
367 pci_find_device(uint16_t vendor, uint16_t device)
368 {
369 	struct pci_devinfo *dinfo;
370 
371 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
372 		if ((dinfo->cfg.vendor == vendor) &&
373 		    (dinfo->cfg.device == device)) {
374 			return (dinfo->cfg.dev);
375 		}
376 	}
377 
378 	return (NULL);
379 }
380 
381 device_t
382 pci_find_class(uint8_t class, uint8_t subclass)
383 {
384 	struct pci_devinfo *dinfo;
385 
386 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
387 		if (dinfo->cfg.baseclass == class &&
388 		    dinfo->cfg.subclass == subclass) {
389 			return (dinfo->cfg.dev);
390 		}
391 	}
392 
393 	return (NULL);
394 }
395 
396 static int
397 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
398 {
399 	va_list ap;
400 	int retval;
401 
402 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
403 	    cfg->func);
404 	va_start(ap, fmt);
405 	retval += vprintf(fmt, ap);
406 	va_end(ap);
407 	return (retval);
408 }
409 
410 /* return base address of memory or port map */
411 
412 static pci_addr_t
413 pci_mapbase(uint64_t mapreg)
414 {
415 
416 	if (PCI_BAR_MEM(mapreg))
417 		return (mapreg & PCIM_BAR_MEM_BASE);
418 	else
419 		return (mapreg & PCIM_BAR_IO_BASE);
420 }
421 
422 /* return map type of memory or port map */
423 
424 static const char *
425 pci_maptype(uint64_t mapreg)
426 {
427 
428 	if (PCI_BAR_IO(mapreg))
429 		return ("I/O Port");
430 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
431 		return ("Prefetchable Memory");
432 	return ("Memory");
433 }
434 
/*
 * Return log2 of the map size decoded for a memory or port map.
 * 'testval' is the value read back after writing all-ones to the BAR;
 * the size is given by the position of the lowest set address bit.
 * Returns 0 when no address bits are implemented.
 */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_mapbase(testval);
	for (ln2size = 0; base != 0 && (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
453 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and other low reserved bits. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
462 
/*
 * Return log2 of the map size decoded for a device ROM.  As with
 * pci_mapsize(), 'testval' is the post-probe readback and the size is
 * determined by the lowest implemented address bit.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_rombase(testval);
	for (ln2size = 0; base != 0 && (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
481 
482 /* return log2 of address range supported by map register */
483 
484 static int
485 pci_maprange(uint64_t mapreg)
486 {
487 	int ln2range = 0;
488 
489 	if (PCI_BAR_IO(mapreg))
490 		ln2range = 32;
491 	else
492 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
493 		case PCIM_BAR_MEM_32:
494 			ln2range = 32;
495 			break;
496 		case PCIM_BAR_MEM_1MB:
497 			ln2range = 20;
498 			break;
499 		case PCIM_BAR_MEM_64:
500 			ln2range = 64;
501 			break;
502 		}
503 	return (ln2range);
504 }
505 
506 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
507 
508 static void
509 pci_fixancient(pcicfgregs *cfg)
510 {
511 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
512 		return;
513 
514 	/* PCI to PCI bridges use header type 1 */
515 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
516 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
517 }
518 
/* extract header type specific config data */

/*
 * Fill in the fields of 'cfg' whose config-space location depends on
 * the header type: subsystem vendor/device IDs and the number of BARs.
 * Type 1 (bridge) headers carry no subsystem IDs at a fixed offset here;
 * those may be supplied later by the PCIY_SUBVENDOR capability.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
542 
/* read configuration header into pcicfgregs structure */

/*
 * Probe bus location domain/bus/slot/function through bridge 'pcib'.
 * If a device responds (vendor/device register is not all-ones), allocate
 * a pci_devinfo of 'size' bytes (callers may embed it in a larger,
 * bus-specific structure), populate its config snapshot and pciconf
 * record, link it onto the global device list, and return it.  Returns
 * NULL when no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones means no device is decoding this function. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): M_WAITOK allocations should not fail;
		 * this check is defensive. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the header fields common to all header types. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multifunction flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the snapshot into the pciconf record used by
		 * the pci(4) ioctl interface. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
618 
/*
 * Walk the device's PCI capability list and record the location and key
 * registers of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI express).
 * Called once at enumeration time from pci_read_device().
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type-specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Record which BAR (and offset) holds the vector
			 * table and the pending-bit array. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
779 
780 /*
781  * PCI Vital Product Data
782  */
783 
784 #define	PCI_VPD_TIMEOUT		1000000
785 
786 static int
787 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
788 {
789 	int count = PCI_VPD_TIMEOUT;
790 
791 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
792 
793 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
794 
795 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
796 		if (--count < 0)
797 			return (ENXIO);
798 		DELAY(1);	/* limit looping */
799 	}
800 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
801 
802 	return (0);
803 }
804 
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg'.  Mirror of
 * pci_read_vpd_reg(): the flag bit (0x8000) is set to request a write
 * and polls until the hardware clears it.  Currently unused, kept
 * disabled for reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
826 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
835 
836 static int
837 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
838 {
839 	uint32_t reg;
840 	uint8_t byte;
841 
842 	if (vrs->bytesinval == 0) {
843 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
844 			return (ENXIO);
845 		vrs->val = le32toh(reg);
846 		vrs->off += 4;
847 		byte = vrs->val & 0xff;
848 		vrs->bytesinval = 3;
849 	} else {
850 		vrs->val = vrs->val >> 8;
851 		byte = vrs->val & 0xff;
852 		vrs->bytesinval--;
853 	}
854 
855 	vrs->cksum += byte;
856 	*data = byte;
857 	return (0);
858 }
859 
860 static void
861 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
862 {
863 	struct vpd_readstate vrs;
864 	int state;
865 	int name;
866 	int remain;
867 	int i;
868 	int alloc, off;		/* alloc/off for RO/W arrays */
869 	int cksumvalid;
870 	int dflen;
871 	uint8_t byte;
872 	uint8_t byte2;
873 
874 	/* init vpd reader */
875 	vrs.bytesinval = 0;
876 	vrs.off = 0;
877 	vrs.pcib = pcib;
878 	vrs.cfg = cfg;
879 	vrs.cksum = 0;
880 
881 	state = 0;
882 	name = remain = i = 0;	/* shut up stupid gcc */
883 	alloc = off = 0;	/* shut up stupid gcc */
884 	dflen = 0;		/* shut up stupid gcc */
885 	cksumvalid = -1;
886 	while (state >= 0) {
887 		if (vpd_nextbyte(&vrs, &byte)) {
888 			state = -2;
889 			break;
890 		}
891 #if 0
892 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
893 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
894 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
895 #endif
896 		switch (state) {
897 		case 0:		/* item name */
898 			if (byte & 0x80) {
899 				if (vpd_nextbyte(&vrs, &byte2)) {
900 					state = -2;
901 					break;
902 				}
903 				remain = byte2;
904 				if (vpd_nextbyte(&vrs, &byte2)) {
905 					state = -2;
906 					break;
907 				}
908 				remain |= byte2 << 8;
909 				if (remain > (0x7f*4 - vrs.off)) {
910 					state = -1;
911 					pci_printf(cfg,
912 					    "invalid VPD data, remain %#x\n",
913 					    remain);
914 				}
915 				name = byte & 0x7f;
916 			} else {
917 				remain = byte & 0x7;
918 				name = (byte >> 3) & 0xf;
919 			}
920 			switch (name) {
921 			case 0x2:	/* String */
922 				cfg->vpd.vpd_ident = malloc(remain + 1,
923 				    M_DEVBUF, M_WAITOK);
924 				i = 0;
925 				state = 1;
926 				break;
927 			case 0xf:	/* End */
928 				state = -1;
929 				break;
930 			case 0x10:	/* VPD-R */
931 				alloc = 8;
932 				off = 0;
933 				cfg->vpd.vpd_ros = malloc(alloc *
934 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
935 				    M_WAITOK | M_ZERO);
936 				state = 2;
937 				break;
938 			case 0x11:	/* VPD-W */
939 				alloc = 8;
940 				off = 0;
941 				cfg->vpd.vpd_w = malloc(alloc *
942 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
943 				    M_WAITOK | M_ZERO);
944 				state = 5;
945 				break;
946 			default:	/* Invalid data, abort */
947 				state = -1;
948 				break;
949 			}
950 			break;
951 
952 		case 1:	/* Identifier String */
953 			cfg->vpd.vpd_ident[i++] = byte;
954 			remain--;
955 			if (remain == 0)  {
956 				cfg->vpd.vpd_ident[i] = '\0';
957 				state = 0;
958 			}
959 			break;
960 
961 		case 2:	/* VPD-R Keyword Header */
962 			if (off == alloc) {
963 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
964 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
965 				    M_DEVBUF, M_WAITOK | M_ZERO);
966 			}
967 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
968 			if (vpd_nextbyte(&vrs, &byte2)) {
969 				state = -2;
970 				break;
971 			}
972 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
973 			if (vpd_nextbyte(&vrs, &byte2)) {
974 				state = -2;
975 				break;
976 			}
977 			dflen = byte2;
978 			if (dflen == 0 &&
979 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
980 			    2) == 0) {
981 				/*
982 				 * if this happens, we can't trust the rest
983 				 * of the VPD.
984 				 */
985 				pci_printf(cfg, "bad keyword length: %d\n",
986 				    dflen);
987 				cksumvalid = 0;
988 				state = -1;
989 				break;
990 			} else if (dflen == 0) {
991 				cfg->vpd.vpd_ros[off].value = malloc(1 *
992 				    sizeof(*cfg->vpd.vpd_ros[off].value),
993 				    M_DEVBUF, M_WAITOK);
994 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
995 			} else
996 				cfg->vpd.vpd_ros[off].value = malloc(
997 				    (dflen + 1) *
998 				    sizeof(*cfg->vpd.vpd_ros[off].value),
999 				    M_DEVBUF, M_WAITOK);
1000 			remain -= 3;
1001 			i = 0;
1002 			/* keep in sync w/ state 3's transistions */
1003 			if (dflen == 0 && remain == 0)
1004 				state = 0;
1005 			else if (dflen == 0)
1006 				state = 2;
1007 			else
1008 				state = 3;
1009 			break;
1010 
1011 		case 3:	/* VPD-R Keyword Value */
1012 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1013 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1014 			    "RV", 2) == 0 && cksumvalid == -1) {
1015 				if (vrs.cksum == 0)
1016 					cksumvalid = 1;
1017 				else {
1018 					if (bootverbose)
1019 						pci_printf(cfg,
1020 					    "bad VPD cksum, remain %hhu\n",
1021 						    vrs.cksum);
1022 					cksumvalid = 0;
1023 					state = -1;
1024 					break;
1025 				}
1026 			}
1027 			dflen--;
1028 			remain--;
1029 			/* keep in sync w/ state 2's transistions */
1030 			if (dflen == 0)
1031 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1032 			if (dflen == 0 && remain == 0) {
1033 				cfg->vpd.vpd_rocnt = off;
1034 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1035 				    off * sizeof(*cfg->vpd.vpd_ros),
1036 				    M_DEVBUF, M_WAITOK | M_ZERO);
1037 				state = 0;
1038 			} else if (dflen == 0)
1039 				state = 2;
1040 			break;
1041 
1042 		case 4:
1043 			remain--;
1044 			if (remain == 0)
1045 				state = 0;
1046 			break;
1047 
1048 		case 5:	/* VPD-W Keyword Header */
1049 			if (off == alloc) {
1050 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1051 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1052 				    M_DEVBUF, M_WAITOK | M_ZERO);
1053 			}
1054 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1055 			if (vpd_nextbyte(&vrs, &byte2)) {
1056 				state = -2;
1057 				break;
1058 			}
1059 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1060 			if (vpd_nextbyte(&vrs, &byte2)) {
1061 				state = -2;
1062 				break;
1063 			}
1064 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1065 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1066 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1067 			    sizeof(*cfg->vpd.vpd_w[off].value),
1068 			    M_DEVBUF, M_WAITOK);
1069 			remain -= 3;
1070 			i = 0;
1071 			/* keep in sync w/ state 6's transistions */
1072 			if (dflen == 0 && remain == 0)
1073 				state = 0;
1074 			else if (dflen == 0)
1075 				state = 5;
1076 			else
1077 				state = 6;
1078 			break;
1079 
1080 		case 6:	/* VPD-W Keyword Value */
1081 			cfg->vpd.vpd_w[off].value[i++] = byte;
1082 			dflen--;
1083 			remain--;
1084 			/* keep in sync w/ state 5's transistions */
1085 			if (dflen == 0)
1086 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1087 			if (dflen == 0 && remain == 0) {
1088 				cfg->vpd.vpd_wcnt = off;
1089 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1090 				    off * sizeof(*cfg->vpd.vpd_w),
1091 				    M_DEVBUF, M_WAITOK | M_ZERO);
1092 				state = 0;
1093 			} else if (dflen == 0)
1094 				state = 5;
1095 			break;
1096 
1097 		default:
1098 			pci_printf(cfg, "invalid state: %d\n", state);
1099 			state = -1;
1100 			break;
1101 		}
1102 	}
1103 
1104 	if (cksumvalid == 0 || state < -1) {
1105 		/* read-only data bad, clean up */
1106 		if (cfg->vpd.vpd_ros != NULL) {
1107 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1108 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1109 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1110 			cfg->vpd.vpd_ros = NULL;
1111 		}
1112 	}
1113 	if (state < -1) {
1114 		/* I/O error, clean up */
1115 		pci_printf(cfg, "failed to read VPD data.\n");
1116 		if (cfg->vpd.vpd_ident != NULL) {
1117 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1118 			cfg->vpd.vpd_ident = NULL;
1119 		}
1120 		if (cfg->vpd.vpd_w != NULL) {
1121 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1122 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1123 			free(cfg->vpd.vpd_w, M_DEVBUF);
1124 			cfg->vpd.vpd_w = NULL;
1125 		}
1126 	}
1127 	cfg->vpd.vpd_cached = 1;
1128 #undef REG
1129 #undef WREG
1130 }
1131 
1132 int
1133 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1134 {
1135 	struct pci_devinfo *dinfo = device_get_ivars(child);
1136 	pcicfgregs *cfg = &dinfo->cfg;
1137 
1138 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1139 		pci_read_vpd(device_get_parent(dev), cfg);
1140 
1141 	*identptr = cfg->vpd.vpd_ident;
1142 
1143 	if (*identptr == NULL)
1144 		return (ENXIO);
1145 
1146 	return (0);
1147 }
1148 
1149 int
1150 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1151 	const char **vptr)
1152 {
1153 	struct pci_devinfo *dinfo = device_get_ivars(child);
1154 	pcicfgregs *cfg = &dinfo->cfg;
1155 	int i;
1156 
1157 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1158 		pci_read_vpd(device_get_parent(dev), cfg);
1159 
1160 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1161 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1162 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1163 			*vptr = cfg->vpd.vpd_ros[i].value;
1164 			return (0);
1165 		}
1166 
1167 	*vptr = NULL;
1168 	return (ENXIO);
1169 }
1170 
1171 /*
1172  * Find the requested HyperTransport capability and return the offset
1173  * in configuration space via the pointer provided.  The function
1174  * returns 0 on success and an error code otherwise.
1175  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HT capability in the standard capability list. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			/* Slave/host are identified by the top bits only. */
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/*
		 * Skip to the next HT capability; other capability
		 * types in the list are passed over.
		 */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	/* Requested HT capability not present. */
	return (ENOENT);
}
1213 
1214 /*
1215  * Find the requested capability and return the offset in
1216  * configuration space via the pointer provided.  The function returns
1217  * 0 on success and an error code otherwise.
1218  */
1219 int
1220 pci_find_cap_method(device_t dev, device_t child, int capability,
1221     int *capreg)
1222 {
1223 	struct pci_devinfo *dinfo = device_get_ivars(child);
1224 	pcicfgregs *cfg = &dinfo->cfg;
1225 	u_int32_t status;
1226 	u_int8_t ptr;
1227 
1228 	/*
1229 	 * Check the CAP_LIST bit of the PCI status register first.
1230 	 */
1231 	status = pci_read_config(child, PCIR_STATUS, 2);
1232 	if (!(status & PCIM_STATUS_CAPPRESENT))
1233 		return (ENXIO);
1234 
1235 	/*
1236 	 * Determine the start pointer of the capabilities list.
1237 	 */
1238 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1239 	case PCIM_HDRTYPE_NORMAL:
1240 	case PCIM_HDRTYPE_BRIDGE:
1241 		ptr = PCIR_CAP_PTR;
1242 		break;
1243 	case PCIM_HDRTYPE_CARDBUS:
1244 		ptr = PCIR_CAP_PTR_2;
1245 		break;
1246 	default:
1247 		/* XXX: panic? */
1248 		return (ENXIO);		/* no extended capabilities support */
1249 	}
1250 	ptr = pci_read_config(child, ptr, 1);
1251 
1252 	/*
1253 	 * Traverse the capabilities list.
1254 	 */
1255 	while (ptr != 0) {
1256 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1257 			if (capreg != NULL)
1258 				*capreg = ptr;
1259 			return (0);
1260 		}
1261 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1262 	}
1263 
1264 	return (ENOENT);
1265 }
1266 
1267 /*
1268  * Find the requested extended capability and return the offset in
1269  * configuration space via the pointer provided.  The function returns
1270  * 0 on success and an error code otherwise.
1271  */
1272 int
1273 pci_find_extcap_method(device_t dev, device_t child, int capability,
1274     int *capreg)
1275 {
1276 	struct pci_devinfo *dinfo = device_get_ivars(child);
1277 	pcicfgregs *cfg = &dinfo->cfg;
1278 	uint32_t ecap;
1279 	uint16_t ptr;
1280 
1281 	/* Only supported for PCI-express devices. */
1282 	if (cfg->pcie.pcie_location == 0)
1283 		return (ENXIO);
1284 
1285 	ptr = PCIR_EXTCAP;
1286 	ecap = pci_read_config(child, ptr, 4);
1287 	if (ecap == 0xffffffff || ecap == 0)
1288 		return (ENOENT);
1289 	for (;;) {
1290 		if (PCI_EXTCAP_ID(ecap) == capability) {
1291 			if (capreg != NULL)
1292 				*capreg = ptr;
1293 			return (0);
1294 		}
1295 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1296 		if (ptr == 0)
1297 			break;
1298 		ecap = pci_read_config(child, ptr, 4);
1299 	}
1300 
1301 	return (ENOENT);
1302 }
1303 
1304 /*
1305  * Support for MSI-X message interrupts.
1306  */
1307 void
1308 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1309 {
1310 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1311 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1312 	uint32_t offset;
1313 
1314 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1315 	offset = msix->msix_table_offset + index * 16;
1316 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1317 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1318 	bus_write_4(msix->msix_table_res, offset + 8, data);
1319 
1320 	/* Enable MSI -> HT mapping. */
1321 	pci_ht_map_msi(dev, address);
1322 }
1323 
1324 void
1325 pci_mask_msix(device_t dev, u_int index)
1326 {
1327 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1328 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1329 	uint32_t offset, val;
1330 
1331 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1332 	offset = msix->msix_table_offset + index * 16 + 12;
1333 	val = bus_read_4(msix->msix_table_res, offset);
1334 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1335 		val |= PCIM_MSIX_VCTRL_MASK;
1336 		bus_write_4(msix->msix_table_res, offset, val);
1337 	}
1338 }
1339 
1340 void
1341 pci_unmask_msix(device_t dev, u_int index)
1342 {
1343 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1344 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1345 	uint32_t offset, val;
1346 
1347 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1348 	offset = msix->msix_table_offset + index * 16 + 12;
1349 	val = bus_read_4(msix->msix_table_res, offset);
1350 	if (val & PCIM_MSIX_VCTRL_MASK) {
1351 		val &= ~PCIM_MSIX_VCTRL_MASK;
1352 		bus_write_4(msix->msix_table_res, offset, val);
1353 	}
1354 }
1355 
1356 int
1357 pci_pending_msix(device_t dev, u_int index)
1358 {
1359 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1360 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1361 	uint32_t offset, bit;
1362 
1363 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1364 	offset = msix->msix_pba_offset + (index / 32) * 4;
1365 	bit = 1 << index % 32;
1366 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1367 }
1368 
1369 /*
1370  * Restore MSI-X registers and table during resume.  If MSI-X is
1371  * enabled then walk the virtual table to restore the actual MSI-X
1372  * table.
1373  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/*
		 * Second, program any messages with at least one handler.
		 * Slots whose mte_vector is 0 are unused; mte_vector is a
		 * 1-based index into msix_vectors.
		 */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved MSI-X control register (enable bit included). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1401 
1402 /*
1403  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1404  * returned in *count.  After this function returns, each message will be
1405  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1406  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped: the caller must
	 * have already allocated and activated the memory resources
	 * backing the MSI-X table and the pending bit array (PBA).
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If table and PBA share a BAR, 'rle' is still the table's entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the parent for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* A partial allocation is OK; none at all is not. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* By default, message i uses the i'th vector (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1541 
1542 /*
1543  * By default, pci_alloc_msix() will assign the allocated IRQ
1544  * resources consecutively to the first N messages in the MSI-X table.
1545  * However, device drivers may want to use different layouts if they
1546  * either receive fewer messages than they asked for, or they wish to
1547  * populate the MSI-X table sparsely.  This method allows the driver
1548  * to specify what layout it wants.  It must be called after a
1549  * successful pci_alloc_msix() but before any of the associated
1550  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1551  *
1552  * The 'vectors' array contains 'count' message vectors.  The array
1553  * maps directly to the MSI-X table in that index 0 in the array
1554  * specifies the vector for the first message in the MSI-X table, etc.
1555  * The vector value in each array index can either be 0 to indicate
1556  * that no vector should be assigned to a message slot, or it can be a
1557  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1559  * vector (IRQ) to be used for the corresponding message.
1560  *
1561  * On successful return, each message with a non-zero vector will have
1562  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1563  * 1.  Additionally, if any of the IRQs allocated via the previous
1564  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1565  * will be freed back to the system automatically.
1566  *
1567  * For example, suppose a driver has a MSI-X table with 6 messages and
1568  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1569  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1570  * C.  After the call to pci_alloc_msix(), the device will be setup to
1571  * have an MSI-X table of ABC--- (where - means no vector assigned).
1572  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1573  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1574  * be freed back to the system.  This device will also have valid
1575  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1576  *
1577  * In any case, the SYS_RES_IRQ rid X will always map to the message
1578  * at MSI-X table index X - 1 and will only be valid if a vector is
1579  * assigned to that table entry.
1580  */
1581 int
1582 pci_remap_msix_method(device_t dev, device_t child, int count,
1583     const u_int *vectors)
1584 {
1585 	struct pci_devinfo *dinfo = device_get_ivars(child);
1586 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1587 	struct resource_list_entry *rle;
1588 	int i, irq, j, *used;
1589 
1590 	/*
1591 	 * Have to have at least one message in the table but the
1592 	 * table can't be bigger than the actual MSI-X table in the
1593 	 * device.
1594 	 */
1595 	if (count == 0 || count > msix->msix_msgnum)
1596 		return (EINVAL);
1597 
1598 	/* Sanity check the vectors. */
1599 	for (i = 0; i < count; i++)
1600 		if (vectors[i] > msix->msix_alloc)
1601 			return (EINVAL);
1602 
1603 	/*
1604 	 * Make sure there aren't any holes in the vectors to be used.
1605 	 * It's a big pain to support it, and it doesn't really make
1606 	 * sense anyway.  Also, at least one vector must be used.
1607 	 */
1608 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1609 	    M_ZERO);
1610 	for (i = 0; i < count; i++)
1611 		if (vectors[i] != 0)
1612 			used[vectors[i] - 1] = 1;
1613 	for (i = 0; i < msix->msix_alloc - 1; i++)
1614 		if (used[i] == 0 && used[i + 1] == 1) {
1615 			free(used, M_DEVBUF);
1616 			return (EINVAL);
1617 		}
1618 	if (used[0] != 1) {
1619 		free(used, M_DEVBUF);
1620 		return (EINVAL);
1621 	}
1622 
1623 	/* Make sure none of the resources are allocated. */
1624 	for (i = 0; i < msix->msix_table_len; i++) {
1625 		if (msix->msix_table[i].mte_vector == 0)
1626 			continue;
1627 		if (msix->msix_table[i].mte_handlers > 0)
1628 			return (EBUSY);
1629 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1630 		KASSERT(rle != NULL, ("missing resource"));
1631 		if (rle->res != NULL)
1632 			return (EBUSY);
1633 	}
1634 
1635 	/* Free the existing resource list entries. */
1636 	for (i = 0; i < msix->msix_table_len; i++) {
1637 		if (msix->msix_table[i].mte_vector == 0)
1638 			continue;
1639 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1640 	}
1641 
1642 	/*
1643 	 * Build the new virtual table keeping track of which vectors are
1644 	 * used.
1645 	 */
1646 	free(msix->msix_table, M_DEVBUF);
1647 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1648 	    M_DEVBUF, M_WAITOK | M_ZERO);
1649 	for (i = 0; i < count; i++)
1650 		msix->msix_table[i].mte_vector = vectors[i];
1651 	msix->msix_table_len = count;
1652 
1653 	/* Free any unused IRQs and resize the vectors array if necessary. */
1654 	j = msix->msix_alloc - 1;
1655 	if (used[j] == 0) {
1656 		struct msix_vector *vec;
1657 
1658 		while (used[j] == 0) {
1659 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1660 			    msix->msix_vectors[j].mv_irq);
1661 			j--;
1662 		}
1663 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1664 		    M_WAITOK);
1665 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1666 		    (j + 1));
1667 		free(msix->msix_vectors, M_DEVBUF);
1668 		msix->msix_vectors = vec;
1669 		msix->msix_alloc = j + 1;
1670 	}
1671 	free(used, M_DEVBUF);
1672 
1673 	/* Map the IRQs onto the rids. */
1674 	for (i = 0; i < count; i++) {
1675 		if (vectors[i] == 0)
1676 			continue;
1677 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1678 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1679 		    irq, 1);
1680 	}
1681 
1682 	if (bootverbose) {
1683 		device_printf(child, "Remapped MSI-X IRQs as: ");
1684 		for (i = 0; i < count; i++) {
1685 			if (i != 0)
1686 				printf(", ");
1687 			if (vectors[i] == 0)
1688 				printf("---");
1689 			else
1690 				printf("%d",
1691 				    msix->msix_vectors[vectors[i]].mv_irq);
1692 		}
1693 		printf("\n");
1694 	}
1695 
1696 	return (0);
1697 }
1698 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated: a message
	 * with an active handler or an outstanding SYS_RES_IRQ
	 * allocation blocks the release.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1745 
1746 /*
1747  * Return the max supported MSI-X messages this device supports.
1748  * Basically, assuming the MD code can alloc messages, this function
1749  * should return the maximum value that pci_alloc_msix() can return.
1750  * Thus, it is subject to the tunables, etc.
1751  */
1752 int
1753 pci_msix_count_method(device_t dev, device_t child)
1754 {
1755 	struct pci_devinfo *dinfo = device_get_ivars(child);
1756 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1757 
1758 	if (pci_do_msix && msix->msix_location != 0)
1759 		return (msix->msix_msgnum);
1760 	return (0);
1761 }
1762 
1763 /*
1764  * HyperTransport MSI mapping control
1765  */
1766 void
1767 pci_ht_map_msi(device_t dev, uint64_t addr)
1768 {
1769 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1770 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1771 
1772 	if (!ht->ht_msimap)
1773 		return;
1774 
1775 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1776 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1777 		/* Enable MSI -> HT mapping. */
1778 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1779 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1780 		    ht->ht_msictrl, 2);
1781 	}
1782 
1783 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1784 		/* Disable MSI -> HT mapping. */
1785 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1786 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1787 		    ht->ht_msictrl, 2);
1788 	}
1789 }
1790 
1791 int
1792 pci_get_max_read_req(device_t dev)
1793 {
1794 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1795 	int cap;
1796 	uint16_t val;
1797 
1798 	cap = dinfo->cfg.pcie.pcie_location;
1799 	if (cap == 0)
1800 		return (0);
1801 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1802 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1803 	val >>= 12;
1804 	return (1 << (val + 7));
1805 }
1806 
1807 int
1808 pci_set_max_read_req(device_t dev, int size)
1809 {
1810 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1811 	int cap;
1812 	uint16_t val;
1813 
1814 	cap = dinfo->cfg.pcie.pcie_location;
1815 	if (cap == 0)
1816 		return (0);
1817 	if (size < 128)
1818 		size = 128;
1819 	if (size > 4096)
1820 		size = 4096;
1821 	size = (1 << (fls(size) - 1));
1822 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1823 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1824 	val |= (fls(size) - 8) << 12;
1825 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1826 	return (size);
1827 }
1828 
1829 /*
1830  * Support for MSI message signalled interrupts.
1831  */
1832 void
1833 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1834 {
1835 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1836 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1837 
1838 	/* Write data and address values. */
1839 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1840 	    address & 0xffffffff, 4);
1841 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1842 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1843 		    address >> 32, 4);
1844 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1845 		    data, 2);
1846 	} else
1847 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1848 		    2);
1849 
1850 	/* Enable MSI in the control register. */
1851 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1852 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1853 	    2);
1854 
1855 	/* Enable MSI -> HT mapping. */
1856 	pci_ht_map_msi(dev, address);
1857 }
1858 
1859 void
1860 pci_disable_msi(device_t dev)
1861 {
1862 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1863 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1864 
1865 	/* Disable MSI -> HT mapping. */
1866 	pci_ht_map_msi(dev, 0);
1867 
1868 	/* Disable MSI in the control register. */
1869 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1870 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1871 	    2);
1872 }
1873 
1874 /*
1875  * Restore MSI registers during resume.  If MSI is enabled then
1876  * restore the data and address registers in addition to the control
1877  * register.
1878  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data before re-enabling. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit devices use a different data offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control register (enable bit included). */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1904 
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram MSI with the new routing. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot on this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is a 1-based index. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): ENOENT is returned even after a
		 * successful MSI-X update above — confirm callers
		 * tolerate this return value in that case.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1977 
1978 /*
1979  * Returns true if the specified device is blacklisted because MSI
1980  * doesn't work.
1981  */
1982 int
1983 pci_msi_device_blacklisted(device_t dev)
1984 {
1985 	const struct pci_quirk *q;
1986 
1987 	if (!pci_honor_msi_blacklist)
1988 		return (0);
1989 
1990 	for (q = &pci_quirks[0]; q->devid; q++) {
1991 		if (q->devid == pci_get_devid(dev) &&
1992 		    q->type == PCI_QUIRK_DISABLE_MSI)
1993 			return (1);
1994 	}
1995 	return (0);
1996 }
1997 
1998 /*
1999  * Returns true if a specified chipset supports MSI when it is
2000  * emulated hardware in a virtual machine.
2001  */
2002 static int
2003 pci_msi_vm_chipset(device_t dev)
2004 {
2005 	const struct pci_quirk *q;
2006 
2007 	for (q = &pci_quirks[0]; q->devid; q++) {
2008 		if (q->devid == pci_get_devid(dev) &&
2009 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2010 			return (1);
2011 	}
2012 	return (0);
2013 }
2014 
2015 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2017  * we just check for blacklisted chipsets as represented by the
2018  * host-PCI bridge at device 0:0:0.  In the future, it may become
2019  * necessary to check other system attributes, such as the kenv values
2020  * that give the motherboard manufacturer and model number.
2021  */
2022 static int
2023 pci_msi_blacklisted(void)
2024 {
2025 	device_t dev;
2026 
2027 	if (!pci_honor_msi_blacklist)
2028 		return (0);
2029 
2030 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2031 	if (!(pcie_chipset || pcix_chipset)) {
2032 		if (vm_guest != VM_GUEST_NO) {
2033 			dev = pci_find_bsf(0, 0, 0);
2034 			if (dev != NULL)
2035 				return (pci_msi_vm_chipset(dev) == 0);
2036 		}
2037 		return (1);
2038 	}
2039 
2040 	dev = pci_find_bsf(0, 0, 0);
2041 	if (dev != NULL)
2042 		return (pci_msi_device_blacklisted(dev));
2043 	return (0);
2044 }
2045 
2046 /*
2047  * Attempt to allocate *count MSI messages.  The actual number allocated is
2048  * returned in *count.  After this function returns, each message will be
2049  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2050  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		/* Halving keeps the count a power of 2, as MSI requires. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	/*
	 * The Multiple Message Enable field holds log2(count); since
	 * 'actual' is a power of 2, ffs(actual) - 1 is exactly that.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2169 
2170 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	/* ENODEV from pci_release_msix() means "no MSI-X here, try MSI". */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	/*
	 * Validate everything before releasing anything so we never do a
	 * partial teardown; collect the IRQ numbers while we scan.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2218 
2219 /*
2220  * Return the max supported MSI messages this device supports.
2221  * Basically, assuming the MD code can alloc messages, this function
2222  * should return the maximum value that pci_alloc_msi() can return.
2223  * Thus, it is subject to the tunables, etc.
2224  */
2225 int
2226 pci_msi_count_method(device_t dev, device_t child)
2227 {
2228 	struct pci_devinfo *dinfo = device_get_ivars(child);
2229 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2230 
2231 	if (pci_do_msi && msi->msi_location != 0)
2232 		return (msi->msi_msgnum);
2233 	return (0);
2234 }
2235 
2236 /* free pcicfgregs structure and all depending data structures */
2237 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free VPD data only if VPD was actually read (vpd_reg set). */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* _SAFE variant: entries are freed while walking the list. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2269 
2270 /*
 * PCI power management
2272  */
2273 int
2274 pci_set_powerstate_method(device_t dev, device_t child, int state)
2275 {
2276 	struct pci_devinfo *dinfo = device_get_ivars(child);
2277 	pcicfgregs *cfg = &dinfo->cfg;
2278 	uint16_t status;
2279 	int result, oldstate, highest, delay;
2280 
2281 	if (cfg->pp.pp_cap == 0)
2282 		return (EOPNOTSUPP);
2283 
2284 	/*
2285 	 * Optimize a no state change request away.  While it would be OK to
2286 	 * write to the hardware in theory, some devices have shown odd
2287 	 * behavior when going from D3 -> D3.
2288 	 */
2289 	oldstate = pci_get_powerstate(child);
2290 	if (oldstate == state)
2291 		return (0);
2292 
2293 	/*
2294 	 * The PCI power management specification states that after a state
2295 	 * transition between PCI power states, system software must
2296 	 * guarantee a minimal delay before the function accesses the device.
2297 	 * Compute the worst case delay that we need to guarantee before we
2298 	 * access the device.  Many devices will be responsive much more
2299 	 * quickly than this delay, but there are some that don't respond
2300 	 * instantly to state changes.  Transitions to/from D3 state require
2301 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2302 	 * is done below with DELAY rather than a sleeper function because
2303 	 * this function can be called from contexts where we cannot sleep.
2304 	 */
2305 	highest = (oldstate > state) ? oldstate : state;
2306 	if (highest == PCI_POWERSTATE_D3)
2307 	    delay = 10000;
2308 	else if (highest == PCI_POWERSTATE_D2)
2309 	    delay = 200;
2310 	else
2311 	    delay = 0;
2312 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2313 	    & ~PCIM_PSTAT_DMASK;
2314 	result = 0;
2315 	switch (state) {
2316 	case PCI_POWERSTATE_D0:
2317 		status |= PCIM_PSTAT_D0;
2318 		break;
2319 	case PCI_POWERSTATE_D1:
2320 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2321 			return (EOPNOTSUPP);
2322 		status |= PCIM_PSTAT_D1;
2323 		break;
2324 	case PCI_POWERSTATE_D2:
2325 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2326 			return (EOPNOTSUPP);
2327 		status |= PCIM_PSTAT_D2;
2328 		break;
2329 	case PCI_POWERSTATE_D3:
2330 		status |= PCIM_PSTAT_D3;
2331 		break;
2332 	default:
2333 		return (EINVAL);
2334 	}
2335 
2336 	if (bootverbose)
2337 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2338 		    state);
2339 
2340 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2341 	if (delay)
2342 		DELAY(delay);
2343 	return (0);
2344 }
2345 
2346 int
2347 pci_get_powerstate_method(device_t dev, device_t child)
2348 {
2349 	struct pci_devinfo *dinfo = device_get_ivars(child);
2350 	pcicfgregs *cfg = &dinfo->cfg;
2351 	uint16_t status;
2352 	int result;
2353 
2354 	if (cfg->pp.pp_cap != 0) {
2355 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2356 		switch (status & PCIM_PSTAT_DMASK) {
2357 		case PCIM_PSTAT_D0:
2358 			result = PCI_POWERSTATE_D0;
2359 			break;
2360 		case PCIM_PSTAT_D1:
2361 			result = PCI_POWERSTATE_D1;
2362 			break;
2363 		case PCIM_PSTAT_D2:
2364 			result = PCI_POWERSTATE_D2;
2365 			break;
2366 		case PCIM_PSTAT_D3:
2367 			result = PCI_POWERSTATE_D3;
2368 			break;
2369 		default:
2370 			result = PCI_POWERSTATE_UNKNOWN;
2371 			break;
2372 		}
2373 	} else {
2374 		/* No support, device is always at D0 */
2375 		result = PCI_POWERSTATE_D0;
2376 	}
2377 	return (result);
2378 }
2379 
2380 /*
2381  * Some convenience functions for PCI device drivers.
2382  */
2383 
2384 static __inline void
2385 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2386 {
2387 	uint16_t	command;
2388 
2389 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2390 	command |= bit;
2391 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2392 }
2393 
2394 static __inline void
2395 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2396 {
2397 	uint16_t	command;
2398 
2399 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2400 	command &= ~bit;
2401 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2402 }
2403 
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Turn on the bus-master enable bit in the command register. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2410 
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Turn off the bus-master enable bit in the command register. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2417 
2418 int
2419 pci_enable_io_method(device_t dev, device_t child, int space)
2420 {
2421 	uint16_t bit;
2422 
2423 	switch(space) {
2424 	case SYS_RES_IOPORT:
2425 		bit = PCIM_CMD_PORTEN;
2426 		break;
2427 	case SYS_RES_MEMORY:
2428 		bit = PCIM_CMD_MEMEN;
2429 		break;
2430 	default:
2431 		return (EINVAL);
2432 	}
2433 	pci_set_command_bit(dev, child, bit);
2434 	return (0);
2435 }
2436 
2437 int
2438 pci_disable_io_method(device_t dev, device_t child, int space)
2439 {
2440 	uint16_t bit;
2441 
2442 	switch(space) {
2443 	case SYS_RES_IOPORT:
2444 		bit = PCIM_CMD_PORTEN;
2445 		break;
2446 	case SYS_RES_MEMORY:
2447 		bit = PCIM_CMD_MEMEN;
2448 		break;
2449 	default:
2450 		return (EINVAL);
2451 	}
2452 	pci_clear_command_bit(dev, child, bit);
2453 	return (0);
2454 }
2455 
2456 /*
2457  * New style pci driver.  Parent device is either a pci-host-bridge or a
2458  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2459  */
2460 
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	/* Dump the interesting config-header fields when booting verbose. */
	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2517 
2518 static int
2519 pci_porten(device_t dev)
2520 {
2521 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2522 }
2523 
2524 static int
2525 pci_memen(device_t dev)
2526 {
2527 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2528 }
2529 
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* 64-bit BARs occupy two consecutive registers. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds its original value. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2593 
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	/* Write low dword, then high dword for 64-bit BARs. */
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/*
	 * Read the value back so pm_value reflects what the hardware
	 * actually accepted (read-only bits may differ from 'base').
	 */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2614 
2615 struct pci_map *
2616 pci_find_bar(device_t dev, int reg)
2617 {
2618 	struct pci_devinfo *dinfo;
2619 	struct pci_map *pm;
2620 
2621 	dinfo = device_get_ivars(dev);
2622 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2623 		if (pm->pm_reg == reg)
2624 			return (pm);
2625 	}
2626 	return (NULL);
2627 }
2628 
2629 int
2630 pci_bar_enabled(device_t dev, struct pci_map *pm)
2631 {
2632 	struct pci_devinfo *dinfo;
2633 	uint16_t cmd;
2634 
2635 	dinfo = device_get_ivars(dev);
2636 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2637 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2638 		return (0);
2639 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2640 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2641 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2642 	else
2643 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2644 }
2645 
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Keep the list sorted by register offset: find the entry after
	 * which the new BAR belongs (the last entry whose successor is
	 * absent or has a larger register offset).
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2670 
2671 static void
2672 pci_restore_bars(device_t dev)
2673 {
2674 	struct pci_devinfo *dinfo;
2675 	struct pci_map *pm;
2676 	int ln2range;
2677 
2678 	dinfo = device_get_ivars(dev);
2679 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2680 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2681 			ln2range = 32;
2682 		else
2683 			ln2range = pci_maprange(pm->pm_value);
2684 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2685 		if (ln2range == 64)
2686 			pci_write_config(dev, pm->pm_reg + 4,
2687 			    pm->pm_value >> 32, 4);
2688 	}
2689 }
2690 
2691 /*
2692  * Add a resource based on a pci map register. Return 1 if the map
2693  * register is a 32bit map register or 2 if it is a 64bit register.
2694  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	/* mapsize is log2 of the size: < 4 means < 16 bytes, < 2 means < 4. */
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject a base that does not fit in u_long on this architecture. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* A zero or all-ones base means the firmware assigned nothing. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, prefetch ? RF_PREFETCHABLE : 0);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with the address the parent gave us. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2859 
2860 /*
2861  * For ATA devices we need to decide early what addressing mode to use.
2862  * Legacy demands that the primary and secondary ATA ports sits on the
2863  * same addresses that old ISA hardware did. This dictates that we use
2864  * those addresses and ignore the BAR's if we cannot set PCI native
2865  * addressing mode.
2866  */
2867 static void
2868 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2869     uint32_t prefetchmask)
2870 {
2871 	struct resource *r;
2872 	int rid, type, progif;
2873 #if 0
2874 	/* if this device supports PCI native addressing use it */
2875 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2876 	if ((progif & 0x8a) == 0x8a) {
2877 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2878 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2879 			printf("Trying ATA native PCI addressing mode\n");
2880 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2881 		}
2882 	}
2883 #endif
2884 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2885 	type = SYS_RES_IOPORT;
2886 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2887 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2888 		    prefetchmask & (1 << 0));
2889 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2890 		    prefetchmask & (1 << 1));
2891 	} else {
2892 		rid = PCIR_BAR(0);
2893 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2894 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2895 		    0x1f7, 8, 0);
2896 		rid = PCIR_BAR(1);
2897 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2898 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2899 		    0x3f6, 1, 0);
2900 	}
2901 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2902 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2903 		    prefetchmask & (1 << 2));
2904 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2905 		    prefetchmask & (1 << 3));
2906 	} else {
2907 		rid = PCIR_BAR(2);
2908 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2909 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2910 		    0x177, 8, 0);
2911 		rid = PCIR_BAR(3);
2912 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2913 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2914 		    0x376, 1, 0);
2915 	}
2916 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2917 	    prefetchmask & (1 << 4));
2918 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2919 	    prefetchmask & (1 << 5));
2920 }
2921 
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only tunable values in the range 1..254 are accepted. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2969 
2970 /* Perform early OHCI takeover from SMM. */
2971 static void
2972 ohci_early_takeover(device_t self)
2973 {
2974 	struct resource *res;
2975 	uint32_t ctl;
2976 	int rid;
2977 	int i;
2978 
2979 	rid = PCIR_BAR(0);
2980 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2981 	if (res == NULL)
2982 		return;
2983 
2984 	ctl = bus_read_4(res, OHCI_CONTROL);
2985 	if (ctl & OHCI_IR) {
2986 		if (bootverbose)
2987 			printf("ohci early: "
2988 			    "SMM active, request owner change\n");
2989 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2990 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2991 			DELAY(1000);
2992 			ctl = bus_read_4(res, OHCI_CONTROL);
2993 		}
2994 		if (ctl & OHCI_IR) {
2995 			if (bootverbose)
2996 				printf("ohci early: "
2997 				    "SMM does not respond, resetting\n");
2998 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2999 		}
3000 		/* Disable interrupts */
3001 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3002 	}
3003 
3004 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3005 }
3006 
3007 /* Perform early UHCI takeover from SMM. */
3008 static void
3009 uhci_early_takeover(device_t self)
3010 {
3011 	struct resource *res;
3012 	int rid;
3013 
3014 	/*
3015 	 * Set the PIRQD enable bit and switch off all the others. We don't
3016 	 * want legacy support to interfere with us XXX Does this also mean
3017 	 * that the BIOS won't touch the keyboard anymore if it is connected
3018 	 * to the ports of the root hub?
3019 	 */
3020 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3021 
3022 	/* Disable interrupts */
3023 	rid = PCI_UHCI_BASE_REG;
3024 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3025 	if (res != NULL) {
3026 		bus_write_2(res, UHCI_INTR, 0);
3027 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3028 	}
3029 }
3030 
/*
 * Perform early EHCI takeover from SMM.  Walks the extended-capability
 * list in PCI config space looking for the legacy-support capability;
 * if the BIOS semaphore is set, claims the OS semaphore and waits for
 * the BIOS to release the controller before disabling its interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the EHCI register space via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* A clear BIOS semaphore means the BIOS doesn't own it. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 * 1ms for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3086 
3087 /* Perform early XHCI takeover from SMM. */
3088 static void
3089 xhci_early_takeover(device_t self)
3090 {
3091 	struct resource *res;
3092 	uint32_t cparams;
3093 	uint32_t eec;
3094 	uint8_t eecp;
3095 	uint8_t bios_sem;
3096 	uint8_t offs;
3097 	int rid;
3098 	int i;
3099 
3100 	rid = PCIR_BAR(0);
3101 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3102 	if (res == NULL)
3103 		return;
3104 
3105 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3106 
3107 	eec = -1;
3108 
3109 	/* Synchronise with the BIOS if it owns the controller. */
3110 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3111 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3112 		eec = bus_read_4(res, eecp);
3113 
3114 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3115 			continue;
3116 
3117 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3118 		if (bios_sem == 0)
3119 			continue;
3120 
3121 		if (bootverbose)
3122 			printf("xhci early: "
3123 			    "SMM active, request owner change\n");
3124 
3125 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3126 
3127 		/* wait a maximum of 5 second */
3128 
3129 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3130 			DELAY(1000);
3131 			bios_sem = bus_read_1(res, eecp +
3132 			    XHCI_XECP_BIOS_SEM);
3133 		}
3134 
3135 		if (bios_sem != 0) {
3136 			if (bootverbose)
3137 				printf("xhci early: "
3138 				    "SMM does not respond\n");
3139 		}
3140 
3141 		/* Disable interrupts */
3142 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3143 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3144 		bus_read_4(res, offs + XHCI_USBSTS);
3145 	}
3146 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3147 }
3148 
/*
 * Populate the device's resource list: map its BARs (with special
 * handling for legacy ATA controllers and per-device quirks), route
 * its INTx interrupt if it has one, and take over USB host controllers
 * from SMM early when pci_usb_takeover is enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * Advance by the number of BAR slots that
			 * pci_add_map() reports it consumed.
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB host controllers away from SMM before drivers attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3222 
/*
 * Enumerate the given bus: probe every slot (and every function on
 * multi-function devices) through the parent bridge and add a child
 * device for each PCI function that responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with an out-of-range header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan functions 1..PCI_FUNCMAX only on MF devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3255 
/*
 * Register dinfo as a child of bus: create the device_t, attach the
 * ivars and resource list, run the config state through
 * pci_cfg_save()/pci_cfg_restore(), and enumerate its resources.
 * The statement order here matters: resources are only added after
 * the config state has been saved and restored.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3267 
/*
 * Probe method for the generic PCI bus driver; always matches, at a
 * priority that lets more specific bus drivers win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3277 
/*
 * Attach work shared by all PCI bus drivers: report the domain and
 * physical bus number, and select the DMA tag handed to children.
 * Always returns 0.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * For a bus whose grandparent is not itself in the "pci"
	 * devclass (i.e. not behind another PCI bus), create a DMA tag
	 * bounded by PCI_DMA_BOUNDARY; otherwise inherit the parent's
	 * tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* Fall back to the parent's tag when creation failed or was skipped. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3312 
3313 static int
3314 pci_attach(device_t dev)
3315 {
3316 	int busno, domain, error;
3317 
3318 	error = pci_attach_common(dev);
3319 	if (error)
3320 		return (error);
3321 
3322 	/*
3323 	 * Since there can be multiple independantly numbered PCI
3324 	 * busses on systems with multiple PCI domains, we can't use
3325 	 * the unit number to decide which bus we are probing. We ask
3326 	 * the parent pcib what our domain and bus numbers are.
3327 	 */
3328 	domain = pcib_get_domain(dev);
3329 	busno = pcib_get_bus(dev);
3330 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3331 	return (bus_generic_attach(dev));
3332 }
3333 
3334 static void
3335 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3336     int state)
3337 {
3338 	device_t child, pcib;
3339 	struct pci_devinfo *dinfo;
3340 	int dstate, i;
3341 
3342 	/*
3343 	 * Set the device to the given state.  If the firmware suggests
3344 	 * a different power state, use it instead.  If power management
3345 	 * is not present, the firmware is responsible for managing
3346 	 * device power.  Skip children who aren't attached since they
3347 	 * are handled separately.
3348 	 */
3349 	pcib = device_get_parent(dev);
3350 	for (i = 0; i < numdevs; i++) {
3351 		child = devlist[i];
3352 		dinfo = device_get_ivars(child);
3353 		dstate = state;
3354 		if (device_is_attached(child) &&
3355 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3356 			pci_set_powerstate(child, dstate);
3357 	}
3358 }
3359 
/*
 * Bus suspend method: save every child's config space, suspend the
 * children via the generic method, and then (policy permitting) drop
 * them into D3.  The save must happen before the power-down so the
 * state can be restored on resume.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	/* Powering down is policy, controlled by pci_do_power_suspend. */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3391 
/*
 * Bus resume method: power the children back up (policy permitting),
 * restore their config space, and resume them in two passes so that
 * display/memory/bridge/base-peripheral devices come back before
 * everything else.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save state for children with no driver attached. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not resumed above. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3447 
/*
 * Locate the preloaded PCI vendor database ("pci_vendor_data") and
 * publish its address and size via pci_vendordata/pci_vendordata_size
 * for use by pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database so the parser always
			 * finds a final newline.  NOTE(review): this
			 * stores one byte past the reported size --
			 * assumes the preloaded image reserves room for
			 * the terminator; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3467 
3468 void
3469 pci_driver_added(device_t dev, driver_t *driver)
3470 {
3471 	int numdevs;
3472 	device_t *devlist;
3473 	device_t child;
3474 	struct pci_devinfo *dinfo;
3475 	int i;
3476 
3477 	if (bootverbose)
3478 		device_printf(dev, "driver added\n");
3479 	DEVICE_IDENTIFY(driver, dev);
3480 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3481 		return;
3482 	for (i = 0; i < numdevs; i++) {
3483 		child = devlist[i];
3484 		if (device_get_state(child) != DS_NOTPRESENT)
3485 			continue;
3486 		dinfo = device_get_ivars(child);
3487 		pci_print_verbose(dinfo);
3488 		if (bootverbose)
3489 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3490 		pci_cfg_restore(child, dinfo);
3491 		if (device_probe_and_attach(child) != 0)
3492 			pci_cfg_save(child, dinfo, 1);
3493 	}
3494 	free(devlist, M_TEMP);
3495 }
3496 
/*
 * Bus method to hook up an interrupt handler for a child.  Beyond the
 * generic setup, for direct children this also programs the hardware:
 * rid 0 (legacy INTx) clears the INTx-disable bit, while a non-zero
 * rid (MSI/MSI-X) maps the message address/data through the parent
 * bridge, enables the vector on first use, and sets INTx-disable.
 * On any failure after the generic setup the handler is torn down
 * again via the bad: label.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI message lazily on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based; table index is rid - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the MSI-X message lazily on first handler. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3588 
3589 int
3590 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3591     void *cookie)
3592 {
3593 	struct msix_table_entry *mte;
3594 	struct resource_list_entry *rle;
3595 	struct pci_devinfo *dinfo;
3596 	int error, rid;
3597 
3598 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3599 		return (EINVAL);
3600 
3601 	/* If this isn't a direct child, just bail out */
3602 	if (device_get_parent(child) != dev)
3603 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3604 
3605 	rid = rman_get_rid(irq);
3606 	if (rid == 0) {
3607 		/* Mask INTx */
3608 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3609 	} else {
3610 		/*
3611 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3612 		 * decrement the appropriate handlers count and mask the
3613 		 * MSI-X message, or disable MSI messages if the count
3614 		 * drops to 0.
3615 		 */
3616 		dinfo = device_get_ivars(child);
3617 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3618 		if (rle->res != irq)
3619 			return (EINVAL);
3620 		if (dinfo->cfg.msi.msi_alloc > 0) {
3621 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3622 			    ("MSI-X index too high"));
3623 			if (dinfo->cfg.msi.msi_handlers == 0)
3624 				return (EINVAL);
3625 			dinfo->cfg.msi.msi_handlers--;
3626 			if (dinfo->cfg.msi.msi_handlers == 0)
3627 				pci_disable_msi(child);
3628 		} else {
3629 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3630 			    ("No MSI or MSI-X interrupts allocated"));
3631 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3632 			    ("MSI-X index too high"));
3633 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3634 			if (mte->mte_handlers == 0)
3635 				return (EINVAL);
3636 			mte->mte_handlers--;
3637 			if (mte->mte_handlers == 0)
3638 				pci_mask_msix(child, rid - 1);
3639 		}
3640 	}
3641 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3642 	if (rid > 0)
3643 		KASSERT(error == 0,
3644 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3645 	return (error);
3646 }
3647 
3648 int
3649 pci_print_child(device_t dev, device_t child)
3650 {
3651 	struct pci_devinfo *dinfo;
3652 	struct resource_list *rl;
3653 	int retval = 0;
3654 
3655 	dinfo = device_get_ivars(child);
3656 	rl = &dinfo->resources;
3657 
3658 	retval += bus_print_child_header(dev, child);
3659 
3660 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3661 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3662 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3663 	if (device_get_flags(dev))
3664 		retval += printf(" flags %#x", device_get_flags(dev));
3665 
3666 	retval += printf(" at device %d.%d", pci_get_slot(child),
3667 	    pci_get_function(child));
3668 
3669 	retval += bus_print_child_footer(dev, child);
3670 
3671 	return (retval);
3672 }
3673 
/*
 * Class/subclass to human-readable description table used by
 * pci_probe_nomatch().  A subclass of -1 supplies the generic
 * description for the whole class; a specific subclass entry must
 * follow its class's -1 entry.  The table ends with an all-zero row.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3766 
/*
 * Announce a device no driver claimed: print a description from the
 * loaded vendor database when available, otherwise a generic
 * class/subclass description from pci_nomatch_tab, then save the
 * device's config space.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	/* Save config space; pci_driver_added() restores it on reprobe. */
	pci_cfg_save(child, device_get_ivars(child), 1);
}
3806 
3807 /*
3808  * Parse the PCI device database, if loaded, and return a pointer to a
3809  * description of the device.
3810  *
3811  * The database is flat text formatted as follows:
3812  *
3813  * Any line not in a valid format is ignored.
3814  * Lines are terminated with newline '\n' characters.
3815  *
3816  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3817  * the vendor name.
3818  *
3819  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3820  * - devices cannot be listed without a corresponding VENDOR line.
3821  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3822  * another TAB, then the device name.
3823  */
3824 
3825 /*
3826  * Assuming (ptr) points to the beginning of a line in the database,
3827  * return the vendor or device and description of the next entry.
3828  * The value of (vendor) or (device) inappropriate for the entry type
3829  * is set to -1.  Returns nonzero at the end of the database.
3830  *
3831  * Note that this is slightly unrobust in the face of corrupt data;
3832  * we attempt to safeguard against this by spamming the end of the
3833  * database with a newline when we initialise.
3834  */
3835 static int
3836 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3837 {
3838 	char	*cp = *ptr;
3839 	int	left;
3840 
3841 	*device = -1;
3842 	*vendor = -1;
3843 	**desc = '\0';
3844 	for (;;) {
3845 		left = pci_vendordata_size - (cp - pci_vendordata);
3846 		if (left <= 0) {
3847 			*ptr = cp;
3848 			return(1);
3849 		}
3850 
3851 		/* vendor entry? */
3852 		if (*cp != '\t' &&
3853 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3854 			break;
3855 		/* device entry? */
3856 		if (*cp == '\t' &&
3857 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3858 			break;
3859 
3860 		/* skip to next line */
3861 		while (*cp != '\n' && left > 0) {
3862 			cp++;
3863 			left--;
3864 		}
3865 		if (*cp == '\n') {
3866 			cp++;
3867 			left--;
3868 		}
3869 	}
3870 	/* skip to next line */
3871 	while (*cp != '\n' && left > 0) {
3872 		cp++;
3873 		left--;
3874 	}
3875 	if (*cp == '\n' && left > 0)
3876 		cp++;
3877 	*ptr = cp;
3878 	return(0);
3879 }
3880 
/*
 * Look up the device in the loaded vendor database and return a
 * malloc'd "vendor, device" description string, or NULL if no database
 * is loaded or allocation fails.  The caller frees the result with
 * M_DEVBUF.
 *
 * NOTE(review): the 80-byte vp/dp buffers must stay in sync with the
 * sscanf field width used in pci_describe_parse_line(); verify that
 * the width leaves room for the terminating NUL.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Scan the device entries below this vendor; stop at the end of
	 * the database or at the next vendor line.
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device id when no entry matched. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3933 
/*
 * Read method for PCI instance variables: return the requested field
 * from the child's cached config registers.  PCI_IVAR_ETHADDR is not
 * supported here; it stores a NULL result and returns EINVAL.  Unknown
 * ivars return ENOENT.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor id. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4016 
4017 int
4018 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4019 {
4020 	struct pci_devinfo *dinfo;
4021 
4022 	dinfo = device_get_ivars(child);
4023 
4024 	switch (which) {
4025 	case PCI_IVAR_INTPIN:
4026 		dinfo->cfg.intpin = value;
4027 		return (0);
4028 	case PCI_IVAR_ETHADDR:
4029 	case PCI_IVAR_SUBVENDOR:
4030 	case PCI_IVAR_SUBDEVICE:
4031 	case PCI_IVAR_VENDOR:
4032 	case PCI_IVAR_DEVICE:
4033 	case PCI_IVAR_DEVID:
4034 	case PCI_IVAR_CLASS:
4035 	case PCI_IVAR_SUBCLASS:
4036 	case PCI_IVAR_PROGIF:
4037 	case PCI_IVAR_REVID:
4038 	case PCI_IVAR_IRQ:
4039 	case PCI_IVAR_DOMAIN:
4040 	case PCI_IVAR_BUS:
4041 	case PCI_IVAR_SLOT:
4042 	case PCI_IVAR_FUNCTION:
4043 		return (EINVAL);	/* disallow for now */
4044 
4045 	default:
4046 		return (ENOENT);
4047 	}
4048 }
4049 
4050 #include "opt_ddb.h"
4051 #ifdef DDB
4052 #include <ddb/ddb.h>
4053 #include <sys/cons.h>
4054 
4055 /*
4056  * List resources based on pci map registers, used for within ddb
4057  */
4058 
4059 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4060 {
4061 	struct pci_devinfo *dinfo;
4062 	struct devlist *devlist_head;
4063 	struct pci_conf *p;
4064 	const char *name;
4065 	int i, error, none_count;
4066 
4067 	none_count = 0;
4068 	/* get the head of the device queue */
4069 	devlist_head = &pci_devq;
4070 
4071 	/*
4072 	 * Go through the list of devices and print out devices
4073 	 */
4074 	for (error = 0, i = 0,
4075 	     dinfo = STAILQ_FIRST(devlist_head);
4076 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4077 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4078 
4079 		/* Populate pd_name and pd_unit */
4080 		name = NULL;
4081 		if (dinfo->cfg.dev)
4082 			name = device_get_name(dinfo->cfg.dev);
4083 
4084 		p = &dinfo->conf;
4085 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4086 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4087 			(name && *name) ? name : "none",
4088 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4089 			none_count++,
4090 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4091 			p->pc_sel.pc_func, (p->pc_class << 16) |
4092 			(p->pc_subclass << 8) | p->pc_progif,
4093 			(p->pc_subdevice << 16) | p->pc_subvendor,
4094 			(p->pc_device << 16) | p->pc_vendor,
4095 			p->pc_revid, p->pc_hdr);
4096 	}
4097 }
4098 #endif /* DDB */
4099 
/*
 * Reserve a resource for the BAR identified by *rid.  The allocation
 * size and alignment are taken from the BAR itself rather than from the
 * caller's request, so that the full BAR decode range is owned by this
 * device.  The reserved (inactive) resource is recorded in the child's
 * resource list as RLE_RESERVED and the BAR is programmed with the
 * assigned address.  Returns the resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4205 
/*
 * Bus method for allocating a resource on behalf of a child device.
 * Requests from grandchildren are passed straight up the tree.  For
 * direct children, legacy interrupts are routed on demand, and BAR
 * resources are lazily reserved (via pci_reserve_map) before the
 * allocation is satisfied from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4276 
4277 int
4278 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4279     struct resource *r)
4280 {
4281 	struct pci_devinfo *dinfo;
4282 	int error;
4283 
4284 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4285 	if (error)
4286 		return (error);
4287 
4288 	/* Enable decoding in the command register when activating BARs. */
4289 	if (device_get_parent(child) == dev) {
4290 		/* Device ROMs need their decoding explicitly enabled. */
4291 		dinfo = device_get_ivars(child);
4292 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4293 			pci_write_bar(child, pci_find_bar(child, rid),
4294 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4295 		switch (type) {
4296 		case SYS_RES_IOPORT:
4297 		case SYS_RES_MEMORY:
4298 			error = PCI_ENABLE_IO(dev, child, type);
4299 			break;
4300 		}
4301 	}
4302 	return (error);
4303 }
4304 
4305 int
4306 pci_deactivate_resource(device_t dev, device_t child, int type,
4307     int rid, struct resource *r)
4308 {
4309 	struct pci_devinfo *dinfo;
4310 	int error;
4311 
4312 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4313 	if (error)
4314 		return (error);
4315 
4316 	/* Disable decoding for device ROMs. */
4317 	if (device_get_parent(child) == dev) {
4318 		dinfo = device_get_ivars(child);
4319 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4320 			pci_write_bar(child, pci_find_bar(child, rid),
4321 			    rman_get_start(r));
4322 	}
4323 	return (0);
4324 }
4325 
/*
 * Detach and delete a PCI child device: disable its memory and I/O
 * decoding, release every resource recorded in its resource list
 * (complaining about any still active or busy), then remove the
 * device and free its config bookkeeping.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource should not still be active or busy
			 * here; force-release it so the unreserve below
			 * can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4365 
/*
 * Delete one entry from a direct child's resource list, unreserving
 * the underlying resource first.  Refuses (with a diagnostic) if the
 * resource is still active or held by the child.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children are managed by this bus. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4395 
4396 struct resource_list *
4397 pci_get_resource_list (device_t dev, device_t child)
4398 {
4399 	struct pci_devinfo *dinfo = device_get_ivars(child);
4400 
4401 	return (&dinfo->resources);
4402 }
4403 
4404 bus_dma_tag_t
4405 pci_get_dma_tag(device_t bus, device_t dev)
4406 {
4407 	struct pci_softc *sc = device_get_softc(bus);
4408 
4409 	return (sc->sc_dma_tag);
4410 }
4411 
4412 uint32_t
4413 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4414 {
4415 	struct pci_devinfo *dinfo = device_get_ivars(child);
4416 	pcicfgregs *cfg = &dinfo->cfg;
4417 
4418 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4419 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4420 }
4421 
4422 void
4423 pci_write_config_method(device_t dev, device_t child, int reg,
4424     uint32_t val, int width)
4425 {
4426 	struct pci_devinfo *dinfo = device_get_ivars(child);
4427 	pcicfgregs *cfg = &dinfo->cfg;
4428 
4429 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4430 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4431 }
4432 
4433 int
4434 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4435     size_t buflen)
4436 {
4437 
4438 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4439 	    pci_get_function(child));
4440 	return (0);
4441 }
4442 
/*
 * Format the plug-and-play identification of a child (vendor, device,
 * subsystem IDs and class code) into buf for devctl/devd matching.
 * Always succeeds; truncation is silently accepted.
 */
int
pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
    size_t buflen)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
	    cfg->progif);
	return (0);
}
4458 
4459 int
4460 pci_assign_interrupt_method(device_t dev, device_t child)
4461 {
4462 	struct pci_devinfo *dinfo = device_get_ivars(child);
4463 	pcicfgregs *cfg = &dinfo->cfg;
4464 
4465 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4466 	    cfg->intpin));
4467 }
4468 
/*
 * Module event handler.  On load, initialize the global device queue,
 * create the /dev/pci control node and load the vendor data file; on
 * unload, destroy the node.  Other events are silently accepted.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
4490 
/*
 * Restore the saved PCI Express capability control registers of a
 * device (saved by pci_cfg_save_pcie).  Writes are gated on the
 * capability version and port type because, in version-1 capabilities,
 * the link/slot/root registers exist only for certain port types.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register set only exists in version 2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4526 
4527 static void
4528 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4529 {
4530 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4531 	    dinfo->cfg.pcix.pcix_command,  2);
4532 }
4533 
/*
 * Restore a type-0 device's config-space state from the snapshot in
 * its pci_devinfo: power it up to D0 first (so the registers stick),
 * then rewrite the BARs, standard header registers, and the PCIe,
 * PCI-X, MSI and MSI-X capability state where present.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4583 
/*
 * Save the PCI Express capability control registers of a device into
 * its pci_devinfo for later restoration by pci_cfg_restore_pcie().
 * Reads are gated on the capability version and port type, mirroring
 * the restore path.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register set only exists in version 2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4621 
4622 static void
4623 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4624 {
4625 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4626 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4627 }
4628 
/*
 * Snapshot a type-0 device's writable config-space registers into its
 * pci_devinfo, and — when setstate is non-zero and policy allows —
 * power the device down to D3.  The snapshot is restored later by
 * pci_cfg_restore().
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* pci_do_power_nodriver selects how aggressively to power down. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4714 
4715 /* Wrapper APIs suitable for device driver use. */
4716 void
4717 pci_save_state(device_t dev)
4718 {
4719 	struct pci_devinfo *dinfo;
4720 
4721 	dinfo = device_get_ivars(dev);
4722 	pci_cfg_save(dev, dinfo, 0);
4723 }
4724 
4725 void
4726 pci_restore_state(device_t dev)
4727 {
4728 	struct pci_devinfo *dinfo;
4729 
4730 	dinfo = device_get_ivars(dev);
4731 	pci_cfg_restore(dev, dinfo);
4732 }
4733