xref: /freebsd/sys/dev/pci/pci.c (revision 646a7fea0c8a60ce2795ffc1bdf58e0fd0f7d624)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
/* 4GB DMA boundary, used only when the platform bus space exceeds 32 bits. */
#define	PCI_DMA_BOUNDARY	0x100000000
#endif

/* True if 'reg' is the expansion ROM BAR offset for this header type. */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
80 
81 static pci_addr_t	pci_mapbase(uint64_t mapreg);
82 static const char	*pci_maptype(uint64_t mapreg);
83 static int		pci_mapsize(uint64_t testval);
84 static int		pci_maprange(uint64_t mapreg);
85 static pci_addr_t	pci_rombase(uint64_t mapreg);
86 static int		pci_romsize(uint64_t testval);
87 static void		pci_fixancient(pcicfgregs *cfg);
88 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
89 
90 static int		pci_porten(device_t dev);
91 static int		pci_memen(device_t dev);
92 static void		pci_assign_interrupt(device_t bus, device_t dev,
93 			    int force_route);
94 static int		pci_add_map(device_t bus, device_t dev, int reg,
95 			    struct resource_list *rl, int force, int prefetch);
96 static int		pci_probe(device_t dev);
97 static int		pci_attach(device_t dev);
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
103 static int		pci_modevent(module_t mod, int what, void *arg);
104 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105 			    pcicfgregs *cfg);
106 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108 			    int reg, uint32_t *data);
109 #if 0
110 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111 			    int reg, uint32_t data);
112 #endif
113 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114 static void		pci_disable_msi(device_t dev);
115 static void		pci_enable_msi(device_t dev, uint64_t address,
116 			    uint16_t data);
117 static void		pci_enable_msix(device_t dev, u_int index,
118 			    uint64_t address, uint32_t data);
119 static void		pci_mask_msix(device_t dev, u_int index);
120 static void		pci_unmask_msix(device_t dev, u_int index);
121 static int		pci_msi_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
/*
 * Method dispatch table for the "pci" bus driver: device lifecycle,
 * generic bus, and PCI-specific interface entry points.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
183 
/* Declare the pci driver class and attach it beneath pcib bridge devices. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database, filled by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
192 
/*
 * One per-device workaround entry.  'devid' is the combined
 * device(high)/vendor(low) ID word; arg1/arg2 meaning depends on 'type'
 * (e.g. for MAP_REG/UNMAP_REG, arg1 is the register offset).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;
	int	arg2;
};
203 
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
	 * VMware.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
	{ 0x12751275, PCI_QUIRK_ENABLE_MSI_VM,	0, 	0 },	/* bhyve */

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/* All-zero sentinel terminates the table. */
	{ 0 }
};
261 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every discovered PCI function (see pci_read_device()). */
struct devlist pci_devq;
uint32_t pci_generation;	/* incremented when a device is added */
uint32_t pci_numdevs = 0;	/* entries currently on pci_devq */
/* Set while parsing capability lists if PCIe/PCI-X hardware is seen. */
static int pcie_chipset, pcix_chipset;
271 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Whether we turn on a device's I/O and memory decode bits ourselves. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* Power-down policy (D3) for functions that end up with no driver. */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/*
 * Early USB takeover defaults on only for x86; NOTE(review): presumably
 * because BIOS legacy USB emulation exists there -- confirm.
 */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
330 /* Find a device_t by bus/slot/function in domain 0 */
331 
332 device_t
333 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
334 {
335 
336 	return (pci_find_dbsf(0, bus, slot, func));
337 }
338 
339 /* Find a device_t by domain/bus/slot/function */
340 
341 device_t
342 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
343 {
344 	struct pci_devinfo *dinfo;
345 
346 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
347 		if ((dinfo->cfg.domain == domain) &&
348 		    (dinfo->cfg.bus == bus) &&
349 		    (dinfo->cfg.slot == slot) &&
350 		    (dinfo->cfg.func == func)) {
351 			return (dinfo->cfg.dev);
352 		}
353 	}
354 
355 	return (NULL);
356 }
357 
358 /* Find a device_t by vendor/device ID */
359 
360 device_t
361 pci_find_device(uint16_t vendor, uint16_t device)
362 {
363 	struct pci_devinfo *dinfo;
364 
365 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
366 		if ((dinfo->cfg.vendor == vendor) &&
367 		    (dinfo->cfg.device == device)) {
368 			return (dinfo->cfg.dev);
369 		}
370 	}
371 
372 	return (NULL);
373 }
374 
375 device_t
376 pci_find_class(uint8_t class, uint8_t subclass)
377 {
378 	struct pci_devinfo *dinfo;
379 
380 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
381 		if (dinfo->cfg.baseclass == class &&
382 		    dinfo->cfg.subclass == subclass) {
383 			return (dinfo->cfg.dev);
384 		}
385 	}
386 
387 	return (NULL);
388 }
389 
390 static int
391 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
392 {
393 	va_list ap;
394 	int retval;
395 
396 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
397 	    cfg->func);
398 	va_start(ap, fmt);
399 	retval += vprintf(fmt, ap);
400 	va_end(ap);
401 	return (retval);
402 }
403 
404 /* return base address of memory or port map */
405 
406 static pci_addr_t
407 pci_mapbase(uint64_t mapreg)
408 {
409 
410 	if (PCI_BAR_MEM(mapreg))
411 		return (mapreg & PCIM_BAR_MEM_BASE);
412 	else
413 		return (mapreg & PCIM_BAR_IO_BASE);
414 }
415 
416 /* return map type of memory or port map */
417 
418 static const char *
419 pci_maptype(uint64_t mapreg)
420 {
421 
422 	if (PCI_BAR_IO(mapreg))
423 		return ("I/O Port");
424 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
425 		return ("Prefetchable Memory");
426 	return ("Memory");
427 }
428 
429 /* return log2 of map size decoded for memory or port map */
430 
431 static int
432 pci_mapsize(uint64_t testval)
433 {
434 	int ln2size;
435 
436 	testval = pci_mapbase(testval);
437 	ln2size = 0;
438 	if (testval != 0) {
439 		while ((testval & 1) == 0)
440 		{
441 			ln2size++;
442 			testval >>= 1;
443 		}
444 	}
445 	return (ln2size);
446 }
447 
448 /* return base address of device ROM */
449 
450 static pci_addr_t
451 pci_rombase(uint64_t mapreg)
452 {
453 
454 	return (mapreg & PCIM_BIOS_ADDR_MASK);
455 }
456 
457 /* return log2 of map size decided for device ROM */
458 
459 static int
460 pci_romsize(uint64_t testval)
461 {
462 	int ln2size;
463 
464 	testval = pci_rombase(testval);
465 	ln2size = 0;
466 	if (testval != 0) {
467 		while ((testval & 1) == 0)
468 		{
469 			ln2size++;
470 			testval >>= 1;
471 		}
472 	}
473 	return (ln2size);
474 }
475 
476 /* return log2 of address range supported by map register */
477 
478 static int
479 pci_maprange(uint64_t mapreg)
480 {
481 	int ln2range = 0;
482 
483 	if (PCI_BAR_IO(mapreg))
484 		ln2range = 32;
485 	else
486 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
487 		case PCIM_BAR_MEM_32:
488 			ln2range = 32;
489 			break;
490 		case PCIM_BAR_MEM_1MB:
491 			ln2range = 20;
492 			break;
493 		case PCIM_BAR_MEM_64:
494 			ln2range = 64;
495 			break;
496 		}
497 	return (ln2range);
498 }
499 
500 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
501 
502 static void
503 pci_fixancient(pcicfgregs *cfg)
504 {
505 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
506 		return;
507 
508 	/* PCI to PCI bridges use header type 1 */
509 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
510 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
511 }
512 
513 /* extract header type specific config data */
514 
515 static void
516 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
517 {
518 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
519 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
520 	case PCIM_HDRTYPE_NORMAL:
521 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
522 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
523 		cfg->nummaps	    = PCI_MAXMAPS_0;
524 		break;
525 	case PCIM_HDRTYPE_BRIDGE:
526 		cfg->nummaps	    = PCI_MAXMAPS_1;
527 		break;
528 	case PCIM_HDRTYPE_CARDBUS:
529 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
530 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
531 		cfg->nummaps	    = PCI_MAXMAPS_2;
532 		break;
533 	}
534 #undef REG
535 }
536 
/*
 * Read the configuration header of the function at domain 'd',
 * bus/slot/function 'b'/'s'/'f' into a freshly allocated pci_devinfo
 * and link it onto the global device list.  'size' lets the caller
 * over-allocate so pci_devinfo can be embedded in a larger structure.
 * Returns the new entry, or NULL if no function responds there.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID dword means nothing is present at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed header into the pci_conf snapshot. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
612 
/*
 * Walk the classic PCI capability list of 'cfg' and record the
 * location and key registers of each recognized capability (power
 * management, HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X,
 * PCI Express) into the corresponding cfg substructures.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* Location of the capabilities pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Supported message count is a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* NB: REG() and WREG() deliberately stay defined; the VPD code below uses them. */
}
773 
774 /*
775  * PCI Vital Product Data
776  */
777 
778 #define	PCI_VPD_TIMEOUT		1000000
779 
780 static int
781 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
782 {
783 	int count = PCI_VPD_TIMEOUT;
784 
785 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
786 
787 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
788 
789 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
790 		if (--count < 0)
791 			return (ENXIO);
792 		DELAY(1);	/* limit looping */
793 	}
794 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
795 
796 	return (0);
797 }
798 
/* Unused VPD write path, compiled out; kept for reference. */
#if 0
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
820 
/*
 * Cursor for streaming VPD bytes out of the 32-bit VPD data register
 * one byte at a time (see vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read, shifted as consumed */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
829 
830 static int
831 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
832 {
833 	uint32_t reg;
834 	uint8_t byte;
835 
836 	if (vrs->bytesinval == 0) {
837 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
838 			return (ENXIO);
839 		vrs->val = le32toh(reg);
840 		vrs->off += 4;
841 		byte = vrs->val & 0xff;
842 		vrs->bytesinval = 3;
843 	} else {
844 		vrs->val = vrs->val >> 8;
845 		byte = vrs->val & 0xff;
846 		vrs->bytesinval--;
847 	}
848 
849 	vrs->cksum += byte;
850 	*data = byte;
851 	return (0);
852 }
853 
854 static void
855 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
856 {
857 	struct vpd_readstate vrs;
858 	int state;
859 	int name;
860 	int remain;
861 	int i;
862 	int alloc, off;		/* alloc/off for RO/W arrays */
863 	int cksumvalid;
864 	int dflen;
865 	uint8_t byte;
866 	uint8_t byte2;
867 
868 	/* init vpd reader */
869 	vrs.bytesinval = 0;
870 	vrs.off = 0;
871 	vrs.pcib = pcib;
872 	vrs.cfg = cfg;
873 	vrs.cksum = 0;
874 
875 	state = 0;
876 	name = remain = i = 0;	/* shut up stupid gcc */
877 	alloc = off = 0;	/* shut up stupid gcc */
878 	dflen = 0;		/* shut up stupid gcc */
879 	cksumvalid = -1;
880 	while (state >= 0) {
881 		if (vpd_nextbyte(&vrs, &byte)) {
882 			state = -2;
883 			break;
884 		}
885 #if 0
886 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
887 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
888 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
889 #endif
890 		switch (state) {
891 		case 0:		/* item name */
892 			if (byte & 0x80) {
893 				if (vpd_nextbyte(&vrs, &byte2)) {
894 					state = -2;
895 					break;
896 				}
897 				remain = byte2;
898 				if (vpd_nextbyte(&vrs, &byte2)) {
899 					state = -2;
900 					break;
901 				}
902 				remain |= byte2 << 8;
903 				if (remain > (0x7f*4 - vrs.off)) {
904 					state = -1;
905 					pci_printf(cfg,
906 					    "invalid VPD data, remain %#x\n",
907 					    remain);
908 				}
909 				name = byte & 0x7f;
910 			} else {
911 				remain = byte & 0x7;
912 				name = (byte >> 3) & 0xf;
913 			}
914 			switch (name) {
915 			case 0x2:	/* String */
916 				cfg->vpd.vpd_ident = malloc(remain + 1,
917 				    M_DEVBUF, M_WAITOK);
918 				i = 0;
919 				state = 1;
920 				break;
921 			case 0xf:	/* End */
922 				state = -1;
923 				break;
924 			case 0x10:	/* VPD-R */
925 				alloc = 8;
926 				off = 0;
927 				cfg->vpd.vpd_ros = malloc(alloc *
928 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
929 				    M_WAITOK | M_ZERO);
930 				state = 2;
931 				break;
932 			case 0x11:	/* VPD-W */
933 				alloc = 8;
934 				off = 0;
935 				cfg->vpd.vpd_w = malloc(alloc *
936 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
937 				    M_WAITOK | M_ZERO);
938 				state = 5;
939 				break;
940 			default:	/* Invalid data, abort */
941 				state = -1;
942 				break;
943 			}
944 			break;
945 
946 		case 1:	/* Identifier String */
947 			cfg->vpd.vpd_ident[i++] = byte;
948 			remain--;
949 			if (remain == 0)  {
950 				cfg->vpd.vpd_ident[i] = '\0';
951 				state = 0;
952 			}
953 			break;
954 
955 		case 2:	/* VPD-R Keyword Header */
956 			if (off == alloc) {
957 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
958 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
959 				    M_DEVBUF, M_WAITOK | M_ZERO);
960 			}
961 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
962 			if (vpd_nextbyte(&vrs, &byte2)) {
963 				state = -2;
964 				break;
965 			}
966 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
967 			if (vpd_nextbyte(&vrs, &byte2)) {
968 				state = -2;
969 				break;
970 			}
971 			dflen = byte2;
972 			if (dflen == 0 &&
973 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
974 			    2) == 0) {
975 				/*
976 				 * if this happens, we can't trust the rest
977 				 * of the VPD.
978 				 */
979 				pci_printf(cfg, "bad keyword length: %d\n",
980 				    dflen);
981 				cksumvalid = 0;
982 				state = -1;
983 				break;
984 			} else if (dflen == 0) {
985 				cfg->vpd.vpd_ros[off].value = malloc(1 *
986 				    sizeof(*cfg->vpd.vpd_ros[off].value),
987 				    M_DEVBUF, M_WAITOK);
988 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
989 			} else
990 				cfg->vpd.vpd_ros[off].value = malloc(
991 				    (dflen + 1) *
992 				    sizeof(*cfg->vpd.vpd_ros[off].value),
993 				    M_DEVBUF, M_WAITOK);
994 			remain -= 3;
995 			i = 0;
996 			/* keep in sync w/ state 3's transistions */
997 			if (dflen == 0 && remain == 0)
998 				state = 0;
999 			else if (dflen == 0)
1000 				state = 2;
1001 			else
1002 				state = 3;
1003 			break;
1004 
1005 		case 3:	/* VPD-R Keyword Value */
1006 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1007 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1008 			    "RV", 2) == 0 && cksumvalid == -1) {
1009 				if (vrs.cksum == 0)
1010 					cksumvalid = 1;
1011 				else {
1012 					if (bootverbose)
1013 						pci_printf(cfg,
1014 					    "bad VPD cksum, remain %hhu\n",
1015 						    vrs.cksum);
1016 					cksumvalid = 0;
1017 					state = -1;
1018 					break;
1019 				}
1020 			}
1021 			dflen--;
1022 			remain--;
1023 			/* keep in sync w/ state 2's transistions */
1024 			if (dflen == 0)
1025 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1026 			if (dflen == 0 && remain == 0) {
1027 				cfg->vpd.vpd_rocnt = off;
1028 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1029 				    off * sizeof(*cfg->vpd.vpd_ros),
1030 				    M_DEVBUF, M_WAITOK | M_ZERO);
1031 				state = 0;
1032 			} else if (dflen == 0)
1033 				state = 2;
1034 			break;
1035 
1036 		case 4:
1037 			remain--;
1038 			if (remain == 0)
1039 				state = 0;
1040 			break;
1041 
1042 		case 5:	/* VPD-W Keyword Header */
1043 			if (off == alloc) {
1044 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1045 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1046 				    M_DEVBUF, M_WAITOK | M_ZERO);
1047 			}
1048 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1049 			if (vpd_nextbyte(&vrs, &byte2)) {
1050 				state = -2;
1051 				break;
1052 			}
1053 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1054 			if (vpd_nextbyte(&vrs, &byte2)) {
1055 				state = -2;
1056 				break;
1057 			}
1058 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1059 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1060 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1061 			    sizeof(*cfg->vpd.vpd_w[off].value),
1062 			    M_DEVBUF, M_WAITOK);
1063 			remain -= 3;
1064 			i = 0;
1065 			/* keep in sync w/ state 6's transistions */
1066 			if (dflen == 0 && remain == 0)
1067 				state = 0;
1068 			else if (dflen == 0)
1069 				state = 5;
1070 			else
1071 				state = 6;
1072 			break;
1073 
1074 		case 6:	/* VPD-W Keyword Value */
1075 			cfg->vpd.vpd_w[off].value[i++] = byte;
1076 			dflen--;
1077 			remain--;
1078 			/* keep in sync w/ state 5's transistions */
1079 			if (dflen == 0)
1080 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1081 			if (dflen == 0 && remain == 0) {
1082 				cfg->vpd.vpd_wcnt = off;
1083 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1084 				    off * sizeof(*cfg->vpd.vpd_w),
1085 				    M_DEVBUF, M_WAITOK | M_ZERO);
1086 				state = 0;
1087 			} else if (dflen == 0)
1088 				state = 5;
1089 			break;
1090 
1091 		default:
1092 			pci_printf(cfg, "invalid state: %d\n", state);
1093 			state = -1;
1094 			break;
1095 		}
1096 	}
1097 
1098 	if (cksumvalid == 0 || state < -1) {
1099 		/* read-only data bad, clean up */
1100 		if (cfg->vpd.vpd_ros != NULL) {
1101 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1102 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1103 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1104 			cfg->vpd.vpd_ros = NULL;
1105 		}
1106 	}
1107 	if (state < -1) {
1108 		/* I/O error, clean up */
1109 		pci_printf(cfg, "failed to read VPD data.\n");
1110 		if (cfg->vpd.vpd_ident != NULL) {
1111 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1112 			cfg->vpd.vpd_ident = NULL;
1113 		}
1114 		if (cfg->vpd.vpd_w != NULL) {
1115 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1116 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1117 			free(cfg->vpd.vpd_w, M_DEVBUF);
1118 			cfg->vpd.vpd_w = NULL;
1119 		}
1120 	}
1121 	cfg->vpd.vpd_cached = 1;
1122 #undef REG
1123 #undef WREG
1124 }
1125 
1126 int
1127 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1128 {
1129 	struct pci_devinfo *dinfo = device_get_ivars(child);
1130 	pcicfgregs *cfg = &dinfo->cfg;
1131 
1132 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1133 		pci_read_vpd(device_get_parent(dev), cfg);
1134 
1135 	*identptr = cfg->vpd.vpd_ident;
1136 
1137 	if (*identptr == NULL)
1138 		return (ENXIO);
1139 
1140 	return (0);
1141 }
1142 
1143 int
1144 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1145 	const char **vptr)
1146 {
1147 	struct pci_devinfo *dinfo = device_get_ivars(child);
1148 	pcicfgregs *cfg = &dinfo->cfg;
1149 	int i;
1150 
1151 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1152 		pci_read_vpd(device_get_parent(dev), cfg);
1153 
1154 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1155 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1156 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1157 			*vptr = cfg->vpd.vpd_ros[i].value;
1158 			return (0);
1159 		}
1160 
1161 	*vptr = NULL;
1162 	return (ENXIO);
1163 }
1164 
1165 /*
1166  * Find the requested HyperTransport capability and return the offset
1167  * in configuration space via the pointer provided.  The function
1168  * returns 0 on success and an error code otherwise.
1169  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HyperTransport capability, if any. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Slave and host capabilities are distinguished by
		 * only the top three bits of the command register;
		 * other HT capabilities use the full 5-bit type
		 * field (PCIM_HTCMD_CAP_MASK).
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			/*
			 * NOTE(review): if ptr became 0 here, this reads
			 * register PCICAP_ID of config space before the
			 * loop condition is rechecked; appears benign but
			 * worth confirming against the capability-walk
			 * convention used elsewhere in this file.
			 */
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	/* No matching HT capability found. */
	return (ENOENT);
}
1207 
1208 /*
1209  * Find the requested capability and return the offset in
1210  * configuration space via the pointer provided.  The function returns
1211  * 0 on success and an error code otherwise.
1212  */
1213 int
1214 pci_find_cap_method(device_t dev, device_t child, int capability,
1215     int *capreg)
1216 {
1217 	struct pci_devinfo *dinfo = device_get_ivars(child);
1218 	pcicfgregs *cfg = &dinfo->cfg;
1219 	u_int32_t status;
1220 	u_int8_t ptr;
1221 
1222 	/*
1223 	 * Check the CAP_LIST bit of the PCI status register first.
1224 	 */
1225 	status = pci_read_config(child, PCIR_STATUS, 2);
1226 	if (!(status & PCIM_STATUS_CAPPRESENT))
1227 		return (ENXIO);
1228 
1229 	/*
1230 	 * Determine the start pointer of the capabilities list.
1231 	 */
1232 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1233 	case PCIM_HDRTYPE_NORMAL:
1234 	case PCIM_HDRTYPE_BRIDGE:
1235 		ptr = PCIR_CAP_PTR;
1236 		break;
1237 	case PCIM_HDRTYPE_CARDBUS:
1238 		ptr = PCIR_CAP_PTR_2;
1239 		break;
1240 	default:
1241 		/* XXX: panic? */
1242 		return (ENXIO);		/* no extended capabilities support */
1243 	}
1244 	ptr = pci_read_config(child, ptr, 1);
1245 
1246 	/*
1247 	 * Traverse the capabilities list.
1248 	 */
1249 	while (ptr != 0) {
1250 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1251 			if (capreg != NULL)
1252 				*capreg = ptr;
1253 			return (0);
1254 		}
1255 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1256 	}
1257 
1258 	return (ENOENT);
1259 }
1260 
1261 /*
1262  * Find the requested extended capability and return the offset in
1263  * configuration space via the pointer provided.  The function returns
1264  * 0 on success and an error code otherwise.
1265  */
1266 int
1267 pci_find_extcap_method(device_t dev, device_t child, int capability,
1268     int *capreg)
1269 {
1270 	struct pci_devinfo *dinfo = device_get_ivars(child);
1271 	pcicfgregs *cfg = &dinfo->cfg;
1272 	uint32_t ecap;
1273 	uint16_t ptr;
1274 
1275 	/* Only supported for PCI-express devices. */
1276 	if (cfg->pcie.pcie_location == 0)
1277 		return (ENXIO);
1278 
1279 	ptr = PCIR_EXTCAP;
1280 	ecap = pci_read_config(child, ptr, 4);
1281 	if (ecap == 0xffffffff || ecap == 0)
1282 		return (ENOENT);
1283 	for (;;) {
1284 		if (PCI_EXTCAP_ID(ecap) == capability) {
1285 			if (capreg != NULL)
1286 				*capreg = ptr;
1287 			return (0);
1288 		}
1289 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1290 		if (ptr == 0)
1291 			break;
1292 		ecap = pci_read_config(child, ptr, 4);
1293 	}
1294 
1295 	return (ENOENT);
1296 }
1297 
1298 /*
1299  * Support for MSI-X message interrupts.
1300  */
1301 void
1302 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1303 {
1304 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1305 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1306 	uint32_t offset;
1307 
1308 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1309 	offset = msix->msix_table_offset + index * 16;
1310 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1311 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1312 	bus_write_4(msix->msix_table_res, offset + 8, data);
1313 
1314 	/* Enable MSI -> HT mapping. */
1315 	pci_ht_map_msi(dev, address);
1316 }
1317 
1318 void
1319 pci_mask_msix(device_t dev, u_int index)
1320 {
1321 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1322 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1323 	uint32_t offset, val;
1324 
1325 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1326 	offset = msix->msix_table_offset + index * 16 + 12;
1327 	val = bus_read_4(msix->msix_table_res, offset);
1328 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1329 		val |= PCIM_MSIX_VCTRL_MASK;
1330 		bus_write_4(msix->msix_table_res, offset, val);
1331 	}
1332 }
1333 
1334 void
1335 pci_unmask_msix(device_t dev, u_int index)
1336 {
1337 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1338 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1339 	uint32_t offset, val;
1340 
1341 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1342 	offset = msix->msix_table_offset + index * 16 + 12;
1343 	val = bus_read_4(msix->msix_table_res, offset);
1344 	if (val & PCIM_MSIX_VCTRL_MASK) {
1345 		val &= ~PCIM_MSIX_VCTRL_MASK;
1346 		bus_write_4(msix->msix_table_res, offset, val);
1347 	}
1348 }
1349 
1350 int
1351 pci_pending_msix(device_t dev, u_int index)
1352 {
1353 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1354 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1355 	uint32_t offset, bit;
1356 
1357 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1358 	offset = msix->msix_pba_offset + (index / 32) * 4;
1359 	bit = 1 << index % 32;
1360 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1361 }
1362 
1363 /*
1364  * Restore MSI-X registers and table during resume.  If MSI-X is
1365  * enabled then walk the virtual table to restore the actual MSI-X
1366  * table.
1367  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1395 
1396 /*
1397  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1398  * returned in *count.  After this function returns, each message will be
1399  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1400  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * When the PBA shares the table BAR, 'rle' still refers to the
	 * table BAR's resource at this point, so both fields alias it.
	 */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the parent for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 marks an unused table slot. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1535 
1536 /*
1537  * By default, pci_alloc_msix() will assign the allocated IRQ
1538  * resources consecutively to the first N messages in the MSI-X table.
1539  * However, device drivers may want to use different layouts if they
1540  * either receive fewer messages than they asked for, or they wish to
1541  * populate the MSI-X table sparsely.  This method allows the driver
1542  * to specify what layout it wants.  It must be called after a
1543  * successful pci_alloc_msix() but before any of the associated
1544  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1545  *
1546  * The 'vectors' array contains 'count' message vectors.  The array
1547  * maps directly to the MSI-X table in that index 0 in the array
1548  * specifies the vector for the first message in the MSI-X table, etc.
1549  * The vector value in each array index can either be 0 to indicate
1550  * that no vector should be assigned to a message slot, or it can be a
1551  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1553  * vector (IRQ) to be used for the corresponding message.
1554  *
1555  * On successful return, each message with a non-zero vector will have
1556  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1557  * 1.  Additionally, if any of the IRQs allocated via the previous
1558  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1559  * will be freed back to the system automatically.
1560  *
1561  * For example, suppose a driver has a MSI-X table with 6 messages and
1562  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1563  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1564  * C.  After the call to pci_alloc_msix(), the device will be setup to
1565  * have an MSI-X table of ABC--- (where - means no vector assigned).
1566  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1567  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1568  * be freed back to the system.  This device will also have valid
1569  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1570  *
1571  * In any case, the SYS_RES_IRQ rid X will always map to the message
1572  * at MSI-X table index X - 1 and will only be valid if a vector is
1573  * assigned to that table entry.
1574  */
1575 int
1576 pci_remap_msix_method(device_t dev, device_t child, int count,
1577     const u_int *vectors)
1578 {
1579 	struct pci_devinfo *dinfo = device_get_ivars(child);
1580 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1581 	struct resource_list_entry *rle;
1582 	int i, irq, j, *used;
1583 
1584 	/*
1585 	 * Have to have at least one message in the table but the
1586 	 * table can't be bigger than the actual MSI-X table in the
1587 	 * device.
1588 	 */
1589 	if (count == 0 || count > msix->msix_msgnum)
1590 		return (EINVAL);
1591 
1592 	/* Sanity check the vectors. */
1593 	for (i = 0; i < count; i++)
1594 		if (vectors[i] > msix->msix_alloc)
1595 			return (EINVAL);
1596 
1597 	/*
1598 	 * Make sure there aren't any holes in the vectors to be used.
1599 	 * It's a big pain to support it, and it doesn't really make
1600 	 * sense anyway.  Also, at least one vector must be used.
1601 	 */
1602 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1603 	    M_ZERO);
1604 	for (i = 0; i < count; i++)
1605 		if (vectors[i] != 0)
1606 			used[vectors[i] - 1] = 1;
1607 	for (i = 0; i < msix->msix_alloc - 1; i++)
1608 		if (used[i] == 0 && used[i + 1] == 1) {
1609 			free(used, M_DEVBUF);
1610 			return (EINVAL);
1611 		}
1612 	if (used[0] != 1) {
1613 		free(used, M_DEVBUF);
1614 		return (EINVAL);
1615 	}
1616 
1617 	/* Make sure none of the resources are allocated. */
1618 	for (i = 0; i < msix->msix_table_len; i++) {
1619 		if (msix->msix_table[i].mte_vector == 0)
1620 			continue;
1621 		if (msix->msix_table[i].mte_handlers > 0)
1622 			return (EBUSY);
1623 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1624 		KASSERT(rle != NULL, ("missing resource"));
1625 		if (rle->res != NULL)
1626 			return (EBUSY);
1627 	}
1628 
1629 	/* Free the existing resource list entries. */
1630 	for (i = 0; i < msix->msix_table_len; i++) {
1631 		if (msix->msix_table[i].mte_vector == 0)
1632 			continue;
1633 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1634 	}
1635 
1636 	/*
1637 	 * Build the new virtual table keeping track of which vectors are
1638 	 * used.
1639 	 */
1640 	free(msix->msix_table, M_DEVBUF);
1641 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1642 	    M_DEVBUF, M_WAITOK | M_ZERO);
1643 	for (i = 0; i < count; i++)
1644 		msix->msix_table[i].mte_vector = vectors[i];
1645 	msix->msix_table_len = count;
1646 
1647 	/* Free any unused IRQs and resize the vectors array if necessary. */
1648 	j = msix->msix_alloc - 1;
1649 	if (used[j] == 0) {
1650 		struct msix_vector *vec;
1651 
1652 		while (used[j] == 0) {
1653 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1654 			    msix->msix_vectors[j].mv_irq);
1655 			j--;
1656 		}
1657 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1658 		    M_WAITOK);
1659 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1660 		    (j + 1));
1661 		free(msix->msix_vectors, M_DEVBUF);
1662 		msix->msix_vectors = vec;
1663 		msix->msix_alloc = j + 1;
1664 	}
1665 	free(used, M_DEVBUF);
1666 
1667 	/* Map the IRQs onto the rids. */
1668 	for (i = 0; i < count; i++) {
1669 		if (vectors[i] == 0)
1670 			continue;
1671 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1672 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1673 		    irq, 1);
1674 	}
1675 
1676 	if (bootverbose) {
1677 		device_printf(child, "Remapped MSI-X IRQs as: ");
1678 		for (i = 0; i < count; i++) {
1679 			if (i != 0)
1680 				printf(", ");
1681 			if (vectors[i] == 0)
1682 				printf("---");
1683 			else
1684 				printf("%d",
1685 				    msix->msix_vectors[vectors[i]].mv_irq);
1686 		}
1687 		printf("\n");
1688 	}
1689 
1690 	return (0);
1691 }
1692 
/*
 * Release all MSI-X messages allocated for 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if
 * any message still has a handler or an allocated IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1739 
1740 /*
1741  * Return the max supported MSI-X messages this device supports.
1742  * Basically, assuming the MD code can alloc messages, this function
1743  * should return the maximum value that pci_alloc_msix() can return.
1744  * Thus, it is subject to the tunables, etc.
1745  */
1746 int
1747 pci_msix_count_method(device_t dev, device_t child)
1748 {
1749 	struct pci_devinfo *dinfo = device_get_ivars(child);
1750 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1751 
1752 	if (pci_do_msix && msix->msix_location != 0)
1753 		return (msix->msix_msgnum);
1754 	return (0);
1755 }
1756 
1757 /*
1758  * HyperTransport MSI mapping control
1759  */
1760 void
1761 pci_ht_map_msi(device_t dev, uint64_t addr)
1762 {
1763 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1764 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1765 
1766 	if (!ht->ht_msimap)
1767 		return;
1768 
1769 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1770 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1771 		/* Enable MSI -> HT mapping. */
1772 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1773 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1774 		    ht->ht_msictrl, 2);
1775 	}
1776 
1777 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1778 		/* Disable MSI -> HT mapping. */
1779 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1780 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1781 		    ht->ht_msictrl, 2);
1782 	}
1783 }
1784 
1785 int
1786 pci_get_max_read_req(device_t dev)
1787 {
1788 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1789 	int cap;
1790 	uint16_t val;
1791 
1792 	cap = dinfo->cfg.pcie.pcie_location;
1793 	if (cap == 0)
1794 		return (0);
1795 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1796 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1797 	val >>= 12;
1798 	return (1 << (val + 7));
1799 }
1800 
1801 int
1802 pci_set_max_read_req(device_t dev, int size)
1803 {
1804 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1805 	int cap;
1806 	uint16_t val;
1807 
1808 	cap = dinfo->cfg.pcie.pcie_location;
1809 	if (cap == 0)
1810 		return (0);
1811 	if (size < 128)
1812 		size = 128;
1813 	if (size > 4096)
1814 		size = 4096;
1815 	size = (1 << (fls(size) - 1));
1816 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1817 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1818 	val |= (fls(size) - 8) << 12;
1819 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1820 	return (size);
1821 }
1822 
1823 /*
1824  * Support for MSI message signalled interrupts.
1825  */
1826 void
1827 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1828 {
1829 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1830 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1831 
1832 	/* Write data and address values. */
1833 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1834 	    address & 0xffffffff, 4);
1835 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1836 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1837 		    address >> 32, 4);
1838 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1839 		    data, 2);
1840 	} else
1841 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1842 		    2);
1843 
1844 	/* Enable MSI in the control register. */
1845 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1846 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1847 	    2);
1848 
1849 	/* Enable MSI -> HT mapping. */
1850 	pci_ht_map_msi(dev, address);
1851 }
1852 
1853 void
1854 pci_disable_msi(device_t dev)
1855 {
1856 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1857 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1858 
1859 	/* Disable MSI -> HT mapping. */
1860 	pci_ht_map_msi(dev, 0);
1861 
1862 	/* Disable MSI in the control register. */
1863 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1864 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1865 	    2);
1866 }
1867 
1868 /*
1869  * Restore MSI registers during resume.  If MSI is enabled then
1870  * restore the data and address registers in addition to the control
1871  * register.
1872  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only rewrite address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capable: data lives at the 64-bit offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1898 
1899 static int
1900 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1901 {
1902 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1903 	pcicfgregs *cfg = &dinfo->cfg;
1904 	struct resource_list_entry *rle;
1905 	struct msix_table_entry *mte;
1906 	struct msix_vector *mv;
1907 	uint64_t addr;
1908 	uint32_t data;
1909 	int error, i, j;
1910 
1911 	/*
1912 	 * Handle MSI first.  We try to find this IRQ among our list
1913 	 * of MSI IRQs.  If we find it, we request updated address and
1914 	 * data registers and apply the results.
1915 	 */
1916 	if (cfg->msi.msi_alloc > 0) {
1917 
1918 		/* If we don't have any active handlers, nothing to do. */
1919 		if (cfg->msi.msi_handlers == 0)
1920 			return (0);
1921 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1922 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1923 			    i + 1);
1924 			if (rle->start == irq) {
1925 				error = PCIB_MAP_MSI(device_get_parent(bus),
1926 				    dev, irq, &addr, &data);
1927 				if (error)
1928 					return (error);
1929 				pci_disable_msi(dev);
1930 				dinfo->cfg.msi.msi_addr = addr;
1931 				dinfo->cfg.msi.msi_data = data;
1932 				pci_enable_msi(dev, addr, data);
1933 				return (0);
1934 			}
1935 		}
1936 		return (ENOENT);
1937 	}
1938 
1939 	/*
1940 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1941 	 * we request the updated mapping info.  If that works, we go
1942 	 * through all the slots that use this IRQ and update them.
1943 	 */
1944 	if (cfg->msix.msix_alloc > 0) {
1945 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1946 			mv = &cfg->msix.msix_vectors[i];
1947 			if (mv->mv_irq == irq) {
1948 				error = PCIB_MAP_MSI(device_get_parent(bus),
1949 				    dev, irq, &addr, &data);
1950 				if (error)
1951 					return (error);
1952 				mv->mv_address = addr;
1953 				mv->mv_data = data;
1954 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1955 					mte = &cfg->msix.msix_table[j];
1956 					if (mte->mte_vector != i + 1)
1957 						continue;
1958 					if (mte->mte_handlers == 0)
1959 						continue;
1960 					pci_mask_msix(dev, j);
1961 					pci_enable_msix(dev, j, addr, data);
1962 					pci_unmask_msix(dev, j);
1963 				}
1964 			}
1965 		}
1966 		return (ENOENT);
1967 	}
1968 
1969 	return (ENOENT);
1970 }
1971 
1972 /*
1973  * Returns true if the specified device is blacklisted because MSI
1974  * doesn't work.
1975  */
1976 int
1977 pci_msi_device_blacklisted(device_t dev)
1978 {
1979 	const struct pci_quirk *q;
1980 
1981 	if (!pci_honor_msi_blacklist)
1982 		return (0);
1983 
1984 	for (q = &pci_quirks[0]; q->devid; q++) {
1985 		if (q->devid == pci_get_devid(dev) &&
1986 		    q->type == PCI_QUIRK_DISABLE_MSI)
1987 			return (1);
1988 	}
1989 	return (0);
1990 }
1991 
1992 /*
1993  * Returns true if a specified chipset supports MSI when it is
1994  * emulated hardware in a virtual machine.
1995  */
1996 static int
1997 pci_msi_vm_chipset(device_t dev)
1998 {
1999 	const struct pci_quirk *q;
2000 
2001 	for (q = &pci_quirks[0]; q->devid; q++) {
2002 		if (q->devid == pci_get_devid(dev) &&
2003 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2004 			return (1);
2005 	}
2006 	return (0);
2007 }
2008 
2009 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2011  * we just check for blacklisted chipsets as represented by the
2012  * host-PCI bridge at device 0:0:0.  In the future, it may become
2013  * necessary to check other system attributes, such as the kenv values
2014  * that give the motherboard manufacturer and model number.
2015  */
2016 static int
2017 pci_msi_blacklisted(void)
2018 {
2019 	device_t dev;
2020 
2021 	if (!pci_honor_msi_blacklist)
2022 		return (0);
2023 
2024 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2025 	if (!(pcie_chipset || pcix_chipset)) {
2026 		if (vm_guest != VM_GUEST_NO) {
2027 			dev = pci_find_bsf(0, 0, 0);
2028 			if (dev != NULL)
2029 				return (pci_msi_vm_chipset(dev) == 0);
2030 		}
2031 		return (1);
2032 	}
2033 
2034 	dev = pci_find_bsf(0, 0, 0);
2035 	if (dev != NULL)
2036 		return (pci_msi_device_blacklisted(dev));
2037 	return (0);
2038 }
2039 
2040 /*
2041  * Attempt to allocate *count MSI messages.  The actual number allocated is
2042  * returned in *count.  After this function returns, each message will be
2043  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2044  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Repeatedly try to allocate 'actual' messages, halving the
	 * request on failure.  Halving preserves the power-of-2
	 * invariant required above.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2(actual) in bits 6:4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2163 
2164 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated, collecting
	 * the IRQ numbers to hand back to the parent below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2212 
2213 /*
2214  * Return the max supported MSI messages this device supports.
2215  * Basically, assuming the MD code can alloc messages, this function
2216  * should return the maximum value that pci_alloc_msi() can return.
2217  * Thus, it is subject to the tunables, etc.
2218  */
2219 int
2220 pci_msi_count_method(device_t dev, device_t child)
2221 {
2222 	struct pci_devinfo *dinfo = device_get_ivars(child);
2223 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2224 
2225 	if (pci_do_msi && msi->msi_location != 0)
2226 		return (msi->msi_msgnum);
2227 	return (0);
2228 }
2229 
2230 /* free pcicfgregs structure and all depending data structures */
2231 
2232 int
2233 pci_freecfg(struct pci_devinfo *dinfo)
2234 {
2235 	struct devlist *devlist_head;
2236 	struct pci_map *pm, *next;
2237 	int i;
2238 
2239 	devlist_head = &pci_devq;
2240 
2241 	if (dinfo->cfg.vpd.vpd_reg) {
2242 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2243 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2244 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2245 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2246 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2247 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2248 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2249 	}
2250 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2251 		free(pm, M_DEVBUF);
2252 	}
2253 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2254 	free(dinfo, M_DEVBUF);
2255 
2256 	/* increment the generation count */
2257 	pci_generation++;
2258 
2259 	/* we're losing one device */
2260 	pci_numdevs--;
2261 	return (0);
2262 }
2263 
2264 /*
 * PCI power management
2266  */
2267 int
2268 pci_set_powerstate_method(device_t dev, device_t child, int state)
2269 {
2270 	struct pci_devinfo *dinfo = device_get_ivars(child);
2271 	pcicfgregs *cfg = &dinfo->cfg;
2272 	uint16_t status;
2273 	int result, oldstate, highest, delay;
2274 
2275 	if (cfg->pp.pp_cap == 0)
2276 		return (EOPNOTSUPP);
2277 
2278 	/*
2279 	 * Optimize a no state change request away.  While it would be OK to
2280 	 * write to the hardware in theory, some devices have shown odd
2281 	 * behavior when going from D3 -> D3.
2282 	 */
2283 	oldstate = pci_get_powerstate(child);
2284 	if (oldstate == state)
2285 		return (0);
2286 
2287 	/*
2288 	 * The PCI power management specification states that after a state
2289 	 * transition between PCI power states, system software must
2290 	 * guarantee a minimal delay before the function accesses the device.
2291 	 * Compute the worst case delay that we need to guarantee before we
2292 	 * access the device.  Many devices will be responsive much more
2293 	 * quickly than this delay, but there are some that don't respond
2294 	 * instantly to state changes.  Transitions to/from D3 state require
2295 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2296 	 * is done below with DELAY rather than a sleeper function because
2297 	 * this function can be called from contexts where we cannot sleep.
2298 	 */
2299 	highest = (oldstate > state) ? oldstate : state;
2300 	if (highest == PCI_POWERSTATE_D3)
2301 	    delay = 10000;
2302 	else if (highest == PCI_POWERSTATE_D2)
2303 	    delay = 200;
2304 	else
2305 	    delay = 0;
2306 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2307 	    & ~PCIM_PSTAT_DMASK;
2308 	result = 0;
2309 	switch (state) {
2310 	case PCI_POWERSTATE_D0:
2311 		status |= PCIM_PSTAT_D0;
2312 		break;
2313 	case PCI_POWERSTATE_D1:
2314 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2315 			return (EOPNOTSUPP);
2316 		status |= PCIM_PSTAT_D1;
2317 		break;
2318 	case PCI_POWERSTATE_D2:
2319 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2320 			return (EOPNOTSUPP);
2321 		status |= PCIM_PSTAT_D2;
2322 		break;
2323 	case PCI_POWERSTATE_D3:
2324 		status |= PCIM_PSTAT_D3;
2325 		break;
2326 	default:
2327 		return (EINVAL);
2328 	}
2329 
2330 	if (bootverbose)
2331 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2332 		    state);
2333 
2334 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2335 	if (delay)
2336 		DELAY(delay);
2337 	return (0);
2338 }
2339 
2340 int
2341 pci_get_powerstate_method(device_t dev, device_t child)
2342 {
2343 	struct pci_devinfo *dinfo = device_get_ivars(child);
2344 	pcicfgregs *cfg = &dinfo->cfg;
2345 	uint16_t status;
2346 	int result;
2347 
2348 	if (cfg->pp.pp_cap != 0) {
2349 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2350 		switch (status & PCIM_PSTAT_DMASK) {
2351 		case PCIM_PSTAT_D0:
2352 			result = PCI_POWERSTATE_D0;
2353 			break;
2354 		case PCIM_PSTAT_D1:
2355 			result = PCI_POWERSTATE_D1;
2356 			break;
2357 		case PCIM_PSTAT_D2:
2358 			result = PCI_POWERSTATE_D2;
2359 			break;
2360 		case PCIM_PSTAT_D3:
2361 			result = PCI_POWERSTATE_D3;
2362 			break;
2363 		default:
2364 			result = PCI_POWERSTATE_UNKNOWN;
2365 			break;
2366 		}
2367 	} else {
2368 		/* No support, device is always at D0 */
2369 		result = PCI_POWERSTATE_D0;
2370 	}
2371 	return (result);
2372 }
2373 
2374 /*
2375  * Some convenience functions for PCI device drivers.
2376  */
2377 
2378 static __inline void
2379 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2380 {
2381 	uint16_t	command;
2382 
2383 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2384 	command |= bit;
2385 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2386 }
2387 
2388 static __inline void
2389 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2390 {
2391 	uint16_t	command;
2392 
2393 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2394 	command &= ~bit;
2395 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2396 }
2397 
/* Enable bus mastering for the child device; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2404 
/* Disable bus mastering for the child device; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2411 
2412 int
2413 pci_enable_io_method(device_t dev, device_t child, int space)
2414 {
2415 	uint16_t bit;
2416 
2417 	switch(space) {
2418 	case SYS_RES_IOPORT:
2419 		bit = PCIM_CMD_PORTEN;
2420 		break;
2421 	case SYS_RES_MEMORY:
2422 		bit = PCIM_CMD_MEMEN;
2423 		break;
2424 	default:
2425 		return (EINVAL);
2426 	}
2427 	pci_set_command_bit(dev, child, bit);
2428 	return (0);
2429 }
2430 
2431 int
2432 pci_disable_io_method(device_t dev, device_t child, int space)
2433 {
2434 	uint16_t bit;
2435 
2436 	switch(space) {
2437 	case SYS_RES_IOPORT:
2438 		bit = PCIM_CMD_PORTEN;
2439 		break;
2440 	case SYS_RES_MEMORY:
2441 		bit = PCIM_CMD_MEMEN;
2442 		break;
2443 	default:
2444 		return (EINVAL);
2445 	}
2446 	pci_clear_command_bit(dev, child, bit);
2447 	return (0);
2448 }
2449 
2450 /*
2451  * New style pci driver.  Parent device is either a pci-host-bridge or a
2452  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2453  */
2454 
/*
 * Dump the parsed config-space header of a device to the console.
 * Only active when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Basic identification and location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2511 
2512 static int
2513 pci_porten(device_t dev)
2514 {
2515 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2516 }
2517 
2518 static int
2519 pci_memen(device_t dev)
2520 {
2521 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2522 }
2523 
/*
 * Read the current value of BAR 'reg' into *mapp and probe its size
 * mask into *testvalp by writing all 1's and reading the result back.
 * The original BAR contents are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	/* 64-bit BARs occupy two consecutive registers. */
	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2587 
/*
 * Program BAR 'pm' with 'base' and cache the value the device actually
 * latched back into pm->pm_value.  64-bit BARs require writing and
 * re-reading both halves.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the hardware accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2608 
2609 struct pci_map *
2610 pci_find_bar(device_t dev, int reg)
2611 {
2612 	struct pci_devinfo *dinfo;
2613 	struct pci_map *pm;
2614 
2615 	dinfo = device_get_ivars(dev);
2616 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2617 		if (pm->pm_reg == reg)
2618 			return (pm);
2619 	}
2620 	return (NULL);
2621 }
2622 
2623 int
2624 pci_bar_enabled(device_t dev, struct pci_map *pm)
2625 {
2626 	struct pci_devinfo *dinfo;
2627 	uint16_t cmd;
2628 
2629 	dinfo = device_get_ivars(dev);
2630 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2631 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2632 		return (0);
2633 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2634 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2635 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2636 	else
2637 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2638 }
2639 
/*
 * Record a new BAR for 'dev', inserting it into the device's map list
 * sorted by register offset.  Returns the new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record belongs. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list is empty; insert at the tail. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2664 
2665 static void
2666 pci_restore_bars(device_t dev)
2667 {
2668 	struct pci_devinfo *dinfo;
2669 	struct pci_map *pm;
2670 	int ln2range;
2671 
2672 	dinfo = device_get_ivars(dev);
2673 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2674 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2675 			ln2range = 32;
2676 		else
2677 			ln2range = pci_maprange(pm->pm_value);
2678 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2679 		if (ln2range == 64)
2680 			pci_write_config(dev, pm->pm_reg + 4,
2681 			    pm->pm_value >> 32, 4);
2682 	}
2683 }
2684 
2685 /*
2686  * Add a resource based on a pci map register. Return 1 if the map
2687  * register is a 32bit map register or 2 if it is a 64bit register.
2688  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR size in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The BAR address does not fit in a u_long on this arch. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* A base matching the probed size mask means the BAR is unset. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * to force pci_alloc_resource() to allocate resources
		 * from the parent.
		 */
		resource_list_delete(rl, type, reg);
	} else {
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2832 
2833 /*
2834  * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
2836  * same addresses that old ISA hardware did. This dictates that we use
2837  * those addresses and ignore the BAR's if we cannot set PCI native
2838  * addressing mode.
2839  */
2840 static void
2841 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2842     uint32_t prefetchmask)
2843 {
2844 	struct resource *r;
2845 	int rid, type, progif;
2846 #if 0
2847 	/* if this device supports PCI native addressing use it */
2848 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2849 	if ((progif & 0x8a) == 0x8a) {
2850 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2851 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2852 			printf("Trying ATA native PCI addressing mode\n");
2853 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2854 		}
2855 	}
2856 #endif
2857 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2858 	type = SYS_RES_IOPORT;
2859 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2860 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2861 		    prefetchmask & (1 << 0));
2862 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2863 		    prefetchmask & (1 << 1));
2864 	} else {
2865 		rid = PCIR_BAR(0);
2866 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2867 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2868 		    0x1f7, 8, 0);
2869 		rid = PCIR_BAR(1);
2870 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2871 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2872 		    0x3f6, 1, 0);
2873 	}
2874 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2875 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2876 		    prefetchmask & (1 << 2));
2877 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2878 		    prefetchmask & (1 << 3));
2879 	} else {
2880 		rid = PCIR_BAR(2);
2881 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2882 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2883 		    0x177, 8, 0);
2884 		rid = PCIR_BAR(3);
2885 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2886 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2887 		    0x376, 1, 0);
2888 	}
2889 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2890 	    prefetchmask & (1 << 4));
2891 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2892 	    prefetchmask & (1 << 5));
2893 }
2894 
2895 static void
2896 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2897 {
2898 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2899 	pcicfgregs *cfg = &dinfo->cfg;
2900 	char tunable_name[64];
2901 	int irq;
2902 
2903 	/* Has to have an intpin to have an interrupt. */
2904 	if (cfg->intpin == 0)
2905 		return;
2906 
2907 	/* Let the user override the IRQ with a tunable. */
2908 	irq = PCI_INVALID_IRQ;
2909 	snprintf(tunable_name, sizeof(tunable_name),
2910 	    "hw.pci%d.%d.%d.INT%c.irq",
2911 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2912 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2913 		irq = PCI_INVALID_IRQ;
2914 
2915 	/*
2916 	 * If we didn't get an IRQ via the tunable, then we either use the
2917 	 * IRQ value in the intline register or we ask the bus to route an
2918 	 * interrupt for us.  If force_route is true, then we only use the
2919 	 * value in the intline register if the bus was unable to assign an
2920 	 * IRQ.
2921 	 */
2922 	if (!PCI_INTERRUPT_VALID(irq)) {
2923 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2924 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2925 		if (!PCI_INTERRUPT_VALID(irq))
2926 			irq = cfg->intline;
2927 	}
2928 
2929 	/* If after all that we don't have an IRQ, just bail. */
2930 	if (!PCI_INTERRUPT_VALID(irq))
2931 		return;
2932 
2933 	/* Update the config register if it changed. */
2934 	if (irq != cfg->intline) {
2935 		cfg->intline = irq;
2936 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2937 	}
2938 
2939 	/* Add this IRQ as rid 0 interrupt resource. */
2940 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2941 }
2942 
2943 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's memory BAR; bail if that fails. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* OHCI_IR set: SMM currently owns the controller. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2979 
2980 /* Perform early UHCI takeover from SMM. */
2981 static void
2982 uhci_early_takeover(device_t self)
2983 {
2984 	struct resource *res;
2985 	int rid;
2986 
2987 	/*
2988 	 * Set the PIRQD enable bit and switch off all the others. We don't
2989 	 * want legacy support to interfere with us XXX Does this also mean
2990 	 * that the BIOS won't touch the keyboard anymore if it is connected
2991 	 * to the ports of the root hub?
2992 	 */
2993 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2994 
2995 	/* Disable interrupts */
2996 	rid = PCI_UHCI_BASE_REG;
2997 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2998 	if (res != NULL) {
2999 		bus_write_2(res, UHCI_INTR, 0);
3000 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3001 	}
3002 }
3003 
3004 /* Perform early EHCI takeover from SMM. */
3005 static void
3006 ehci_early_takeover(device_t self)
3007 {
3008 	struct resource *res;
3009 	uint32_t cparams;
3010 	uint32_t eec;
3011 	uint8_t eecp;
3012 	uint8_t bios_sem;
3013 	uint8_t offs;
3014 	int rid;
3015 	int i;
3016 
3017 	rid = PCIR_BAR(0);
3018 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3019 	if (res == NULL)
3020 		return;
3021 
3022 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3023 
3024 	/* Synchronise with the BIOS if it owns the controller. */
3025 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3026 	    eecp = EHCI_EECP_NEXT(eec)) {
3027 		eec = pci_read_config(self, eecp, 4);
3028 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3029 			continue;
3030 		}
3031 		bios_sem = pci_read_config(self, eecp +
3032 		    EHCI_LEGSUP_BIOS_SEM, 1);
3033 		if (bios_sem == 0) {
3034 			continue;
3035 		}
3036 		if (bootverbose)
3037 			printf("ehci early: "
3038 			    "SMM active, request owner change\n");
3039 
3040 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3041 
3042 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3043 			DELAY(1000);
3044 			bios_sem = pci_read_config(self, eecp +
3045 			    EHCI_LEGSUP_BIOS_SEM, 1);
3046 		}
3047 
3048 		if (bios_sem != 0) {
3049 			if (bootverbose)
3050 				printf("ehci early: "
3051 				    "SMM does not respond\n");
3052 		}
3053 		/* Disable interrupts */
3054 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3055 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3056 	}
3057 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3058 }
3059 
3060 /* Perform early XHCI takeover from SMM. */
3061 static void
3062 xhci_early_takeover(device_t self)
3063 {
3064 	struct resource *res;
3065 	uint32_t cparams;
3066 	uint32_t eec;
3067 	uint8_t eecp;
3068 	uint8_t bios_sem;
3069 	uint8_t offs;
3070 	int rid;
3071 	int i;
3072 
3073 	rid = PCIR_BAR(0);
3074 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3075 	if (res == NULL)
3076 		return;
3077 
3078 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3079 
3080 	eec = -1;
3081 
3082 	/* Synchronise with the BIOS if it owns the controller. */
3083 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3084 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3085 		eec = bus_read_4(res, eecp);
3086 
3087 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3088 			continue;
3089 
3090 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3091 		if (bios_sem == 0)
3092 			continue;
3093 
3094 		if (bootverbose)
3095 			printf("xhci early: "
3096 			    "SMM active, request owner change\n");
3097 
3098 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3099 
3100 		/* wait a maximum of 5 second */
3101 
3102 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3103 			DELAY(1000);
3104 			bios_sem = bus_read_1(res, eecp +
3105 			    XHCI_XECP_BIOS_SEM);
3106 		}
3107 
3108 		if (bios_sem != 0) {
3109 			if (bootverbose)
3110 				printf("xhci early: "
3111 				    "SMM does not respond\n");
3112 		}
3113 
3114 		/* Disable interrupts */
3115 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3116 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3117 		bus_read_4(res, offs + XHCI_USBSTS);
3118 	}
3119 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3120 }
3121 
/*
 * Populate the resource list for a newly-discovered PCI device: scan
 * its BARs (applying quirks), assign its INTx interrupt, and perform
 * early USB controller takeover from SMM where configured.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/*
		 * Note: i is advanced inside the loop body, either by 1
		 * for a quirk-skipped BAR or by pci_add_map()'s return
		 * value (a 64-bit BAR consumes two map registers).
		 */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM/BIOS legacy emulation. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3195 
/*
 * Enumerate every slot/function on the given bus and add a child
 * device for each PCI function that responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Config read at the current (busno, s, f); captures s and f by name. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* f must be 0 here: REG() below probes function 0. */
		pcifunchigh = 0;
		f = 0;
		/* brief settle delay before the config read — TODO confirm why needed */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function device: scan all remaining functions too. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3228 
3229 void
3230 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3231 {
3232 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3233 	device_set_ivars(dinfo->cfg.dev, dinfo);
3234 	resource_list_init(&dinfo->resources);
3235 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3236 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3237 	pci_print_verbose(dinfo);
3238 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3239 }
3240 
/*
 * Generic probe for the PCI bus driver: always matches, but at a
 * generic priority so that subclassed bus drivers can win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3250 
/*
 * Attach work shared by the generic PCI driver and its subclasses:
 * report our domain/bus location and establish the bus DMA tag.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create a boundary-restricted DMA tag when our
	 * grandparent is not itself a "pci" devclass device —
	 * presumably so buses below a PCI-PCI bridge inherit the
	 * restriction from above rather than stacking another tag.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* This guards the assignment below across the #endif. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3285 
3286 static int
3287 pci_attach(device_t dev)
3288 {
3289 	int busno, domain, error;
3290 
3291 	error = pci_attach_common(dev);
3292 	if (error)
3293 		return (error);
3294 
3295 	/*
3296 	 * Since there can be multiple independantly numbered PCI
3297 	 * busses on systems with multiple PCI domains, we can't use
3298 	 * the unit number to decide which bus we are probing. We ask
3299 	 * the parent pcib what our domain and bus numbers are.
3300 	 */
3301 	domain = pcib_get_domain(dev);
3302 	busno = pcib_get_bus(dev);
3303 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3304 	return (bus_generic_attach(dev));
3305 }
3306 
3307 static void
3308 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3309     int state)
3310 {
3311 	device_t child, pcib;
3312 	struct pci_devinfo *dinfo;
3313 	int dstate, i;
3314 
3315 	/*
3316 	 * Set the device to the given state.  If the firmware suggests
3317 	 * a different power state, use it instead.  If power management
3318 	 * is not present, the firmware is responsible for managing
3319 	 * device power.  Skip children who aren't attached since they
3320 	 * are handled separately.
3321 	 */
3322 	pcib = device_get_parent(dev);
3323 	for (i = 0; i < numdevs; i++) {
3324 		child = devlist[i];
3325 		dinfo = device_get_ivars(child);
3326 		dstate = state;
3327 		if (device_is_attached(child) &&
3328 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3329 			pci_set_powerstate(child, dstate);
3330 	}
3331 }
3332 
3333 int
3334 pci_suspend(device_t dev)
3335 {
3336 	device_t child, *devlist;
3337 	struct pci_devinfo *dinfo;
3338 	int error, i, numdevs;
3339 
3340 	/*
3341 	 * Save the PCI configuration space for each child and set the
3342 	 * device in the appropriate power state for this sleep state.
3343 	 */
3344 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3345 		return (error);
3346 	for (i = 0; i < numdevs; i++) {
3347 		child = devlist[i];
3348 		dinfo = device_get_ivars(child);
3349 		pci_cfg_save(child, dinfo, 0);
3350 	}
3351 
3352 	/* Suspend devices before potentially powering them down. */
3353 	error = bus_generic_suspend(dev);
3354 	if (error) {
3355 		free(devlist, M_TEMP);
3356 		return (error);
3357 	}
3358 	if (pci_do_power_suspend)
3359 		pci_set_power_children(dev, devlist, numdevs,
3360 		    PCI_POWERSTATE_D3);
3361 	free(devlist, M_TEMP);
3362 	return (0);
3363 }
3364 
3365 int
3366 pci_resume(device_t dev)
3367 {
3368 	device_t child, *devlist;
3369 	struct pci_devinfo *dinfo;
3370 	int error, i, numdevs;
3371 
3372 	/*
3373 	 * Set each child to D0 and restore its PCI configuration space.
3374 	 */
3375 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3376 		return (error);
3377 	if (pci_do_power_resume)
3378 		pci_set_power_children(dev, devlist, numdevs,
3379 		    PCI_POWERSTATE_D0);
3380 
3381 	/* Now the device is powered up, restore its config space. */
3382 	for (i = 0; i < numdevs; i++) {
3383 		child = devlist[i];
3384 		dinfo = device_get_ivars(child);
3385 
3386 		pci_cfg_restore(child, dinfo);
3387 		if (!device_is_attached(child))
3388 			pci_cfg_save(child, dinfo, 1);
3389 	}
3390 
3391 	/*
3392 	 * Resume critical devices first, then everything else later.
3393 	 */
3394 	for (i = 0; i < numdevs; i++) {
3395 		child = devlist[i];
3396 		switch (pci_get_class(child)) {
3397 		case PCIC_DISPLAY:
3398 		case PCIC_MEMORY:
3399 		case PCIC_BRIDGE:
3400 		case PCIC_BASEPERIPH:
3401 			DEVICE_RESUME(child);
3402 			break;
3403 		}
3404 	}
3405 	for (i = 0; i < numdevs; i++) {
3406 		child = devlist[i];
3407 		switch (pci_get_class(child)) {
3408 		case PCIC_DISPLAY:
3409 		case PCIC_MEMORY:
3410 		case PCIC_BRIDGE:
3411 		case PCIC_BASEPERIPH:
3412 			break;
3413 		default:
3414 			DEVICE_RESUME(child);
3415 		}
3416 	}
3417 	free(devlist, M_TEMP);
3418 	return (0);
3419 }
3420 
3421 static void
3422 pci_load_vendor_data(void)
3423 {
3424 	caddr_t data;
3425 	void *ptr;
3426 	size_t sz;
3427 
3428 	data = preload_search_by_type("pci_vendor_data");
3429 	if (data != NULL) {
3430 		ptr = preload_fetch_addr(data);
3431 		sz = preload_fetch_size(data);
3432 		if (ptr != NULL && sz != 0) {
3433 			pci_vendordata = ptr;
3434 			pci_vendordata_size = sz;
3435 			/* terminate the database */
3436 			pci_vendordata[pci_vendordata_size] = '\n';
3437 		}
3438 	}
3439 }
3440 
3441 void
3442 pci_driver_added(device_t dev, driver_t *driver)
3443 {
3444 	int numdevs;
3445 	device_t *devlist;
3446 	device_t child;
3447 	struct pci_devinfo *dinfo;
3448 	int i;
3449 
3450 	if (bootverbose)
3451 		device_printf(dev, "driver added\n");
3452 	DEVICE_IDENTIFY(driver, dev);
3453 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3454 		return;
3455 	for (i = 0; i < numdevs; i++) {
3456 		child = devlist[i];
3457 		if (device_get_state(child) != DS_NOTPRESENT)
3458 			continue;
3459 		dinfo = device_get_ivars(child);
3460 		pci_print_verbose(dinfo);
3461 		if (bootverbose)
3462 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3463 		pci_cfg_restore(child, dinfo);
3464 		if (device_probe_and_attach(child) != 0)
3465 			pci_cfg_save(child, dinfo, 1);
3466 	}
3467 	free(devlist, M_TEMP);
3468 }
3469 
/*
 * Set up an interrupt handler on behalf of a child device.  rid 0 is
 * the legacy INTx interrupt; nonzero rids are MSI/MSI-X messages that
 * must be mapped through the parent bridge and programmed into the
 * device on first use.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the message lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* First handler: program the device's MSI registers. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based indices into the table. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler: program and unmask the table entry. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3561 
3562 int
3563 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3564     void *cookie)
3565 {
3566 	struct msix_table_entry *mte;
3567 	struct resource_list_entry *rle;
3568 	struct pci_devinfo *dinfo;
3569 	int error, rid;
3570 
3571 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3572 		return (EINVAL);
3573 
3574 	/* If this isn't a direct child, just bail out */
3575 	if (device_get_parent(child) != dev)
3576 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3577 
3578 	rid = rman_get_rid(irq);
3579 	if (rid == 0) {
3580 		/* Mask INTx */
3581 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3582 	} else {
3583 		/*
3584 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3585 		 * decrement the appropriate handlers count and mask the
3586 		 * MSI-X message, or disable MSI messages if the count
3587 		 * drops to 0.
3588 		 */
3589 		dinfo = device_get_ivars(child);
3590 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3591 		if (rle->res != irq)
3592 			return (EINVAL);
3593 		if (dinfo->cfg.msi.msi_alloc > 0) {
3594 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3595 			    ("MSI-X index too high"));
3596 			if (dinfo->cfg.msi.msi_handlers == 0)
3597 				return (EINVAL);
3598 			dinfo->cfg.msi.msi_handlers--;
3599 			if (dinfo->cfg.msi.msi_handlers == 0)
3600 				pci_disable_msi(child);
3601 		} else {
3602 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3603 			    ("No MSI or MSI-X interrupts allocated"));
3604 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3605 			    ("MSI-X index too high"));
3606 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3607 			if (mte->mte_handlers == 0)
3608 				return (EINVAL);
3609 			mte->mte_handlers--;
3610 			if (mte->mte_handlers == 0)
3611 				pci_mask_msix(child, rid - 1);
3612 		}
3613 	}
3614 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3615 	if (rid > 0)
3616 		KASSERT(error == 0,
3617 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3618 	return (error);
3619 }
3620 
3621 int
3622 pci_print_child(device_t dev, device_t child)
3623 {
3624 	struct pci_devinfo *dinfo;
3625 	struct resource_list *rl;
3626 	int retval = 0;
3627 
3628 	dinfo = device_get_ivars(child);
3629 	rl = &dinfo->resources;
3630 
3631 	retval += bus_print_child_header(dev, child);
3632 
3633 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3634 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3635 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3636 	if (device_get_flags(dev))
3637 		retval += printf(" flags %#x", device_get_flags(dev));
3638 
3639 	retval += printf(" at device %d.%d", pci_get_slot(child),
3640 	    pci_get_function(child));
3641 
3642 	retval += bus_print_child_footer(dev, child);
3643 
3644 	return (retval);
3645 }
3646 
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a human-readable device type when no driver attaches.  A
 * subclass of -1 carries the generic description for the whole class;
 * the table is terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3739 
3740 void
3741 pci_probe_nomatch(device_t dev, device_t child)
3742 {
3743 	int	i;
3744 	char	*cp, *scp, *device;
3745 
3746 	/*
3747 	 * Look for a listing for this device in a loaded device database.
3748 	 */
3749 	if ((device = pci_describe_device(child)) != NULL) {
3750 		device_printf(dev, "<%s>", device);
3751 		free(device, M_DEVBUF);
3752 	} else {
3753 		/*
3754 		 * Scan the class/subclass descriptions for a general
3755 		 * description.
3756 		 */
3757 		cp = "unknown";
3758 		scp = NULL;
3759 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3760 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3761 				if (pci_nomatch_tab[i].subclass == -1) {
3762 					cp = pci_nomatch_tab[i].desc;
3763 				} else if (pci_nomatch_tab[i].subclass ==
3764 				    pci_get_subclass(child)) {
3765 					scp = pci_nomatch_tab[i].desc;
3766 				}
3767 			}
3768 		}
3769 		device_printf(dev, "<%s%s%s>",
3770 		    cp ? cp : "",
3771 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3772 		    scp ? scp : "");
3773 	}
3774 	printf(" at device %d.%d (no driver attached)\n",
3775 	    pci_get_slot(child), pci_get_function(child));
3776 	pci_cfg_save(child, device_get_ivars(child), 1);
3777 	return;
3778 }
3779 
3780 /*
3781  * Parse the PCI device database, if loaded, and return a pointer to a
3782  * description of the device.
3783  *
3784  * The database is flat text formatted as follows:
3785  *
3786  * Any line not in a valid format is ignored.
3787  * Lines are terminated with newline '\n' characters.
3788  *
3789  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3790  * the vendor name.
3791  *
3792  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3793  * - devices cannot be listed without a corresponding VENDOR line.
3794  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3795  * another TAB, then the device name.
3796  */
3797 
3798 /*
3799  * Assuming (ptr) points to the beginning of a line in the database,
3800  * return the vendor or device and description of the next entry.
3801  * The value of (vendor) or (device) inappropriate for the entry type
3802  * is set to -1.  Returns nonzero at the end of the database.
3803  *
3804  * Note that this is slightly unrobust in the face of corrupt data;
3805  * we attempt to safeguard against this by spamming the end of the
3806  * database with a newline when we initialise.
3807  */
3808 static int
3809 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3810 {
3811 	char	*cp = *ptr;
3812 	int	left;
3813 
3814 	*device = -1;
3815 	*vendor = -1;
3816 	**desc = '\0';
3817 	for (;;) {
3818 		left = pci_vendordata_size - (cp - pci_vendordata);
3819 		if (left <= 0) {
3820 			*ptr = cp;
3821 			return(1);
3822 		}
3823 
3824 		/* vendor entry? */
3825 		if (*cp != '\t' &&
3826 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3827 			break;
3828 		/* device entry? */
3829 		if (*cp == '\t' &&
3830 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3831 			break;
3832 
3833 		/* skip to next line */
3834 		while (*cp != '\n' && left > 0) {
3835 			cp++;
3836 			left--;
3837 		}
3838 		if (*cp == '\n') {
3839 			cp++;
3840 			left--;
3841 		}
3842 	}
3843 	/* skip to next line */
3844 	while (*cp != '\n' && left > 0) {
3845 		cp++;
3846 		left--;
3847 	}
3848 	if (*cp == '\n' && left > 0)
3849 		cp++;
3850 	*ptr = cp;
3851 	return(0);
3852 }
3853 
/*
 * Look up the vendor and device names for "dev" in the preloaded
 * vendor database.  Returns a malloc(9)ed "vendor, device" string
 * which the caller must free with M_DEVBUF, or NULL when the database
 * is absent, the vendor is unknown, or an allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database before the vendor was found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no matching device entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Ran into the next vendor: no matching device entry. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device name found; fall back to the numeric id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+ 3" covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3906 
3907 int
3908 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3909 {
3910 	struct pci_devinfo *dinfo;
3911 	pcicfgregs *cfg;
3912 
3913 	dinfo = device_get_ivars(child);
3914 	cfg = &dinfo->cfg;
3915 
3916 	switch (which) {
3917 	case PCI_IVAR_ETHADDR:
3918 		/*
3919 		 * The generic accessor doesn't deal with failure, so
3920 		 * we set the return value, then return an error.
3921 		 */
3922 		*((uint8_t **) result) = NULL;
3923 		return (EINVAL);
3924 	case PCI_IVAR_SUBVENDOR:
3925 		*result = cfg->subvendor;
3926 		break;
3927 	case PCI_IVAR_SUBDEVICE:
3928 		*result = cfg->subdevice;
3929 		break;
3930 	case PCI_IVAR_VENDOR:
3931 		*result = cfg->vendor;
3932 		break;
3933 	case PCI_IVAR_DEVICE:
3934 		*result = cfg->device;
3935 		break;
3936 	case PCI_IVAR_DEVID:
3937 		*result = (cfg->device << 16) | cfg->vendor;
3938 		break;
3939 	case PCI_IVAR_CLASS:
3940 		*result = cfg->baseclass;
3941 		break;
3942 	case PCI_IVAR_SUBCLASS:
3943 		*result = cfg->subclass;
3944 		break;
3945 	case PCI_IVAR_PROGIF:
3946 		*result = cfg->progif;
3947 		break;
3948 	case PCI_IVAR_REVID:
3949 		*result = cfg->revid;
3950 		break;
3951 	case PCI_IVAR_INTPIN:
3952 		*result = cfg->intpin;
3953 		break;
3954 	case PCI_IVAR_IRQ:
3955 		*result = cfg->intline;
3956 		break;
3957 	case PCI_IVAR_DOMAIN:
3958 		*result = cfg->domain;
3959 		break;
3960 	case PCI_IVAR_BUS:
3961 		*result = cfg->bus;
3962 		break;
3963 	case PCI_IVAR_SLOT:
3964 		*result = cfg->slot;
3965 		break;
3966 	case PCI_IVAR_FUNCTION:
3967 		*result = cfg->func;
3968 		break;
3969 	case PCI_IVAR_CMDREG:
3970 		*result = cfg->cmdreg;
3971 		break;
3972 	case PCI_IVAR_CACHELNSZ:
3973 		*result = cfg->cachelnsz;
3974 		break;
3975 	case PCI_IVAR_MINGNT:
3976 		*result = cfg->mingnt;
3977 		break;
3978 	case PCI_IVAR_MAXLAT:
3979 		*result = cfg->maxlat;
3980 		break;
3981 	case PCI_IVAR_LATTIMER:
3982 		*result = cfg->lattimer;
3983 		break;
3984 	default:
3985 		return (ENOENT);
3986 	}
3987 	return (0);
3988 }
3989 
3990 int
3991 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3992 {
3993 	struct pci_devinfo *dinfo;
3994 
3995 	dinfo = device_get_ivars(child);
3996 
3997 	switch (which) {
3998 	case PCI_IVAR_INTPIN:
3999 		dinfo->cfg.intpin = value;
4000 		return (0);
4001 	case PCI_IVAR_ETHADDR:
4002 	case PCI_IVAR_SUBVENDOR:
4003 	case PCI_IVAR_SUBDEVICE:
4004 	case PCI_IVAR_VENDOR:
4005 	case PCI_IVAR_DEVICE:
4006 	case PCI_IVAR_DEVID:
4007 	case PCI_IVAR_CLASS:
4008 	case PCI_IVAR_SUBCLASS:
4009 	case PCI_IVAR_PROGIF:
4010 	case PCI_IVAR_REVID:
4011 	case PCI_IVAR_IRQ:
4012 	case PCI_IVAR_DOMAIN:
4013 	case PCI_IVAR_BUS:
4014 	case PCI_IVAR_SLOT:
4015 	case PCI_IVAR_FUNCTION:
4016 		return (EINVAL);	/* disallow for now */
4017 
4018 	default:
4019 		return (ENOENT);
4020 	}
4021 }
4022 
4023 #include "opt_ddb.h"
4024 #ifdef DDB
4025 #include <ddb/ddb.h>
4026 #include <sys/cons.h>
4027 
4028 /*
4029  * List resources based on pci map registers, used for within ddb
4030  */
4031 
/*
 * "show pciregs" DDB command: walk the global PCI device queue and print
 * one summary line per device (selector, class, subsystem and device IDs,
 * revision, header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices.  Stop early
	 * if the user quits the DDB pager.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4071 #endif /* DDB */
4072 
/*
 * Lazily reserve the backing resource for one of a child's BARs the first
 * time the BAR's rid is requested.  The BAR is sized by probing it, a
 * resource large enough for the full BAR is reserved (but not activated)
 * from the parent bus, recorded in the child's resource list with
 * RLE_RESERVED, and the allocated base address is written back into the
 * BAR.  Returns NULL if the BAR is unimplemented, the requested type does
 * not match what the BAR decodes, or the allocation failed.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Record the probed BAR so a failed allocation can be retried. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.  RF_ACTIVE is stripped:
	 * the resource is reserved here, activated later on demand.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the allocated base address into the BAR itself. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4178 
/*
 * Resource allocation method for the PCI bus.  For our own children this
 * performs lazy allocation: legacy IRQ rid 0 may be routed on first use
 * (unless MSI/MSI-X is already active), and memory/ioport rids are
 * reserved from the underlying BAR the first time they are requested.
 * Requests on behalf of grandchildren are passed straight up the tree.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Not one of our direct children: just forward the request. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out the (possibly just reserved) resource list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4249 
4250 int
4251 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4252     struct resource *r)
4253 {
4254 	struct pci_devinfo *dinfo;
4255 	int error;
4256 
4257 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4258 	if (error)
4259 		return (error);
4260 
4261 	/* Enable decoding in the command register when activating BARs. */
4262 	if (device_get_parent(child) == dev) {
4263 		/* Device ROMs need their decoding explicitly enabled. */
4264 		dinfo = device_get_ivars(child);
4265 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4266 			pci_write_bar(child, pci_find_bar(child, rid),
4267 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4268 		switch (type) {
4269 		case SYS_RES_IOPORT:
4270 		case SYS_RES_MEMORY:
4271 			error = PCI_ENABLE_IO(dev, child, type);
4272 			break;
4273 		}
4274 	}
4275 	return (error);
4276 }
4277 
4278 int
4279 pci_deactivate_resource(device_t dev, device_t child, int type,
4280     int rid, struct resource *r)
4281 {
4282 	struct pci_devinfo *dinfo;
4283 	int error;
4284 
4285 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4286 	if (error)
4287 		return (error);
4288 
4289 	/* Disable decoding for device ROMs. */
4290 	if (device_get_parent(child) == dev) {
4291 		dinfo = device_get_ivars(child);
4292 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4293 			pci_write_bar(child, pci_find_bar(child, rid),
4294 			    rman_get_start(r));
4295 	}
4296 	return (0);
4297 }
4298 
/*
 * Detach and destroy a child device, releasing every resource recorded in
 * its resource list.  Decoding is turned off in the command register first
 * so the hardware stops claiming the ranges being freed.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource that is still active or checked out was
			 * leaked by the (now detached) child; complain and
			 * release it on the child's behalf before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4338 
/*
 * Remove one resource entry from a child's resource list.  An entry whose
 * resource is still active or checked out by the child is left in place
 * (with a diagnostic); otherwise any reserved backing resource is
 * unreserved before the list entry is deleted.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on resources of our own children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4368 
4369 struct resource_list *
4370 pci_get_resource_list (device_t dev, device_t child)
4371 {
4372 	struct pci_devinfo *dinfo = device_get_ivars(child);
4373 
4374 	return (&dinfo->resources);
4375 }
4376 
4377 bus_dma_tag_t
4378 pci_get_dma_tag(device_t bus, device_t dev)
4379 {
4380 	struct pci_softc *sc = device_get_softc(bus);
4381 
4382 	return (sc->sc_dma_tag);
4383 }
4384 
4385 uint32_t
4386 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4387 {
4388 	struct pci_devinfo *dinfo = device_get_ivars(child);
4389 	pcicfgregs *cfg = &dinfo->cfg;
4390 
4391 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4392 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4393 }
4394 
4395 void
4396 pci_write_config_method(device_t dev, device_t child, int reg,
4397     uint32_t val, int width)
4398 {
4399 	struct pci_devinfo *dinfo = device_get_ivars(child);
4400 	pcicfgregs *cfg = &dinfo->cfg;
4401 
4402 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4403 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4404 }
4405 
4406 int
4407 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4408     size_t buflen)
4409 {
4410 
4411 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4412 	    pci_get_function(child));
4413 	return (0);
4414 }
4415 
4416 int
4417 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4418     size_t buflen)
4419 {
4420 	struct pci_devinfo *dinfo;
4421 	pcicfgregs *cfg;
4422 
4423 	dinfo = device_get_ivars(child);
4424 	cfg = &dinfo->cfg;
4425 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4426 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4427 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4428 	    cfg->progif);
4429 	return (0);
4430 }
4431 
4432 int
4433 pci_assign_interrupt_method(device_t dev, device_t child)
4434 {
4435 	struct pci_devinfo *dinfo = device_get_ivars(child);
4436 	pcicfgregs *cfg = &dinfo->cfg;
4437 
4438 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4439 	    cfg->intpin));
4440 }
4441 
4442 static int
4443 pci_modevent(module_t mod, int what, void *arg)
4444 {
4445 	static struct cdev *pci_cdev;
4446 
4447 	switch (what) {
4448 	case MOD_LOAD:
4449 		STAILQ_INIT(&pci_devq);
4450 		pci_generation = 0;
4451 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4452 		    "pci");
4453 		pci_load_vendor_data();
4454 		break;
4455 
4456 	case MOD_UNLOAD:
4457 		destroy_dev(pci_cdev);
4458 		break;
4459 	}
4460 
4461 	return (0);
4462 }
4463 
/*
 * Rewrite the saved writable PCI Express capability control registers.
 * Each register is written under the same version/port-type conditions
 * used when it was saved by pci_cfg_save_pcie(), so only registers the
 * device actually implements are touched.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* Write the 16-bit register at offset 'n' within the PCIe capability. */
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: all devices for v2+, otherwise only link-bearing types. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: ports that report a slot via PCIEM_FLAGS_SLOT. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist for capability version > 1. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4499 
4500 static void
4501 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4502 {
4503 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4504 	    dinfo->cfg.pcix.pcix_command,  2);
4505 }
4506 
/*
 * Restore a device's saved config-space state (BARs, command register and
 * the other writable type 0 header registers, plus PCIe/PCI-X/MSI/MSI-X
 * state).  Used after events such as resume that may have reset the part.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4556 
/*
 * Snapshot the writable PCI Express capability control registers into the
 * cached config state.  Registers that only exist for certain capability
 * versions or port types are read under matching conditions; the same
 * conditions gate the writes in pci_cfg_restore_pcie().
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* Read the 16-bit register at offset 'n' within the PCIe capability. */
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: all devices for v2+, otherwise only link-bearing types. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: ports that report a slot via PCIEM_FLAGS_SLOT. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist for capability version > 1. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4594 
4595 static void
4596 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4597 {
4598 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4599 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4600 }
4601 
4602 void
4603 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4604 {
4605 	uint32_t cls;
4606 	int ps;
4607 
4608 	/*
4609 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4610 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4611 	 * which also require special treatment.  Other types are unknown, and
4612 	 * we err on the side of safety by ignoring them.  Powering down
4613 	 * bridges should not be undertaken lightly.
4614 	 */
4615 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4616 		return;
4617 
4618 	/*
4619 	 * Some drivers apparently write to these registers w/o updating our
4620 	 * cached copy.  No harm happens if we update the copy, so do so here
4621 	 * so we can restore them.  The COMMAND register is modified by the
4622 	 * bus w/o updating the cache.  This should represent the normally
4623 	 * writable portion of the 'defined' part of type 0 headers.  In
4624 	 * theory we also need to save/restore the PCI capability structures
4625 	 * we know about, but apart from power we don't know any that are
4626 	 * writable.
4627 	 */
4628 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4629 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4630 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4631 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4632 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4633 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4634 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4635 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4636 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4637 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4638 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4639 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4640 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4641 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4642 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4643 
4644 	if (dinfo->cfg.pcie.pcie_location != 0)
4645 		pci_cfg_save_pcie(dev, dinfo);
4646 
4647 	if (dinfo->cfg.pcix.pcix_location != 0)
4648 		pci_cfg_save_pcix(dev, dinfo);
4649 
4650 	/*
4651 	 * don't set the state for display devices, base peripherals and
4652 	 * memory devices since bad things happen when they are powered down.
4653 	 * We should (a) have drivers that can easily detach and (b) use
4654 	 * generic drivers for these devices so that some device actually
4655 	 * attaches.  We need to make sure that when we implement (a) we don't
4656 	 * power the device down on a reattach.
4657 	 */
4658 	cls = pci_get_class(dev);
4659 	if (!setstate)
4660 		return;
4661 	switch (pci_do_power_nodriver)
4662 	{
4663 		case 0:		/* NO powerdown at all */
4664 			return;
4665 		case 1:		/* Conservative about what to power down */
4666 			if (cls == PCIC_STORAGE)
4667 				return;
4668 			/*FALLTHROUGH*/
4669 		case 2:		/* Agressive about what to power down */
4670 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4671 			    cls == PCIC_BASEPERIPH)
4672 				return;
4673 			/*FALLTHROUGH*/
4674 		case 3:		/* Power down everything */
4675 			break;
4676 	}
4677 	/*
4678 	 * PCI spec says we can only go into D3 state from D0 state.
4679 	 * Transition from D[12] into D0 before going to D3 state.
4680 	 */
4681 	ps = pci_get_powerstate(dev);
4682 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4683 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4684 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4685 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4686 }
4687 
4688 /* Wrapper APIs suitable for device driver use. */
4689 void
4690 pci_save_state(device_t dev)
4691 {
4692 	struct pci_devinfo *dinfo;
4693 
4694 	dinfo = device_get_ivars(dev);
4695 	pci_cfg_save(dev, dinfo, 0);
4696 }
4697 
4698 void
4699 pci_restore_state(device_t dev)
4700 {
4701 	struct pci_devinfo *dinfo;
4702 
4703 	dinfo = device_get_ivars(dev);
4704 	pci_cfg_restore(dev, dinfo);
4705 }
4706