xref: /freebsd/sys/dev/pci/pci.c (revision ae77177087c655fc883075af4f425b37e032cd05)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
74 #define	PCI_DMA_BOUNDARY	0x100000000
75 #endif
76 
/*
 * True if config register 'reg' is the expansion-ROM BAR for the header
 * type recorded in 'cfg' (type 0 devices use PCIR_BIOS, type 1 bridges
 * use PCIR_BIOS_1).  All macro arguments are parenthesized to stay safe
 * when callers pass expressions.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
80 
81 static pci_addr_t	pci_mapbase(uint64_t mapreg);
82 static const char	*pci_maptype(uint64_t mapreg);
83 static int		pci_mapsize(uint64_t testval);
84 static int		pci_maprange(uint64_t mapreg);
85 static pci_addr_t	pci_rombase(uint64_t mapreg);
86 static int		pci_romsize(uint64_t testval);
87 static void		pci_fixancient(pcicfgregs *cfg);
88 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
89 
90 static int		pci_porten(device_t dev);
91 static int		pci_memen(device_t dev);
92 static void		pci_assign_interrupt(device_t bus, device_t dev,
93 			    int force_route);
94 static int		pci_add_map(device_t bus, device_t dev, int reg,
95 			    struct resource_list *rl, int force, int prefetch);
96 static int		pci_probe(device_t dev);
97 static int		pci_attach(device_t dev);
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
103 static int		pci_modevent(module_t mod, int what, void *arg);
104 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105 			    pcicfgregs *cfg);
106 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108 			    int reg, uint32_t *data);
109 #if 0
110 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111 			    int reg, uint32_t data);
112 #endif
113 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114 static void		pci_disable_msi(device_t dev);
115 static void		pci_enable_msi(device_t dev, uint64_t address,
116 			    uint16_t data);
117 static void		pci_enable_msix(device_t dev, u_int index,
118 			    uint64_t address, uint32_t data);
119 static void		pci_mask_msix(device_t dev, u_int index);
120 static void		pci_unmask_msix(device_t dev, u_int index);
121 static int		pci_msi_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
/*
 * newbus method dispatch table for the PCI bus driver: device lifecycle,
 * generic bus methods (resources, interrupts, ivars), and the PCI-specific
 * kobj interface (config space, power, MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
183 
/* Declare the "pci" driver class and register it as a child of pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database loaded from the pciids file, if any. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
192 
/* One entry in the quirk table matched against a device's vendor/device ID. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;		/* quirk-type specific argument (e.g. register offset) */
	int	arg2;
};
203 
204 static const struct pci_quirk const pci_quirks[] = {
205 	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
206 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
207 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
208 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
209 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
210 
211 	/*
212 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
213 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
214 	 */
215 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217 
218 	/*
219 	 * MSI doesn't work on earlier Intel chipsets including
220 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
221 	 */
222 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
223 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
224 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
225 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
229 
230 	/*
231 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
232 	 * bridge.
233 	 */
234 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235 
236 	/*
237 	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
238 	 * VMware.
239 	 */
240 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 
242 	/*
243 	 * Some virtualization environments emulate an older chipset
244 	 * but support MSI just fine.  QEMU uses the Intel 82440.
245 	 */
246 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
247 
248 	/*
249 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
250 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
251 	 * It prevents us from attaching hpet(4) when the bit is unset.
252 	 * Note this quirk only affects SB600 revision A13 and earlier.
253 	 * For SB600 A21 and later, firmware must set the bit to hide it.
254 	 * For SB700 and later, it is unused and hardcoded to zero.
255 	 */
256 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
257 
258 	{ 0 }
259 };
260 
261 /* map register information */
262 #define	PCI_MAPMEM	0x01	/* memory map */
263 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264 #define	PCI_MAPPORT	0x04	/* port map */
265 
266 struct devlist pci_devq;
267 uint32_t pci_generation;
268 uint32_t pci_numdevs = 0;
269 static int pcie_chipset, pcix_chipset;
270 
271 /* sysctl vars */
272 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273 
274 static int pci_enable_io_modes = 1;
275 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277     &pci_enable_io_modes, 1,
278     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279 enable these bits correctly.  We'd like to do this all the time, but there\n\
280 are some peripherals that this causes problems with.");
281 
282 static int pci_do_power_nodriver = 0;
283 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
284 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
285     &pci_do_power_nodriver, 0,
286   "Place a function into D3 state when no driver attaches to it.  0 means\n\
287 disable.  1 means conservatively place devices into D3 state.  2 means\n\
288 agressively place devices into D3 state.  3 means put absolutely everything\n\
289 in D3 state.");
290 
291 int pci_do_power_resume = 1;
292 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
293 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
294     &pci_do_power_resume, 1,
295   "Transition from D3 -> D0 on resume.");
296 
297 int pci_do_power_suspend = 1;
298 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
299 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
300     &pci_do_power_suspend, 1,
301   "Transition from D0 -> D3 on suspend.");
302 
303 static int pci_do_msi = 1;
304 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
305 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
306     "Enable support for MSI interrupts");
307 
308 static int pci_do_msix = 1;
309 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
310 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
311     "Enable support for MSI-X interrupts");
312 
313 static int pci_honor_msi_blacklist = 1;
314 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
315 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
316     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
317 
318 #if defined(__i386__) || defined(__amd64__)
319 static int pci_usb_takeover = 1;
320 #else
321 static int pci_usb_takeover = 0;
322 #endif
323 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
324 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
325     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
326 Disable this if you depend on BIOS emulation of USB devices, that is\n\
327 you use USB devices (like keyboard or mouse) but do not load USB drivers");
328 
329 /* Find a device_t by bus/slot/function in domain 0 */
330 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Legacy lookup: delegate to the full lookup using domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
337 
338 /* Find a device_t by domain/bus/slot/function */
339 
340 device_t
341 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.domain == domain) &&
347 		    (dinfo->cfg.bus == bus) &&
348 		    (dinfo->cfg.slot == slot) &&
349 		    (dinfo->cfg.func == func)) {
350 			return (dinfo->cfg.dev);
351 		}
352 	}
353 
354 	return (NULL);
355 }
356 
357 /* Find a device_t by vendor/device ID */
358 
359 device_t
360 pci_find_device(uint16_t vendor, uint16_t device)
361 {
362 	struct pci_devinfo *dinfo;
363 
364 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
365 		if ((dinfo->cfg.vendor == vendor) &&
366 		    (dinfo->cfg.device == device)) {
367 			return (dinfo->cfg.dev);
368 		}
369 	}
370 
371 	return (NULL);
372 }
373 
374 device_t
375 pci_find_class(uint8_t class, uint8_t subclass)
376 {
377 	struct pci_devinfo *dinfo;
378 
379 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
380 		if (dinfo->cfg.baseclass == class &&
381 		    dinfo->cfg.subclass == subclass) {
382 			return (dinfo->cfg.dev);
383 		}
384 	}
385 
386 	return (NULL);
387 }
388 
389 static int
390 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
391 {
392 	va_list ap;
393 	int retval;
394 
395 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
396 	    cfg->func);
397 	va_start(ap, fmt);
398 	retval += vprintf(fmt, ap);
399 	va_end(ap);
400 	return (retval);
401 }
402 
403 /* return base address of memory or port map */
404 
405 static pci_addr_t
406 pci_mapbase(uint64_t mapreg)
407 {
408 
409 	if (PCI_BAR_MEM(mapreg))
410 		return (mapreg & PCIM_BAR_MEM_BASE);
411 	else
412 		return (mapreg & PCIM_BAR_IO_BASE);
413 }
414 
415 /* return map type of memory or port map */
416 
417 static const char *
418 pci_maptype(uint64_t mapreg)
419 {
420 
421 	if (PCI_BAR_IO(mapreg))
422 		return ("I/O Port");
423 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
424 		return ("Prefetchable Memory");
425 	return ("Memory");
426 }
427 
428 /* return log2 of map size decoded for memory or port map */
429 
430 static int
431 pci_mapsize(uint64_t testval)
432 {
433 	int ln2size;
434 
435 	testval = pci_mapbase(testval);
436 	ln2size = 0;
437 	if (testval != 0) {
438 		while ((testval & 1) == 0)
439 		{
440 			ln2size++;
441 			testval >>= 1;
442 		}
443 	}
444 	return (ln2size);
445 }
446 
447 /* return base address of device ROM */
448 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable and reserved bits of the expansion-ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
455 
/* return log2 of map size decoded for device ROM */
457 
458 static int
459 pci_romsize(uint64_t testval)
460 {
461 	int ln2size;
462 
463 	testval = pci_rombase(testval);
464 	ln2size = 0;
465 	if (testval != 0) {
466 		while ((testval & 1) == 0)
467 		{
468 			ln2size++;
469 			testval >>= 1;
470 		}
471 	}
472 	return (ln2size);
473 }
474 
475 /* return log2 of address range supported by map register */
476 
477 static int
478 pci_maprange(uint64_t mapreg)
479 {
480 	int ln2range = 0;
481 
482 	if (PCI_BAR_IO(mapreg))
483 		ln2range = 32;
484 	else
485 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
486 		case PCIM_BAR_MEM_32:
487 			ln2range = 32;
488 			break;
489 		case PCIM_BAR_MEM_1MB:
490 			ln2range = 20;
491 			break;
492 		case PCIM_BAR_MEM_64:
493 			ln2range = 64;
494 			break;
495 		}
496 	return (ln2range);
497 }
498 
499 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
500 
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type 0 headers can be mislabelled this way. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
511 
512 /* extract header type specific config data */
513 
/*
 * Fill in the header-type-specific fields of 'cfg' (subsystem IDs and
 * number of BARs) by reading config space at bus/slot/func 'b'/'s'/'f'.
 * Unknown header types leave the fields untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges carry no subsystem IDs at a fixed offset. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
535 
536 /* read configuration header into pcicfgregs structure */
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones vendor/device means no function present at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* 'size' lets subclasses allocate a larger devinfo. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard config header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the data into the pciio(4) conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
611 
/*
 * Walk the device's PCI capability list and record the location and
 * key registers of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI express)
 * into 'cfg'.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capabilities-pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA registers each encode a BIR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIM_EXP_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
772 
773 /*
774  * PCI Vital Product Data
775  */
776 
777 #define	PCI_VPD_TIMEOUT		1000000
778 
/*
 * Read a 32-bit word of VPD at byte offset 'reg' into *data via the VPD
 * address/data register pair.  Uses the REG/WREG macros left defined by
 * pci_read_cap().  Returns 0 on success or ENXIO if the hardware does not
 * set the completion flag within PCI_VPD_TIMEOUT polls.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* A read is requested by writing the address with bit 15 clear. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Bit 15 is set by hardware when the transfer completes. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
797 
#if 0
/*
 * Write a 32-bit word of VPD at byte offset 'reg' (currently unused,
 * hence compiled out).  Mirrors pci_read_vpd_reg(): a write is requested
 * by setting bit 15 of the address register, which hardware clears when
 * the transfer completes.  Returns 0 or ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
817 
818 #undef PCI_VPD_TIMEOUT
819 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* parent bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit word, little-endian decoded */
	int		bytesinval;	/* unread bytes remaining in 'val' */
	int		off;		/* byte offset of the next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
828 
829 static int
830 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
831 {
832 	uint32_t reg;
833 	uint8_t byte;
834 
835 	if (vrs->bytesinval == 0) {
836 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
837 			return (ENXIO);
838 		vrs->val = le32toh(reg);
839 		vrs->off += 4;
840 		byte = vrs->val & 0xff;
841 		vrs->bytesinval = 3;
842 	} else {
843 		vrs->val = vrs->val >> 8;
844 		byte = vrs->val & 0xff;
845 		vrs->bytesinval--;
846 	}
847 
848 	vrs->cksum += byte;
849 	*data = byte;
850 	return (0);
851 }
852 
853 static void
854 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
855 {
856 	struct vpd_readstate vrs;
857 	int state;
858 	int name;
859 	int remain;
860 	int i;
861 	int alloc, off;		/* alloc/off for RO/W arrays */
862 	int cksumvalid;
863 	int dflen;
864 	uint8_t byte;
865 	uint8_t byte2;
866 
867 	/* init vpd reader */
868 	vrs.bytesinval = 0;
869 	vrs.off = 0;
870 	vrs.pcib = pcib;
871 	vrs.cfg = cfg;
872 	vrs.cksum = 0;
873 
874 	state = 0;
875 	name = remain = i = 0;	/* shut up stupid gcc */
876 	alloc = off = 0;	/* shut up stupid gcc */
877 	dflen = 0;		/* shut up stupid gcc */
878 	cksumvalid = -1;
879 	while (state >= 0) {
880 		if (vpd_nextbyte(&vrs, &byte)) {
881 			state = -2;
882 			break;
883 		}
884 #if 0
885 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
886 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
887 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
888 #endif
889 		switch (state) {
890 		case 0:		/* item name */
891 			if (byte & 0x80) {
892 				if (vpd_nextbyte(&vrs, &byte2)) {
893 					state = -2;
894 					break;
895 				}
896 				remain = byte2;
897 				if (vpd_nextbyte(&vrs, &byte2)) {
898 					state = -2;
899 					break;
900 				}
901 				remain |= byte2 << 8;
902 				if (remain > (0x7f*4 - vrs.off)) {
903 					state = -1;
904 					pci_printf(cfg,
905 					    "invalid VPD data, remain %#x\n",
906 					    remain);
907 				}
908 				name = byte & 0x7f;
909 			} else {
910 				remain = byte & 0x7;
911 				name = (byte >> 3) & 0xf;
912 			}
913 			switch (name) {
914 			case 0x2:	/* String */
915 				cfg->vpd.vpd_ident = malloc(remain + 1,
916 				    M_DEVBUF, M_WAITOK);
917 				i = 0;
918 				state = 1;
919 				break;
920 			case 0xf:	/* End */
921 				state = -1;
922 				break;
923 			case 0x10:	/* VPD-R */
924 				alloc = 8;
925 				off = 0;
926 				cfg->vpd.vpd_ros = malloc(alloc *
927 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
928 				    M_WAITOK | M_ZERO);
929 				state = 2;
930 				break;
931 			case 0x11:	/* VPD-W */
932 				alloc = 8;
933 				off = 0;
934 				cfg->vpd.vpd_w = malloc(alloc *
935 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
936 				    M_WAITOK | M_ZERO);
937 				state = 5;
938 				break;
939 			default:	/* Invalid data, abort */
940 				state = -1;
941 				break;
942 			}
943 			break;
944 
945 		case 1:	/* Identifier String */
946 			cfg->vpd.vpd_ident[i++] = byte;
947 			remain--;
948 			if (remain == 0)  {
949 				cfg->vpd.vpd_ident[i] = '\0';
950 				state = 0;
951 			}
952 			break;
953 
954 		case 2:	/* VPD-R Keyword Header */
955 			if (off == alloc) {
956 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
957 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
958 				    M_DEVBUF, M_WAITOK | M_ZERO);
959 			}
960 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
961 			if (vpd_nextbyte(&vrs, &byte2)) {
962 				state = -2;
963 				break;
964 			}
965 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
966 			if (vpd_nextbyte(&vrs, &byte2)) {
967 				state = -2;
968 				break;
969 			}
970 			dflen = byte2;
971 			if (dflen == 0 &&
972 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
973 			    2) == 0) {
974 				/*
975 				 * if this happens, we can't trust the rest
976 				 * of the VPD.
977 				 */
978 				pci_printf(cfg, "bad keyword length: %d\n",
979 				    dflen);
980 				cksumvalid = 0;
981 				state = -1;
982 				break;
983 			} else if (dflen == 0) {
984 				cfg->vpd.vpd_ros[off].value = malloc(1 *
985 				    sizeof(*cfg->vpd.vpd_ros[off].value),
986 				    M_DEVBUF, M_WAITOK);
987 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
988 			} else
989 				cfg->vpd.vpd_ros[off].value = malloc(
990 				    (dflen + 1) *
991 				    sizeof(*cfg->vpd.vpd_ros[off].value),
992 				    M_DEVBUF, M_WAITOK);
993 			remain -= 3;
994 			i = 0;
995 			/* keep in sync w/ state 3's transistions */
996 			if (dflen == 0 && remain == 0)
997 				state = 0;
998 			else if (dflen == 0)
999 				state = 2;
1000 			else
1001 				state = 3;
1002 			break;
1003 
1004 		case 3:	/* VPD-R Keyword Value */
1005 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1006 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1007 			    "RV", 2) == 0 && cksumvalid == -1) {
1008 				if (vrs.cksum == 0)
1009 					cksumvalid = 1;
1010 				else {
1011 					if (bootverbose)
1012 						pci_printf(cfg,
1013 					    "bad VPD cksum, remain %hhu\n",
1014 						    vrs.cksum);
1015 					cksumvalid = 0;
1016 					state = -1;
1017 					break;
1018 				}
1019 			}
1020 			dflen--;
1021 			remain--;
1022 			/* keep in sync w/ state 2's transistions */
1023 			if (dflen == 0)
1024 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1025 			if (dflen == 0 && remain == 0) {
1026 				cfg->vpd.vpd_rocnt = off;
1027 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1028 				    off * sizeof(*cfg->vpd.vpd_ros),
1029 				    M_DEVBUF, M_WAITOK | M_ZERO);
1030 				state = 0;
1031 			} else if (dflen == 0)
1032 				state = 2;
1033 			break;
1034 
1035 		case 4:
1036 			remain--;
1037 			if (remain == 0)
1038 				state = 0;
1039 			break;
1040 
1041 		case 5:	/* VPD-W Keyword Header */
1042 			if (off == alloc) {
1043 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1044 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1045 				    M_DEVBUF, M_WAITOK | M_ZERO);
1046 			}
1047 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1048 			if (vpd_nextbyte(&vrs, &byte2)) {
1049 				state = -2;
1050 				break;
1051 			}
1052 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1053 			if (vpd_nextbyte(&vrs, &byte2)) {
1054 				state = -2;
1055 				break;
1056 			}
1057 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1058 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1059 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1060 			    sizeof(*cfg->vpd.vpd_w[off].value),
1061 			    M_DEVBUF, M_WAITOK);
1062 			remain -= 3;
1063 			i = 0;
1064 			/* keep in sync w/ state 6's transistions */
1065 			if (dflen == 0 && remain == 0)
1066 				state = 0;
1067 			else if (dflen == 0)
1068 				state = 5;
1069 			else
1070 				state = 6;
1071 			break;
1072 
1073 		case 6:	/* VPD-W Keyword Value */
1074 			cfg->vpd.vpd_w[off].value[i++] = byte;
1075 			dflen--;
1076 			remain--;
1077 			/* keep in sync w/ state 5's transistions */
1078 			if (dflen == 0)
1079 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1080 			if (dflen == 0 && remain == 0) {
1081 				cfg->vpd.vpd_wcnt = off;
1082 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1083 				    off * sizeof(*cfg->vpd.vpd_w),
1084 				    M_DEVBUF, M_WAITOK | M_ZERO);
1085 				state = 0;
1086 			} else if (dflen == 0)
1087 				state = 5;
1088 			break;
1089 
1090 		default:
1091 			pci_printf(cfg, "invalid state: %d\n", state);
1092 			state = -1;
1093 			break;
1094 		}
1095 	}
1096 
1097 	if (cksumvalid == 0 || state < -1) {
1098 		/* read-only data bad, clean up */
1099 		if (cfg->vpd.vpd_ros != NULL) {
1100 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1101 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1102 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1103 			cfg->vpd.vpd_ros = NULL;
1104 		}
1105 	}
1106 	if (state < -1) {
1107 		/* I/O error, clean up */
1108 		pci_printf(cfg, "failed to read VPD data.\n");
1109 		if (cfg->vpd.vpd_ident != NULL) {
1110 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1111 			cfg->vpd.vpd_ident = NULL;
1112 		}
1113 		if (cfg->vpd.vpd_w != NULL) {
1114 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1115 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1116 			free(cfg->vpd.vpd_w, M_DEVBUF);
1117 			cfg->vpd.vpd_w = NULL;
1118 		}
1119 	}
1120 	cfg->vpd.vpd_cached = 1;
1121 #undef REG
1122 #undef WREG
1123 }
1124 
1125 int
1126 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1127 {
1128 	struct pci_devinfo *dinfo = device_get_ivars(child);
1129 	pcicfgregs *cfg = &dinfo->cfg;
1130 
1131 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1132 		pci_read_vpd(device_get_parent(dev), cfg);
1133 
1134 	*identptr = cfg->vpd.vpd_ident;
1135 
1136 	if (*identptr == NULL)
1137 		return (ENXIO);
1138 
1139 	return (0);
1140 }
1141 
1142 int
1143 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1144 	const char **vptr)
1145 {
1146 	struct pci_devinfo *dinfo = device_get_ivars(child);
1147 	pcicfgregs *cfg = &dinfo->cfg;
1148 	int i;
1149 
1150 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1151 		pci_read_vpd(device_get_parent(dev), cfg);
1152 
1153 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1154 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1155 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1156 			*vptr = cfg->vpd.vpd_ros[i].value;
1157 			return (0);
1158 		}
1159 
1160 	*vptr = NULL;
1161 	return (ENXIO);
1162 }
1163 
1164 /*
1165  * Find the requested HyperTransport capability and return the offset
1166  * in configuration space via the pointer provided.  The function
1167  * returns 0 on success and an error code otherwise.
1168  */
1169 int
1170 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1171 {
1172 	int ptr, error;
1173 	uint16_t val;
1174 
1175 	error = pci_find_cap(child, PCIY_HT, &ptr);
1176 	if (error)
1177 		return (error);
1178 
1179 	/*
1180 	 * Traverse the capabilities list checking each HT capability
1181 	 * to see if it matches the requested HT capability.
1182 	 */
1183 	while (ptr != 0) {
1184 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1185 		if (capability == PCIM_HTCAP_SLAVE ||
1186 		    capability == PCIM_HTCAP_HOST)
1187 			val &= 0xe000;
1188 		else
1189 			val &= PCIM_HTCMD_CAP_MASK;
1190 		if (val == capability) {
1191 			if (capreg != NULL)
1192 				*capreg = ptr;
1193 			return (0);
1194 		}
1195 
1196 		/* Skip to the next HT capability. */
1197 		while (ptr != 0) {
1198 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1199 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1200 			    PCIY_HT)
1201 				break;
1202 		}
1203 	}
1204 	return (ENOENT);
1205 }
1206 
1207 /*
1208  * Find the requested capability and return the offset in
1209  * configuration space via the pointer provided.  The function returns
1210  * 0 on success and an error code otherwise.
1211  */
1212 int
1213 pci_find_cap_method(device_t dev, device_t child, int capability,
1214     int *capreg)
1215 {
1216 	struct pci_devinfo *dinfo = device_get_ivars(child);
1217 	pcicfgregs *cfg = &dinfo->cfg;
1218 	u_int32_t status;
1219 	u_int8_t ptr;
1220 
1221 	/*
1222 	 * Check the CAP_LIST bit of the PCI status register first.
1223 	 */
1224 	status = pci_read_config(child, PCIR_STATUS, 2);
1225 	if (!(status & PCIM_STATUS_CAPPRESENT))
1226 		return (ENXIO);
1227 
1228 	/*
1229 	 * Determine the start pointer of the capabilities list.
1230 	 */
1231 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1232 	case PCIM_HDRTYPE_NORMAL:
1233 	case PCIM_HDRTYPE_BRIDGE:
1234 		ptr = PCIR_CAP_PTR;
1235 		break;
1236 	case PCIM_HDRTYPE_CARDBUS:
1237 		ptr = PCIR_CAP_PTR_2;
1238 		break;
1239 	default:
1240 		/* XXX: panic? */
1241 		return (ENXIO);		/* no extended capabilities support */
1242 	}
1243 	ptr = pci_read_config(child, ptr, 1);
1244 
1245 	/*
1246 	 * Traverse the capabilities list.
1247 	 */
1248 	while (ptr != 0) {
1249 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1250 			if (capreg != NULL)
1251 				*capreg = ptr;
1252 			return (0);
1253 		}
1254 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1255 	}
1256 
1257 	return (ENOENT);
1258 }
1259 
1260 /*
1261  * Find the requested extended capability and return the offset in
1262  * configuration space via the pointer provided.  The function returns
1263  * 0 on success and an error code otherwise.
1264  */
1265 int
1266 pci_find_extcap_method(device_t dev, device_t child, int capability,
1267     int *capreg)
1268 {
1269 	struct pci_devinfo *dinfo = device_get_ivars(child);
1270 	pcicfgregs *cfg = &dinfo->cfg;
1271 	uint32_t ecap;
1272 	uint16_t ptr;
1273 
1274 	/* Only supported for PCI-express devices. */
1275 	if (cfg->pcie.pcie_location == 0)
1276 		return (ENXIO);
1277 
1278 	ptr = PCIR_EXTCAP;
1279 	ecap = pci_read_config(child, ptr, 4);
1280 	if (ecap == 0xffffffff || ecap == 0)
1281 		return (ENOENT);
1282 	for (;;) {
1283 		if (PCI_EXTCAP_ID(ecap) == capability) {
1284 			if (capreg != NULL)
1285 				*capreg = ptr;
1286 			return (0);
1287 		}
1288 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1289 		if (ptr == 0)
1290 			break;
1291 		ecap = pci_read_config(child, ptr, 4);
1292 	}
1293 
1294 	return (ENOENT);
1295 }
1296 
1297 /*
1298  * Support for MSI-X message interrupts.
1299  */
1300 void
1301 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1302 {
1303 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1304 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1305 	uint32_t offset;
1306 
1307 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1308 	offset = msix->msix_table_offset + index * 16;
1309 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1310 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1311 	bus_write_4(msix->msix_table_res, offset + 8, data);
1312 
1313 	/* Enable MSI -> HT mapping. */
1314 	pci_ht_map_msi(dev, address);
1315 }
1316 
1317 void
1318 pci_mask_msix(device_t dev, u_int index)
1319 {
1320 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1321 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1322 	uint32_t offset, val;
1323 
1324 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1325 	offset = msix->msix_table_offset + index * 16 + 12;
1326 	val = bus_read_4(msix->msix_table_res, offset);
1327 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1328 		val |= PCIM_MSIX_VCTRL_MASK;
1329 		bus_write_4(msix->msix_table_res, offset, val);
1330 	}
1331 }
1332 
1333 void
1334 pci_unmask_msix(device_t dev, u_int index)
1335 {
1336 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1337 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1338 	uint32_t offset, val;
1339 
1340 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1341 	offset = msix->msix_table_offset + index * 16 + 12;
1342 	val = bus_read_4(msix->msix_table_res, offset);
1343 	if (val & PCIM_MSIX_VCTRL_MASK) {
1344 		val &= ~PCIM_MSIX_VCTRL_MASK;
1345 		bus_write_4(msix->msix_table_res, offset, val);
1346 	}
1347 }
1348 
1349 int
1350 pci_pending_msix(device_t dev, u_int index)
1351 {
1352 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1353 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1354 	uint32_t offset, bit;
1355 
1356 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1357 	offset = msix->msix_pba_offset + (index / 32) * 4;
1358 	bit = 1 << index % 32;
1359 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1360 }
1361 
1362 /*
1363  * Restore MSI-X registers and table during resume.  If MSI-X is
1364  * enabled then walk the virtual table to restore the actual MSI-X
1365  * table.
1366  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			/* mte_vector is a 1-based index; 0 means unused. */
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/*
	 * Restore the saved control register unconditionally so the
	 * enable/function-mask bits match the pre-suspend state even
	 * when no vectors are allocated.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1394 
1395 /*
1396  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1397  * returned in *count.  After this function returns, each message will be
1398  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1399  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The caller must
	 * already have activated the memory BAR(s) holding the MSI-X
	 * table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, 'rle' is still the table's. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if no message was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Message i is initially backed by vector i + 1 (table vector
	 * numbers are 1-based; 0 marks an unused slot).
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1534 
1535 /*
1536  * By default, pci_alloc_msix() will assign the allocated IRQ
1537  * resources consecutively to the first N messages in the MSI-X table.
1538  * However, device drivers may want to use different layouts if they
1539  * either receive fewer messages than they asked for, or they wish to
1540  * populate the MSI-X table sparsely.  This method allows the driver
1541  * to specify what layout it wants.  It must be called after a
1542  * successful pci_alloc_msix() but before any of the associated
1543  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1544  *
1545  * The 'vectors' array contains 'count' message vectors.  The array
1546  * maps directly to the MSI-X table in that index 0 in the array
1547  * specifies the vector for the first message in the MSI-X table, etc.
1548  * The vector value in each array index can either be 0 to indicate
1549  * that no vector should be assigned to a message slot, or it can be a
1550  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1552  * vector (IRQ) to be used for the corresponding message.
1553  *
1554  * On successful return, each message with a non-zero vector will have
1555  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1556  * 1.  Additionally, if any of the IRQs allocated via the previous
1557  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1558  * will be freed back to the system automatically.
1559  *
1560  * For example, suppose a driver has a MSI-X table with 6 messages and
1561  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1562  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1563  * C.  After the call to pci_alloc_msix(), the device will be setup to
1564  * have an MSI-X table of ABC--- (where - means no vector assigned).
1565  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1566  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1567  * be freed back to the system.  This device will also have valid
1568  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1569  *
1570  * In any case, the SYS_RES_IRQ rid X will always map to the message
1571  * at MSI-X table index X - 1 and will only be valid if a vector is
1572  * assigned to that table entry.
1573  */
1574 int
1575 pci_remap_msix_method(device_t dev, device_t child, int count,
1576     const u_int *vectors)
1577 {
1578 	struct pci_devinfo *dinfo = device_get_ivars(child);
1579 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1580 	struct resource_list_entry *rle;
1581 	int i, irq, j, *used;
1582 
1583 	/*
1584 	 * Have to have at least one message in the table but the
1585 	 * table can't be bigger than the actual MSI-X table in the
1586 	 * device.
1587 	 */
1588 	if (count == 0 || count > msix->msix_msgnum)
1589 		return (EINVAL);
1590 
1591 	/* Sanity check the vectors. */
1592 	for (i = 0; i < count; i++)
1593 		if (vectors[i] > msix->msix_alloc)
1594 			return (EINVAL);
1595 
1596 	/*
1597 	 * Make sure there aren't any holes in the vectors to be used.
1598 	 * It's a big pain to support it, and it doesn't really make
1599 	 * sense anyway.  Also, at least one vector must be used.
1600 	 */
1601 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1602 	    M_ZERO);
1603 	for (i = 0; i < count; i++)
1604 		if (vectors[i] != 0)
1605 			used[vectors[i] - 1] = 1;
1606 	for (i = 0; i < msix->msix_alloc - 1; i++)
1607 		if (used[i] == 0 && used[i + 1] == 1) {
1608 			free(used, M_DEVBUF);
1609 			return (EINVAL);
1610 		}
1611 	if (used[0] != 1) {
1612 		free(used, M_DEVBUF);
1613 		return (EINVAL);
1614 	}
1615 
1616 	/* Make sure none of the resources are allocated. */
1617 	for (i = 0; i < msix->msix_table_len; i++) {
1618 		if (msix->msix_table[i].mte_vector == 0)
1619 			continue;
1620 		if (msix->msix_table[i].mte_handlers > 0)
1621 			return (EBUSY);
1622 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1623 		KASSERT(rle != NULL, ("missing resource"));
1624 		if (rle->res != NULL)
1625 			return (EBUSY);
1626 	}
1627 
1628 	/* Free the existing resource list entries. */
1629 	for (i = 0; i < msix->msix_table_len; i++) {
1630 		if (msix->msix_table[i].mte_vector == 0)
1631 			continue;
1632 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1633 	}
1634 
1635 	/*
1636 	 * Build the new virtual table keeping track of which vectors are
1637 	 * used.
1638 	 */
1639 	free(msix->msix_table, M_DEVBUF);
1640 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1641 	    M_DEVBUF, M_WAITOK | M_ZERO);
1642 	for (i = 0; i < count; i++)
1643 		msix->msix_table[i].mte_vector = vectors[i];
1644 	msix->msix_table_len = count;
1645 
1646 	/* Free any unused IRQs and resize the vectors array if necessary. */
1647 	j = msix->msix_alloc - 1;
1648 	if (used[j] == 0) {
1649 		struct msix_vector *vec;
1650 
1651 		while (used[j] == 0) {
1652 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1653 			    msix->msix_vectors[j].mv_irq);
1654 			j--;
1655 		}
1656 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1657 		    M_WAITOK);
1658 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1659 		    (j + 1));
1660 		free(msix->msix_vectors, M_DEVBUF);
1661 		msix->msix_vectors = vec;
1662 		msix->msix_alloc = j + 1;
1663 	}
1664 	free(used, M_DEVBUF);
1665 
1666 	/* Map the IRQs onto the rids. */
1667 	for (i = 0; i < count; i++) {
1668 		if (vectors[i] == 0)
1669 			continue;
1670 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1671 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1672 		    irq, 1);
1673 	}
1674 
1675 	if (bootverbose) {
1676 		device_printf(child, "Remapped MSI-X IRQs as: ");
1677 		for (i = 0; i < count; i++) {
1678 			if (i != 0)
1679 				printf(", ");
1680 			if (vectors[i] == 0)
1681 				printf("---");
1682 			else
1683 				printf("%d",
1684 				    msix->msix_vectors[vectors[i]].mv_irq);
1685 		}
1686 		printf("\n");
1687 	}
1688 
1689 	return (0);
1690 }
1691 
1692 static int
1693 pci_release_msix(device_t dev, device_t child)
1694 {
1695 	struct pci_devinfo *dinfo = device_get_ivars(child);
1696 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1697 	struct resource_list_entry *rle;
1698 	int i;
1699 
1700 	/* Do we have any messages to release? */
1701 	if (msix->msix_alloc == 0)
1702 		return (ENODEV);
1703 
1704 	/* Make sure none of the resources are allocated. */
1705 	for (i = 0; i < msix->msix_table_len; i++) {
1706 		if (msix->msix_table[i].mte_vector == 0)
1707 			continue;
1708 		if (msix->msix_table[i].mte_handlers > 0)
1709 			return (EBUSY);
1710 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1711 		KASSERT(rle != NULL, ("missing resource"));
1712 		if (rle->res != NULL)
1713 			return (EBUSY);
1714 	}
1715 
1716 	/* Update control register to disable MSI-X. */
1717 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1718 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1719 	    msix->msix_ctrl, 2);
1720 
1721 	/* Free the resource list entries. */
1722 	for (i = 0; i < msix->msix_table_len; i++) {
1723 		if (msix->msix_table[i].mte_vector == 0)
1724 			continue;
1725 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1726 	}
1727 	free(msix->msix_table, M_DEVBUF);
1728 	msix->msix_table_len = 0;
1729 
1730 	/* Release the IRQs. */
1731 	for (i = 0; i < msix->msix_alloc; i++)
1732 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1733 		    msix->msix_vectors[i].mv_irq);
1734 	free(msix->msix_vectors, M_DEVBUF);
1735 	msix->msix_alloc = 0;
1736 	return (0);
1737 }
1738 
1739 /*
1740  * Return the max supported MSI-X messages this device supports.
1741  * Basically, assuming the MD code can alloc messages, this function
1742  * should return the maximum value that pci_alloc_msix() can return.
1743  * Thus, it is subject to the tunables, etc.
1744  */
1745 int
1746 pci_msix_count_method(device_t dev, device_t child)
1747 {
1748 	struct pci_devinfo *dinfo = device_get_ivars(child);
1749 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1750 
1751 	if (pci_do_msix && msix->msix_location != 0)
1752 		return (msix->msix_msgnum);
1753 	return (0);
1754 }
1755 
1756 /*
1757  * HyperTransport MSI mapping control
1758  */
1759 void
1760 pci_ht_map_msi(device_t dev, uint64_t addr)
1761 {
1762 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1763 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1764 
1765 	if (!ht->ht_msimap)
1766 		return;
1767 
1768 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1769 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1770 		/* Enable MSI -> HT mapping. */
1771 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1772 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1773 		    ht->ht_msictrl, 2);
1774 	}
1775 
1776 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1777 		/* Disable MSI -> HT mapping. */
1778 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1779 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1780 		    ht->ht_msictrl, 2);
1781 	}
1782 }
1783 
1784 int
1785 pci_get_max_read_req(device_t dev)
1786 {
1787 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1788 	int cap;
1789 	uint16_t val;
1790 
1791 	cap = dinfo->cfg.pcie.pcie_location;
1792 	if (cap == 0)
1793 		return (0);
1794 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1795 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1796 	val >>= 12;
1797 	return (1 << (val + 7));
1798 }
1799 
1800 int
1801 pci_set_max_read_req(device_t dev, int size)
1802 {
1803 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1804 	int cap;
1805 	uint16_t val;
1806 
1807 	cap = dinfo->cfg.pcie.pcie_location;
1808 	if (cap == 0)
1809 		return (0);
1810 	if (size < 128)
1811 		size = 128;
1812 	if (size > 4096)
1813 		size = 4096;
1814 	size = (1 << (fls(size) - 1));
1815 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1816 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1817 	val |= (fls(size) - 8) << 12;
1818 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1819 	return (size);
1820 }
1821 
1822 /*
1823  * Support for MSI message signalled interrupts.
1824  */
/*
 * Program the MSI address and data registers with the given values
 * and set the MSI enable bit.  The data register's offset depends on
 * whether the capability advertises 64-bit address support.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values before setting the enable bit. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1851 
/*
 * Clear the MSI enable bit, dropping the device back to legacy/other
 * interrupt delivery, and tear down any MSI -> HT mapping first.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1866 
1867 /*
1868  * Restore MSI registers during resume.  If MSI is enabled then
1869  * restore the data and address registers in addition to the control
1870  * register.
1871  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* The data register offset depends on 64-bit support. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control register unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1897 
/*
 * Reroute an already-allocated MSI or MSI-X IRQ: fetch fresh address
 * and data values for 'irq' from the parent bridge and reprogram the
 * device registers (MSI) or every MSI-X table slot using that vector.
 * Returns 0 on success, ENOENT if the IRQ is not one of ours.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram the registers while disabled. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot on this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is 1-based. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1970 
1971 /*
1972  * Returns true if the specified device is blacklisted because MSI
1973  * doesn't work.
1974  */
1975 int
1976 pci_msi_device_blacklisted(device_t dev)
1977 {
1978 	const struct pci_quirk *q;
1979 
1980 	if (!pci_honor_msi_blacklist)
1981 		return (0);
1982 
1983 	for (q = &pci_quirks[0]; q->devid; q++) {
1984 		if (q->devid == pci_get_devid(dev) &&
1985 		    q->type == PCI_QUIRK_DISABLE_MSI)
1986 			return (1);
1987 	}
1988 	return (0);
1989 }
1990 
1991 /*
1992  * Returns true if a specified chipset supports MSI when it is
1993  * emulated hardware in a virtual machine.
1994  */
1995 static int
1996 pci_msi_vm_chipset(device_t dev)
1997 {
1998 	const struct pci_quirk *q;
1999 
2000 	for (q = &pci_quirks[0]; q->devid; q++) {
2001 		if (q->devid == pci_get_devid(dev) &&
2002 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2003 			return (1);
2004 	}
2005 	return (0);
2006 }
2007 
/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
 * we just check for blacklisted chipsets as represented by the
 * host-PCI bridge at device 0:0:0.  In the future, it may become
 * necessary to check other system attributes, such as the kenv values
 * that give the motherboard manufacturer and model number.
 */
2015 static int
2016 pci_msi_blacklisted(void)
2017 {
2018 	device_t dev;
2019 
2020 	if (!pci_honor_msi_blacklist)
2021 		return (0);
2022 
2023 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2024 	if (!(pcie_chipset || pcix_chipset)) {
2025 		if (vm_guest != VM_GUEST_NO) {
2026 			dev = pci_find_bsf(0, 0, 0);
2027 			if (dev != NULL)
2028 				return (pci_msi_vm_chipset(dev) == 0);
2029 		}
2030 		return (1);
2031 	}
2032 
2033 	dev = pci_find_bsf(0, 0, 0);
2034 	if (dev != NULL)
2035 		return (pci_msi_device_blacklisted(dev));
2036 	return (0);
2037 }
2038 
2039 /*
2040  * Attempt to allocate *count MSI messages.  The actual number allocated is
2041  * returned in *count.  After this function returns, each message will be
2042  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2043  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request until the parent can satisfy it (or fail at 1). */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count (MME field, bits 6:4). */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2162 
/*
 * Release the MSI messages associated with this device.  Fails with
 * EBUSY if any message still has a handler established or an IRQ
 * resource outstanding; returns ENODEV if no MSI messages are
 * allocated (MSI-X release is attempted first).
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember each IRQ so it can be handed back to the parent. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2211 
2212 /*
2213  * Return the max supported MSI messages this device supports.
2214  * Basically, assuming the MD code can alloc messages, this function
2215  * should return the maximum value that pci_alloc_msi() can return.
2216  * Thus, it is subject to the tunables, etc.
2217  */
2218 int
2219 pci_msi_count_method(device_t dev, device_t child)
2220 {
2221 	struct pci_devinfo *dinfo = device_get_ivars(child);
2222 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2223 
2224 	if (pci_do_msi && msi->msi_location != 0)
2225 		return (msi->msi_msgnum);
2226 	return (0);
2227 }
2228 
/* free pcicfgregs structure and all depending data structures */

int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free any VPD (Vital Product Data) strings read from the device. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records (SAFE variant: entries are freed). */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2262 
/*
 * PCI power management
 */
2266 int
2267 pci_set_powerstate_method(device_t dev, device_t child, int state)
2268 {
2269 	struct pci_devinfo *dinfo = device_get_ivars(child);
2270 	pcicfgregs *cfg = &dinfo->cfg;
2271 	uint16_t status;
2272 	int result, oldstate, highest, delay;
2273 
2274 	if (cfg->pp.pp_cap == 0)
2275 		return (EOPNOTSUPP);
2276 
2277 	/*
2278 	 * Optimize a no state change request away.  While it would be OK to
2279 	 * write to the hardware in theory, some devices have shown odd
2280 	 * behavior when going from D3 -> D3.
2281 	 */
2282 	oldstate = pci_get_powerstate(child);
2283 	if (oldstate == state)
2284 		return (0);
2285 
2286 	/*
2287 	 * The PCI power management specification states that after a state
2288 	 * transition between PCI power states, system software must
2289 	 * guarantee a minimal delay before the function accesses the device.
2290 	 * Compute the worst case delay that we need to guarantee before we
2291 	 * access the device.  Many devices will be responsive much more
2292 	 * quickly than this delay, but there are some that don't respond
2293 	 * instantly to state changes.  Transitions to/from D3 state require
2294 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2295 	 * is done below with DELAY rather than a sleeper function because
2296 	 * this function can be called from contexts where we cannot sleep.
2297 	 */
2298 	highest = (oldstate > state) ? oldstate : state;
2299 	if (highest == PCI_POWERSTATE_D3)
2300 	    delay = 10000;
2301 	else if (highest == PCI_POWERSTATE_D2)
2302 	    delay = 200;
2303 	else
2304 	    delay = 0;
2305 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2306 	    & ~PCIM_PSTAT_DMASK;
2307 	result = 0;
2308 	switch (state) {
2309 	case PCI_POWERSTATE_D0:
2310 		status |= PCIM_PSTAT_D0;
2311 		break;
2312 	case PCI_POWERSTATE_D1:
2313 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2314 			return (EOPNOTSUPP);
2315 		status |= PCIM_PSTAT_D1;
2316 		break;
2317 	case PCI_POWERSTATE_D2:
2318 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2319 			return (EOPNOTSUPP);
2320 		status |= PCIM_PSTAT_D2;
2321 		break;
2322 	case PCI_POWERSTATE_D3:
2323 		status |= PCIM_PSTAT_D3;
2324 		break;
2325 	default:
2326 		return (EINVAL);
2327 	}
2328 
2329 	if (bootverbose)
2330 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2331 		    state);
2332 
2333 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2334 	if (delay)
2335 		DELAY(delay);
2336 	return (0);
2337 }
2338 
2339 int
2340 pci_get_powerstate_method(device_t dev, device_t child)
2341 {
2342 	struct pci_devinfo *dinfo = device_get_ivars(child);
2343 	pcicfgregs *cfg = &dinfo->cfg;
2344 	uint16_t status;
2345 	int result;
2346 
2347 	if (cfg->pp.pp_cap != 0) {
2348 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2349 		switch (status & PCIM_PSTAT_DMASK) {
2350 		case PCIM_PSTAT_D0:
2351 			result = PCI_POWERSTATE_D0;
2352 			break;
2353 		case PCIM_PSTAT_D1:
2354 			result = PCI_POWERSTATE_D1;
2355 			break;
2356 		case PCIM_PSTAT_D2:
2357 			result = PCI_POWERSTATE_D2;
2358 			break;
2359 		case PCIM_PSTAT_D3:
2360 			result = PCI_POWERSTATE_D3;
2361 			break;
2362 		default:
2363 			result = PCI_POWERSTATE_UNKNOWN;
2364 			break;
2365 		}
2366 	} else {
2367 		/* No support, device is always at D0 */
2368 		result = PCI_POWERSTATE_D0;
2369 	}
2370 	return (result);
2371 }
2372 
2373 /*
2374  * Some convenience functions for PCI device drivers.
2375  */
2376 
2377 static __inline void
2378 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2379 {
2380 	uint16_t	command;
2381 
2382 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2383 	command |= bit;
2384 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2385 }
2386 
2387 static __inline void
2388 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2389 {
2390 	uint16_t	command;
2391 
2392 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2393 	command &= ~bit;
2394 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2395 }
2396 
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Set the bus-master enable bit in the child's command register. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2403 
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Clear the bus-master enable bit in the child's command register. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2410 
2411 int
2412 pci_enable_io_method(device_t dev, device_t child, int space)
2413 {
2414 	uint16_t bit;
2415 
2416 	switch(space) {
2417 	case SYS_RES_IOPORT:
2418 		bit = PCIM_CMD_PORTEN;
2419 		break;
2420 	case SYS_RES_MEMORY:
2421 		bit = PCIM_CMD_MEMEN;
2422 		break;
2423 	default:
2424 		return (EINVAL);
2425 	}
2426 	pci_set_command_bit(dev, child, bit);
2427 	return (0);
2428 }
2429 
2430 int
2431 pci_disable_io_method(device_t dev, device_t child, int space)
2432 {
2433 	uint16_t bit;
2434 
2435 	switch(space) {
2436 	case SYS_RES_IOPORT:
2437 		bit = PCIM_CMD_PORTEN;
2438 		break;
2439 	case SYS_RES_MEMORY:
2440 		bit = PCIM_CMD_MEMEN;
2441 		break;
2442 	default:
2443 		return (EINVAL);
2444 	}
2445 	pci_clear_command_bit(dev, child, bit);
2446 	return (0);
2447 }
2448 
2449 /*
2450  * New style pci driver.  Parent device is either a pci-host-bridge or a
2451  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2452  */
2453 
/*
 * When booting verbose, print a summary of a discovered device's
 * config header plus its power management, MSI, and MSI-X
 * capabilities (each only when present).
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read PMCSR so the reported state is current. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2510 
2511 static int
2512 pci_porten(device_t dev)
2513 {
2514 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2515 }
2516 
2517 static int
2518 pci_memen(device_t dev)
2519 {
2520 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2521 }
2522 
/*
 * Read the current value of the BAR at config offset 'reg' into
 * *mapp and the sizing test value (all 1's written, then read back)
 * into *testvalp.  The BAR contents and command-register decoding
 * are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2586 
/*
 * Write 'base' into the BAR described by 'pm' and refresh the cached
 * pm_value from the hardware.  A 64-bit BAR consumes two consecutive
 * config registers.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Read back so pm_value reflects what the device actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2607 
2608 struct pci_map *
2609 pci_find_bar(device_t dev, int reg)
2610 {
2611 	struct pci_devinfo *dinfo;
2612 	struct pci_map *pm;
2613 
2614 	dinfo = device_get_ivars(dev);
2615 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2616 		if (pm->pm_reg == reg)
2617 			return (pm);
2618 	}
2619 	return (NULL);
2620 }
2621 
2622 int
2623 pci_bar_enabled(device_t dev, struct pci_map *pm)
2624 {
2625 	struct pci_devinfo *dinfo;
2626 	uint16_t cmd;
2627 
2628 	dinfo = device_get_ivars(dev);
2629 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2630 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2631 		return (0);
2632 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2633 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2634 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2635 	else
2636 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2637 }
2638 
/*
 * Allocate a pci_map record for BAR 'reg' with the given raw value
 * and size, and insert it into the device's map list, which is kept
 * sorted by config register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after; prev is NULL iff the list is empty. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2663 
2664 static void
2665 pci_restore_bars(device_t dev)
2666 {
2667 	struct pci_devinfo *dinfo;
2668 	struct pci_map *pm;
2669 	int ln2range;
2670 
2671 	dinfo = device_get_ivars(dev);
2672 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2673 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2674 			ln2range = 32;
2675 		else
2676 			ln2range = pci_maprange(pm->pm_value);
2677 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2678 		if (ln2range == 64)
2679 			pci_write_config(dev, pm->pm_reg + 4,
2680 			    pm->pm_value >> 32, 4);
2681 	}
2682 }
2683 
/*
 * Add a resource based on a pci map register. Return 1 if the map
 * register is a 32bit map register or 2 if it is a 64bit register.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The decoded address does not fit in a u_long on this arch. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2832 
/*
 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
 * same addresses that old ISA hardware did.  This dictates that we use
 * those addresses and ignore the BARs if we cannot set PCI native
 * addressing mode.
 */
2840 static void
2841 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2842     uint32_t prefetchmask)
2843 {
2844 	struct resource *r;
2845 	int rid, type, progif;
2846 #if 0
2847 	/* if this device supports PCI native addressing use it */
2848 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2849 	if ((progif & 0x8a) == 0x8a) {
2850 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2851 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2852 			printf("Trying ATA native PCI addressing mode\n");
2853 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2854 		}
2855 	}
2856 #endif
2857 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2858 	type = SYS_RES_IOPORT;
2859 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2860 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2861 		    prefetchmask & (1 << 0));
2862 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2863 		    prefetchmask & (1 << 1));
2864 	} else {
2865 		rid = PCIR_BAR(0);
2866 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2867 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2868 		    0x1f7, 8, 0);
2869 		rid = PCIR_BAR(1);
2870 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2871 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2872 		    0x3f6, 1, 0);
2873 	}
2874 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2875 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2876 		    prefetchmask & (1 << 2));
2877 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2878 		    prefetchmask & (1 << 3));
2879 	} else {
2880 		rid = PCIR_BAR(2);
2881 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2882 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2883 		    0x177, 8, 0);
2884 		rid = PCIR_BAR(3);
2885 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2886 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2887 		    0x376, 1, 0);
2888 	}
2889 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2890 	    prefetchmask & (1 << 4));
2891 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2892 	    prefetchmask & (1 << 5));
2893 }
2894 
/*
 * Choose an INTx IRQ for a device and record it as the device's rid 0
 * SYS_RES_IRQ resource.  The IRQ may come from a user tunable, the
 * parent bus's routing method, or the device's intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only accept tunable values in the range 1-254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2942 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM owns the controller; request an ownership change. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 times, 1ms apart, for SMM to release it. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; reset the controller instead. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2979 
/*
 * Perform early UHCI takeover from SMM.
 *
 * UHCI has no ownership-handshake protocol; instead, writing the
 * legacy-support register with only the PIRQD-enable bit set disables
 * the BIOS's legacy USB emulation.  The controller's own interrupts
 * are then masked through its I/O-mapped register block.
 */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3003 
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walk the extended-capability list advertised in HCCPARAMS looking
 * for the USB legacy-support capability.  If the BIOS semaphore is
 * held, raise the OS semaphore and wait up to 100 x 1ms for the BIOS
 * to release ownership, then mask the controller's interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* EHCI extended capabilities live in PCI config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not claim ownership. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Raise the OS semaphore to request ownership. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS semaphore to clear. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			/* Proceed anyway; just note the failure. */
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3059 
/*
 * Perform early XHCI takeover from SMM.
 *
 * Like the EHCI case, but the extended capabilities are memory-mapped
 * (offsets in HCSPARAMS0 are in 32-bit dwords, hence the << 2), and
 * the wait for the BIOS to release the controller is a much longer
 * 5000 x 1ms.  Afterwards the controller is stopped by clearing
 * USBCMD.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Prime eec so XHCI_XECP_NEXT(eec) is nonzero on the first test. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Raise the OS semaphore to request ownership. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			/* Proceed anyway; just note the failure. */
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3121 
/*
 * Populate the resource list for a newly discovered PCI device:
 * decode its BARs (with special handling for legacy ATA devices and
 * per-device quirks), record its interrupt, and perform early USB
 * controller takeover from SMM firmware where requested.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many BAR slots it consumed. */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				/* Quirk matched: ignore this BAR entirely. */
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM firmware if configured. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3195 
/*
 * Scan every slot/function on the given bus, reading each present
 * device's config header and adding a child device for it.  Functions
 * above 0 are only probed when the header's multi-function bit is set.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* Brief settle delay before touching each slot. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with absent or unrecognized header types. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			/* pci_read_device() returns NULL if no device. */
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3228 
/*
 * Create a new-bus child for a discovered PCI device and set up its
 * state: save then restore the config registers (priming the saved
 * copy used across suspend/resume), print it if verbose, and decode
 * its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save first so a baseline exists before any restore. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3240 
3241 static int
3242 pci_probe(device_t dev)
3243 {
3244 
3245 	device_set_desc(dev, "PCI bus");
3246 
3247 	/* Allow other subclasses to override this driver. */
3248 	return (BUS_PROBE_GENERIC);
3249 }
3250 
/*
 * Common attach work shared by pci(4) and its subclasses: look up the
 * bus's domain and bus number and establish the DMA tag children will
 * inherit.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a boundary-restricted DMA tag, but only at the top of a
	 * PCI hierarchy (when the grandparent device is not itself a PCI
	 * bus); nested buses inherit their parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* Fall back to the parent's tag; note this if binds to the line
	 * below when PCI_DMA_BOUNDARY is defined. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3285 
3286 static int
3287 pci_attach(device_t dev)
3288 {
3289 	int busno, domain, error;
3290 
3291 	error = pci_attach_common(dev);
3292 	if (error)
3293 		return (error);
3294 
3295 	/*
3296 	 * Since there can be multiple independantly numbered PCI
3297 	 * busses on systems with multiple PCI domains, we can't use
3298 	 * the unit number to decide which bus we are probing. We ask
3299 	 * the parent pcib what our domain and bus numbers are.
3300 	 */
3301 	domain = pcib_get_domain(dev);
3302 	busno = pcib_get_bus(dev);
3303 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3304 	return (bus_generic_attach(dev));
3305 }
3306 
3307 static void
3308 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3309     int state)
3310 {
3311 	device_t child, pcib;
3312 	struct pci_devinfo *dinfo;
3313 	int dstate, i;
3314 
3315 	/*
3316 	 * Set the device to the given state.  If the firmware suggests
3317 	 * a different power state, use it instead.  If power management
3318 	 * is not present, the firmware is responsible for managing
3319 	 * device power.  Skip children who aren't attached since they
3320 	 * are handled separately.
3321 	 */
3322 	pcib = device_get_parent(dev);
3323 	for (i = 0; i < numdevs; i++) {
3324 		child = devlist[i];
3325 		dinfo = device_get_ivars(child);
3326 		dstate = state;
3327 		if (device_is_attached(child) &&
3328 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3329 			pci_set_powerstate(child, dstate);
3330 	}
3331 }
3332 
3333 int
3334 pci_suspend(device_t dev)
3335 {
3336 	device_t child, *devlist;
3337 	struct pci_devinfo *dinfo;
3338 	int error, i, numdevs;
3339 
3340 	/*
3341 	 * Save the PCI configuration space for each child and set the
3342 	 * device in the appropriate power state for this sleep state.
3343 	 */
3344 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3345 		return (error);
3346 	for (i = 0; i < numdevs; i++) {
3347 		child = devlist[i];
3348 		dinfo = device_get_ivars(child);
3349 		pci_cfg_save(child, dinfo, 0);
3350 	}
3351 
3352 	/* Suspend devices before potentially powering them down. */
3353 	error = bus_generic_suspend(dev);
3354 	if (error) {
3355 		free(devlist, M_TEMP);
3356 		return (error);
3357 	}
3358 	if (pci_do_power_suspend)
3359 		pci_set_power_children(dev, devlist, numdevs,
3360 		    PCI_POWERSTATE_D3);
3361 	free(devlist, M_TEMP);
3362 	return (0);
3363 }
3364 
3365 int
3366 pci_resume(device_t dev)
3367 {
3368 	device_t child, *devlist;
3369 	struct pci_devinfo *dinfo;
3370 	int error, i, numdevs;
3371 
3372 	/*
3373 	 * Set each child to D0 and restore its PCI configuration space.
3374 	 */
3375 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3376 		return (error);
3377 	if (pci_do_power_resume)
3378 		pci_set_power_children(dev, devlist, numdevs,
3379 		    PCI_POWERSTATE_D0);
3380 
3381 	/* Now the device is powered up, restore its config space. */
3382 	for (i = 0; i < numdevs; i++) {
3383 		child = devlist[i];
3384 		dinfo = device_get_ivars(child);
3385 
3386 		pci_cfg_restore(child, dinfo);
3387 		if (!device_is_attached(child))
3388 			pci_cfg_save(child, dinfo, 1);
3389 	}
3390 
3391 	/*
3392 	 * Resume critical devices first, then everything else later.
3393 	 */
3394 	for (i = 0; i < numdevs; i++) {
3395 		child = devlist[i];
3396 		switch (pci_get_class(child)) {
3397 		case PCIC_DISPLAY:
3398 		case PCIC_MEMORY:
3399 		case PCIC_BRIDGE:
3400 		case PCIC_BASEPERIPH:
3401 			DEVICE_RESUME(child);
3402 			break;
3403 		}
3404 	}
3405 	for (i = 0; i < numdevs; i++) {
3406 		child = devlist[i];
3407 		switch (pci_get_class(child)) {
3408 		case PCIC_DISPLAY:
3409 		case PCIC_MEMORY:
3410 		case PCIC_BRIDGE:
3411 		case PCIC_BASEPERIPH:
3412 			break;
3413 		default:
3414 			DEVICE_RESUME(child);
3415 		}
3416 	}
3417 	free(devlist, M_TEMP);
3418 	return (0);
3419 }
3420 
3421 static void
3422 pci_load_vendor_data(void)
3423 {
3424 	caddr_t data;
3425 	void *ptr;
3426 	size_t sz;
3427 
3428 	data = preload_search_by_type("pci_vendor_data");
3429 	if (data != NULL) {
3430 		ptr = preload_fetch_addr(data);
3431 		sz = preload_fetch_size(data);
3432 		if (ptr != NULL && sz != 0) {
3433 			pci_vendordata = ptr;
3434 			pci_vendordata_size = sz;
3435 			/* terminate the database */
3436 			pci_vendordata[pci_vendordata_size] = '\n';
3437 		}
3438 	}
3439 }
3440 
3441 void
3442 pci_driver_added(device_t dev, driver_t *driver)
3443 {
3444 	int numdevs;
3445 	device_t *devlist;
3446 	device_t child;
3447 	struct pci_devinfo *dinfo;
3448 	int i;
3449 
3450 	if (bootverbose)
3451 		device_printf(dev, "driver added\n");
3452 	DEVICE_IDENTIFY(driver, dev);
3453 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3454 		return;
3455 	for (i = 0; i < numdevs; i++) {
3456 		child = devlist[i];
3457 		if (device_get_state(child) != DS_NOTPRESENT)
3458 			continue;
3459 		dinfo = device_get_ivars(child);
3460 		pci_print_verbose(dinfo);
3461 		if (bootverbose)
3462 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3463 		pci_cfg_restore(child, dinfo);
3464 		if (device_probe_and_attach(child) != 0)
3465 			pci_cfg_save(child, dinfo, 1);
3466 	}
3467 	free(devlist, M_TEMP);
3468 }
3469 
/*
 * Bus method to hook up an interrupt handler for a child.  After the
 * generic setup succeeds, direct children get extra PCI bookkeeping:
 * rid 0 means legacy INTx (re-enable it in the command register),
 * while any other rid is an MSI or MSI-X message that must be mapped
 * by the parent bridge and programmed into the device, with INTx
 * masked.  On any mapping failure the handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI address/data on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the device on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the vector's address/data on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3561 
/*
 * Bus method to tear down a child's interrupt handler.  For direct
 * children, rid 0 (legacy INTx) is masked via the command register;
 * any other rid is an MSI/MSI-X message whose handler count is
 * decremented, disabling/masking the message when it reaches zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * this relies on the rid always being present in the
		 * resource list for an active IRQ — confirm.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* NOTE(review): message text says "MSI-X" but this
			 * is the plain-MSI branch — looks copy-pasted. */
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3620 
3621 int
3622 pci_print_child(device_t dev, device_t child)
3623 {
3624 	struct pci_devinfo *dinfo;
3625 	struct resource_list *rl;
3626 	int retval = 0;
3627 
3628 	dinfo = device_get_ivars(child);
3629 	rl = &dinfo->resources;
3630 
3631 	retval += bus_print_child_header(dev, child);
3632 
3633 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3634 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3635 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3636 	if (device_get_flags(dev))
3637 		retval += printf(" flags %#x", device_get_flags(dev));
3638 
3639 	retval += printf(" at device %d.%d", pci_get_slot(child),
3640 	    pci_get_function(child));
3641 
3642 	retval += bus_print_child_footer(dev, child);
3643 
3644 	return (retval);
3645 }
3646 
/*
 * Class/subclass -> description table used by pci_probe_nomatch() to
 * produce a generic device description when no vendor-database entry
 * matches.  A subclass of -1 supplies the fallback description for
 * the whole class; the table is terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3738 
3739 void
3740 pci_probe_nomatch(device_t dev, device_t child)
3741 {
3742 	int	i;
3743 	char	*cp, *scp, *device;
3744 
3745 	/*
3746 	 * Look for a listing for this device in a loaded device database.
3747 	 */
3748 	if ((device = pci_describe_device(child)) != NULL) {
3749 		device_printf(dev, "<%s>", device);
3750 		free(device, M_DEVBUF);
3751 	} else {
3752 		/*
3753 		 * Scan the class/subclass descriptions for a general
3754 		 * description.
3755 		 */
3756 		cp = "unknown";
3757 		scp = NULL;
3758 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3759 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3760 				if (pci_nomatch_tab[i].subclass == -1) {
3761 					cp = pci_nomatch_tab[i].desc;
3762 				} else if (pci_nomatch_tab[i].subclass ==
3763 				    pci_get_subclass(child)) {
3764 					scp = pci_nomatch_tab[i].desc;
3765 				}
3766 			}
3767 		}
3768 		device_printf(dev, "<%s%s%s>",
3769 		    cp ? cp : "",
3770 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3771 		    scp ? scp : "");
3772 	}
3773 	printf(" at device %d.%d (no driver attached)\n",
3774 	    pci_get_slot(child), pci_get_function(child));
3775 	pci_cfg_save(child, device_get_ivars(child), 1);
3776 	return;
3777 }
3778 
3779 /*
3780  * Parse the PCI device database, if loaded, and return a pointer to a
3781  * description of the device.
3782  *
3783  * The database is flat text formatted as follows:
3784  *
3785  * Any line not in a valid format is ignored.
3786  * Lines are terminated with newline '\n' characters.
3787  *
3788  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3789  * the vendor name.
3790  *
3791  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3792  * - devices cannot be listed without a corresponding VENDOR line.
3793  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3794  * another TAB, then the device name.
3795  */
3796 
3797 /*
3798  * Assuming (ptr) points to the beginning of a line in the database,
3799  * return the vendor or device and description of the next entry.
3800  * The value of (vendor) or (device) inappropriate for the entry type
3801  * is set to -1.  Returns nonzero at the end of the database.
3802  *
3803  * Note that this is slightly unrobust in the face of corrupt data;
3804  * we attempt to safeguard against this by spamming the end of the
3805  * database with a newline when we initialise.
3806  */
3807 static int
3808 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3809 {
3810 	char	*cp = *ptr;
3811 	int	left;
3812 
3813 	*device = -1;
3814 	*vendor = -1;
3815 	**desc = '\0';
3816 	for (;;) {
3817 		left = pci_vendordata_size - (cp - pci_vendordata);
3818 		if (left <= 0) {
3819 			*ptr = cp;
3820 			return(1);
3821 		}
3822 
3823 		/* vendor entry? */
3824 		if (*cp != '\t' &&
3825 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3826 			break;
3827 		/* device entry? */
3828 		if (*cp == '\t' &&
3829 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3830 			break;
3831 
3832 		/* skip to next line */
3833 		while (*cp != '\n' && left > 0) {
3834 			cp++;
3835 			left--;
3836 		}
3837 		if (*cp == '\n') {
3838 			cp++;
3839 			left--;
3840 		}
3841 	}
3842 	/* skip to next line */
3843 	while (*cp != '\n' && left > 0) {
3844 		cp++;
3845 		left--;
3846 	}
3847 	if (*cp == '\n' && left > 0)
3848 		cp++;
3849 	*ptr = cp;
3850 	return(0);
3851 }
3852 
/*
 * Build a malloc'd "vendor, device" description string for a device
 * from the loaded vendor database, or return NULL if no database is
 * loaded or the vendor is unknown.  The caller frees the result with
 * M_DEVBUF.
 *
 * NOTE(review): the 80-byte vp/dp buffers must be large enough for
 * the sscanf field width used in pci_describe_parse_line() plus a
 * terminating NUL — verify the two stay in sync.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor: no device entry found. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: fall back to the raw hex device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3905 
/*
 * Bus method to read an instance variable of a PCI child.  Each ivar
 * maps directly onto a cached field of the child's config registers;
 * unknown ivars return ENOENT.  PCI_IVAR_ETHADDR is not supported
 * here and reports EINVAL after setting the result to NULL.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device-in-high-word, vendor-in-low-word ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3988 
3989 int
3990 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3991 {
3992 	struct pci_devinfo *dinfo;
3993 
3994 	dinfo = device_get_ivars(child);
3995 
3996 	switch (which) {
3997 	case PCI_IVAR_INTPIN:
3998 		dinfo->cfg.intpin = value;
3999 		return (0);
4000 	case PCI_IVAR_ETHADDR:
4001 	case PCI_IVAR_SUBVENDOR:
4002 	case PCI_IVAR_SUBDEVICE:
4003 	case PCI_IVAR_VENDOR:
4004 	case PCI_IVAR_DEVICE:
4005 	case PCI_IVAR_DEVID:
4006 	case PCI_IVAR_CLASS:
4007 	case PCI_IVAR_SUBCLASS:
4008 	case PCI_IVAR_PROGIF:
4009 	case PCI_IVAR_REVID:
4010 	case PCI_IVAR_IRQ:
4011 	case PCI_IVAR_DOMAIN:
4012 	case PCI_IVAR_BUS:
4013 	case PCI_IVAR_SLOT:
4014 	case PCI_IVAR_FUNCTION:
4015 		return (EINVAL);	/* disallow for now */
4016 
4017 	default:
4018 		return (ENOENT);
4019 	}
4020 }
4021 
4022 #include "opt_ddb.h"
4023 #ifdef DDB
4024 #include <ddb/ddb.h>
4025 #include <sys/cons.h>
4026 
4027 /*
4028  * List resources based on pci map registers, used for within ddb
4029  */
4030 
/*
 * ddb "show pciregs" command: walk the global PCI device queue
 * (pci_devq) and print a one-line summary of each device — driver
 * name and unit (or "none<N>" if unattached), domain:bus:slot:func
 * selector, class code, subsystem IDs, vendor/device IDs, revision
 * and header type.  Stops early if the ddb pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Devices with no attached driver are labeled "none"
		 * with a private running counter in place of a unit.
		 */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4070 #endif /* DDB */
4071 
/*
 * Lazily reserve the resource backing the BAR identified by *rid on
 * behalf of 'child'.  The BAR is sized (probing the hardware if it
 * was not sized earlier), the requested resource type is checked
 * against the BAR's actual type, a suitably sized and aligned
 * resource is allocated from the parent bus, recorded in the child's
 * resource list as RLE_RESERVED, and finally the BAR is programmed
 * with the assigned address.  Returns the reserved (inactive)
 * resource, or NULL if the BAR is unimplemented or the request is
 * inconsistent with it.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so later failures can retry it. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type does not match the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs decode naturally aligned ranges; raise alignment to match. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	/* Mark the entry as a reservation backed by the new resource. */
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were actually given. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
4177 
/*
 * Allocate a resource on behalf of a child device.  Requests for
 * devices that are not our immediate children are passed straight up
 * the tree.  For our own children this performs lazy interrupt
 * routing (for SYS_RES_IRQ rid 0) and lazy BAR reservation (for
 * I/O port and memory rids) before satisfying the request from the
 * child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests from grandchildren bypass our bookkeeping entirely. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand the (possibly freshly reserved) entry to the child. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4248 
/*
 * Activate a resource for a child device.  After the generic bus
 * activation succeeds, decoding is enabled for BARs belonging to our
 * own children: device ROM BARs get PCIM_BIOS_ENABLE set in the BAR
 * itself, and I/O port / memory decoding is enabled in the command
 * register via PCI_ENABLE_IO().  Returns 0 or an errno.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4276 
4277 int
4278 pci_deactivate_resource(device_t dev, device_t child, int type,
4279     int rid, struct resource *r)
4280 {
4281 	struct pci_devinfo *dinfo;
4282 	int error;
4283 
4284 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4285 	if (error)
4286 		return (error);
4287 
4288 	/* Disable decoding for device ROMs. */
4289 	if (device_get_parent(child) == dev) {
4290 		dinfo = device_get_ivars(child);
4291 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4292 			pci_write_bar(child, pci_find_bar(child, rid),
4293 			    rman_get_start(r));
4294 	}
4295 	return (0);
4296 }
4297 
/*
 * Detach and destroy a PCI child device, releasing all of its
 * resources.  Memory and I/O decoding is disabled in the command
 * register before the resources are torn down so the hardware stops
 * responding to addresses we are about to give back.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource still active or busy at this point
			 * was leaked by the child's driver; complain and
			 * force-release it before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4337 
/*
 * Delete one entry from a child's resource list.  If the entry is
 * backed by a reserved resource it is unreserved first; entries that
 * are still active or busy are left alone (with a warning), since the
 * child driver still owns them.  Requests for devices that are not
 * our immediate children are ignored.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4380 
4381 struct resource_list *
4382 pci_get_resource_list (device_t dev, device_t child)
4383 {
4384 	struct pci_devinfo *dinfo = device_get_ivars(child);
4385 
4386 	return (&dinfo->resources);
4387 }
4388 
4389 bus_dma_tag_t
4390 pci_get_dma_tag(device_t bus, device_t dev)
4391 {
4392 	struct pci_softc *sc = device_get_softc(bus);
4393 
4394 	return (sc->sc_dma_tag);
4395 }
4396 
4397 uint32_t
4398 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4399 {
4400 	struct pci_devinfo *dinfo = device_get_ivars(child);
4401 	pcicfgregs *cfg = &dinfo->cfg;
4402 
4403 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4404 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4405 }
4406 
4407 void
4408 pci_write_config_method(device_t dev, device_t child, int reg,
4409     uint32_t val, int width)
4410 {
4411 	struct pci_devinfo *dinfo = device_get_ivars(child);
4412 	pcicfgregs *cfg = &dinfo->cfg;
4413 
4414 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4415 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4416 }
4417 
4418 int
4419 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4420     size_t buflen)
4421 {
4422 
4423 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4424 	    pci_get_function(child));
4425 	return (0);
4426 }
4427 
4428 int
4429 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4430     size_t buflen)
4431 {
4432 	struct pci_devinfo *dinfo;
4433 	pcicfgregs *cfg;
4434 
4435 	dinfo = device_get_ivars(child);
4436 	cfg = &dinfo->cfg;
4437 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4438 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4439 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4440 	    cfg->progif);
4441 	return (0);
4442 }
4443 
4444 int
4445 pci_assign_interrupt_method(device_t dev, device_t child)
4446 {
4447 	struct pci_devinfo *dinfo = device_get_ivars(child);
4448 	pcicfgregs *cfg = &dinfo->cfg;
4449 
4450 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4451 	    cfg->intpin));
4452 }
4453 
4454 static int
4455 pci_modevent(module_t mod, int what, void *arg)
4456 {
4457 	static struct cdev *pci_cdev;
4458 
4459 	switch (what) {
4460 	case MOD_LOAD:
4461 		STAILQ_INIT(&pci_devq);
4462 		pci_generation = 0;
4463 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4464 		    "pci");
4465 		pci_load_vendor_data();
4466 		break;
4467 
4468 	case MOD_UNLOAD:
4469 		destroy_dev(pci_cdev);
4470 		break;
4471 	}
4472 
4473 	return (0);
4474 }
4475 
/*
 * Restore the saved PCI Express capability control registers.  Which
 * registers exist depends on the capability version and on the port
 * type, so each write is guarded accordingly: version 2+ devices have
 * the full register set, while version 1 devices only implement the
 * registers relevant to their port type (link control for ports and
 * endpoints, slot control for ports with a slot, root control for
 * root ports / root complex event collectors).
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* 16-bit write at offset n from the PCIe capability base. */
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;

	WREG(PCIR_EXPRESS_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
		WREG(PCIR_EXPRESS_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
		WREG(PCIR_EXPRESS_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
		WREG(PCIR_EXPRESS_ROOT_CTL, cfg->pcie_root_ctl);

	if (version > 1) {
		WREG(PCIR_EXPRESS_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIR_EXPRESS_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIR_EXPRESS_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4511 
4512 static void
4513 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4514 {
4515 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4516 	    dinfo->cfg.pcix.pcix_command,  2);
4517 }
4518 
/*
 * Restore a device's saved configuration state: power it up to D0 if
 * needed, rewrite the BARs and the writable type-0 header registers
 * from the cached copies, then restore the PCIe/PCI-X capability
 * registers and any MSI/MSI-X configuration.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4568 
/*
 * Save the PCI Express capability control registers.  Mirrors
 * pci_cfg_restore_pcie(): which registers are read depends on the
 * capability version and on the port type — version 2+ devices have
 * the full set, version 1 devices only the registers relevant to
 * their port type.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* 16-bit read at offset n from the PCIe capability base. */
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIR_EXPRESS_FLAGS);

	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIR_EXPRESS_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIR_EXPRESS_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIR_EXPRESS_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIR_EXPRESS_ROOT_CTL);

	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIR_EXPRESS_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIR_EXPRESS_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIR_EXPRESS_SLOT_CTL2);
	}
#undef RREG
}
4606 
4607 static void
4608 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4609 {
4610 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4611 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4612 }
4613 
/*
 * Save a device's configuration state so it can be restored later
 * (e.g. across suspend/resume).  If 'setstate' is non-zero, also
 * power the device down to D3 according to the pci_do_power_nodriver
 * policy.  Only type-0 (non-bridge) headers are handled.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Levels are cumulative: each case falls through to the next. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4699 
4700 /* Wrapper APIs suitable for device driver use. */
4701 void
4702 pci_save_state(device_t dev)
4703 {
4704 	struct pci_devinfo *dinfo;
4705 
4706 	dinfo = device_get_ivars(dev);
4707 	pci_cfg_save(dev, dinfo, 0);
4708 }
4709 
4710 void
4711 pci_restore_state(device_t dev)
4712 {
4713 	struct pci_devinfo *dinfo;
4714 
4715 	dinfo = device_get_ivars(dev);
4716 	pci_cfg_restore(dev, dinfo);
4717 }
4718