xref: /freebsd/sys/dev/pci/pci.c (revision c243e4902be8df1e643c76b5f18b68bb77cc5268)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
74 #define	PCI_DMA_BOUNDARY	0x100000000
75 #endif
76 
/*
 * Evaluates to non-zero when 'reg' names the BIOS/ROM register that belongs
 * to the header type stored in 'cfg': PCIR_BIOS for PCIM_HDRTYPE_NORMAL
 * headers and PCIR_BIOS_1 for PCIM_HDRTYPE_BRIDGE headers.
 *
 * Both arguments are parenthesized so that an expression argument (for
 * example "a & b") is compared as a whole.  Note that 'cfg' and 'reg' may
 * each be evaluated more than once, so avoid arguments with side effects.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
80 
81 static pci_addr_t	pci_mapbase(uint64_t mapreg);
82 static const char	*pci_maptype(uint64_t mapreg);
83 static int		pci_mapsize(uint64_t testval);
84 static int		pci_maprange(uint64_t mapreg);
85 static pci_addr_t	pci_rombase(uint64_t mapreg);
86 static int		pci_romsize(uint64_t testval);
87 static void		pci_fixancient(pcicfgregs *cfg);
88 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
89 
90 static int		pci_porten(device_t dev);
91 static int		pci_memen(device_t dev);
92 static void		pci_assign_interrupt(device_t bus, device_t dev,
93 			    int force_route);
94 static int		pci_add_map(device_t bus, device_t dev, int reg,
95 			    struct resource_list *rl, int force, int prefetch);
96 static int		pci_probe(device_t dev);
97 static int		pci_attach(device_t dev);
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
103 static int		pci_modevent(module_t mod, int what, void *arg);
104 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105 			    pcicfgregs *cfg);
106 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108 			    int reg, uint32_t *data);
109 #if 0
110 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111 			    int reg, uint32_t data);
112 #endif
113 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114 static void		pci_disable_msi(device_t dev);
115 static void		pci_enable_msi(device_t dev, uint64_t address,
116 			    uint16_t data);
117 static void		pci_enable_msix(device_t dev, u_int index,
118 			    uint64_t address, uint32_t data);
119 static void		pci_mask_msix(device_t dev, u_int index);
120 static void		pci_unmask_msix(device_t dev, u_int index);
121 static int		pci_msi_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
/*
 * Method dispatch table for the pci bus driver.  Each DEVMETHOD() entry
 * binds an interface method name (device, bus or PCI interface) to the
 * function that implements it; entries naming bus_generic_* use the generic
 * bus implementation instead of a PCI-specific one.  The table is
 * terminated by DEVMETHOD_END.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: DMA tag and resource management */
	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
183 
184 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
185 
186 static devclass_t pci_devclass;
187 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
188 MODULE_VERSION(pci, 1);
189 
190 static char	*pci_vendordata;
191 static size_t	pci_vendordata_size;
192 
/*
 * One entry of the quirk table: a device identified by its combined
 * vendor/device ID, the kind of quirk that applies to it, and up to two
 * quirk-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;
	int	arg2;
};

/*
 * Table of known device quirks, terminated by an all-zero entry
 * (devid == 0).  The array is declared with a single 'const' qualifier;
 * the original repeated the qualifier, which compilers diagnose as a
 * duplicate declaration specifier.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
	 * VMware.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }
};
260 
261 /* map register information */
262 #define	PCI_MAPMEM	0x01	/* memory map */
263 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264 #define	PCI_MAPPORT	0x04	/* port map */
265 
266 struct devlist pci_devq;
267 uint32_t pci_generation;
268 uint32_t pci_numdevs = 0;
269 static int pcie_chipset, pcix_chipset;
270 
271 /* sysctl vars */
272 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273 
274 static int pci_enable_io_modes = 1;
275 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277     &pci_enable_io_modes, 1,
278     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279 enable these bits correctly.  We'd like to do this all the time, but there\n\
280 are some peripherals that this causes problems with.");
281 
282 static int pci_do_power_nodriver = 0;
283 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
284 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
285     &pci_do_power_nodriver, 0,
286   "Place a function into D3 state when no driver attaches to it.  0 means\n\
287 disable.  1 means conservatively place devices into D3 state.  2 means\n\
288 agressively place devices into D3 state.  3 means put absolutely everything\n\
289 in D3 state.");
290 
291 int pci_do_power_resume = 1;
292 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
293 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
294     &pci_do_power_resume, 1,
295   "Transition from D3 -> D0 on resume.");
296 
297 int pci_do_power_suspend = 1;
298 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
299 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
300     &pci_do_power_suspend, 1,
301   "Transition from D0 -> D3 on suspend.");
302 
303 static int pci_do_msi = 1;
304 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
305 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
306     "Enable support for MSI interrupts");
307 
308 static int pci_do_msix = 1;
309 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
310 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
311     "Enable support for MSI-X interrupts");
312 
313 static int pci_honor_msi_blacklist = 1;
314 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
315 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
316     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
317 
318 #if defined(__i386__) || defined(__amd64__)
319 static int pci_usb_takeover = 1;
320 #else
321 static int pci_usb_takeover = 0;
322 #endif
323 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
324 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
325     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
326 Disable this if you depend on BIOS emulation of USB devices, that is\n\
327 you use USB devices (like keyboard or mouse) but do not load USB drivers");
328 
329 /* Find a device_t by bus/slot/function in domain 0 */
330 
331 device_t
332 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
333 {
334 
335 	return (pci_find_dbsf(0, bus, slot, func));
336 }
337 
338 /* Find a device_t by domain/bus/slot/function */
339 
340 device_t
341 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.domain == domain) &&
347 		    (dinfo->cfg.bus == bus) &&
348 		    (dinfo->cfg.slot == slot) &&
349 		    (dinfo->cfg.func == func)) {
350 			return (dinfo->cfg.dev);
351 		}
352 	}
353 
354 	return (NULL);
355 }
356 
357 /* Find a device_t by vendor/device ID */
358 
359 device_t
360 pci_find_device(uint16_t vendor, uint16_t device)
361 {
362 	struct pci_devinfo *dinfo;
363 
364 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
365 		if ((dinfo->cfg.vendor == vendor) &&
366 		    (dinfo->cfg.device == device)) {
367 			return (dinfo->cfg.dev);
368 		}
369 	}
370 
371 	return (NULL);
372 }
373 
374 device_t
375 pci_find_class(uint8_t class, uint8_t subclass)
376 {
377 	struct pci_devinfo *dinfo;
378 
379 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
380 		if (dinfo->cfg.baseclass == class &&
381 		    dinfo->cfg.subclass == subclass) {
382 			return (dinfo->cfg.dev);
383 		}
384 	}
385 
386 	return (NULL);
387 }
388 
389 static int
390 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
391 {
392 	va_list ap;
393 	int retval;
394 
395 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
396 	    cfg->func);
397 	va_start(ap, fmt);
398 	retval += vprintf(fmt, ap);
399 	va_end(ap);
400 	return (retval);
401 }
402 
403 /* return base address of memory or port map */
404 
405 static pci_addr_t
406 pci_mapbase(uint64_t mapreg)
407 {
408 
409 	if (PCI_BAR_MEM(mapreg))
410 		return (mapreg & PCIM_BAR_MEM_BASE);
411 	else
412 		return (mapreg & PCIM_BAR_IO_BASE);
413 }
414 
415 /* return map type of memory or port map */
416 
417 static const char *
418 pci_maptype(uint64_t mapreg)
419 {
420 
421 	if (PCI_BAR_IO(mapreg))
422 		return ("I/O Port");
423 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
424 		return ("Prefetchable Memory");
425 	return ("Memory");
426 }
427 
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * 'testval' is reduced to its base-address bits with pci_mapbase(); the
 * result is the index of the lowest set bit of that value, or 0 when no
 * base-address bit is set.
 */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count trailing zero bits; terminates because base is non-zero. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
446 
447 /* return base address of device ROM */
448 
449 static pci_addr_t
450 pci_rombase(uint64_t mapreg)
451 {
452 
453 	return (mapreg & PCIM_BIOS_ADDR_MASK);
454 }
455 
/*
 * Return log2 of the size decoded by the device ROM register.
 *
 * 'testval' is reduced to its address bits with pci_rombase(); the result
 * is the index of the lowest set bit of that value, or 0 when no address
 * bit is set.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);

	/* Count trailing zero bits; terminates because base is non-zero. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
474 
475 /* return log2 of address range supported by map register */
476 
477 static int
478 pci_maprange(uint64_t mapreg)
479 {
480 	int ln2range = 0;
481 
482 	if (PCI_BAR_IO(mapreg))
483 		ln2range = 32;
484 	else
485 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
486 		case PCIM_BAR_MEM_32:
487 			ln2range = 32;
488 			break;
489 		case PCIM_BAR_MEM_1MB:
490 			ln2range = 20;
491 			break;
492 		case PCIM_BAR_MEM_64:
493 			ln2range = 64;
494 			break;
495 		}
496 	return (ln2range);
497 }
498 
499 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
500 
501 static void
502 pci_fixancient(pcicfgregs *cfg)
503 {
504 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
505 		return;
506 
507 	/* PCI to PCI bridges use header type 1 */
508 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
509 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
510 }
511 
512 /* extract header type specific config data */
513 
514 static void
515 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
516 {
517 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
518 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
519 	case PCIM_HDRTYPE_NORMAL:
520 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
521 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
522 		cfg->nummaps	    = PCI_MAXMAPS_0;
523 		break;
524 	case PCIM_HDRTYPE_BRIDGE:
525 		cfg->nummaps	    = PCI_MAXMAPS_1;
526 		break;
527 	case PCIM_HDRTYPE_CARDBUS:
528 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
529 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
530 		cfg->nummaps	    = PCI_MAXMAPS_2;
531 		break;
532 	}
533 #undef REG
534 }
535 
536 /* read configuration header into pcicfgregs structure */
537 struct pci_devinfo *
538 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
539 {
540 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
541 	pcicfgregs *cfg = NULL;
542 	struct pci_devinfo *devlist_entry;
543 	struct devlist *devlist_head;
544 
545 	devlist_head = &pci_devq;
546 
547 	devlist_entry = NULL;
548 
549 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
550 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
551 		if (devlist_entry == NULL)
552 			return (NULL);
553 
554 		cfg = &devlist_entry->cfg;
555 
556 		cfg->domain		= d;
557 		cfg->bus		= b;
558 		cfg->slot		= s;
559 		cfg->func		= f;
560 		cfg->vendor		= REG(PCIR_VENDOR, 2);
561 		cfg->device		= REG(PCIR_DEVICE, 2);
562 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
563 		cfg->statreg		= REG(PCIR_STATUS, 2);
564 		cfg->baseclass		= REG(PCIR_CLASS, 1);
565 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
566 		cfg->progif		= REG(PCIR_PROGIF, 1);
567 		cfg->revid		= REG(PCIR_REVID, 1);
568 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
569 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
570 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
571 		cfg->intpin		= REG(PCIR_INTPIN, 1);
572 		cfg->intline		= REG(PCIR_INTLINE, 1);
573 
574 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
575 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
576 
577 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
578 		cfg->hdrtype		&= ~PCIM_MFDEV;
579 		STAILQ_INIT(&cfg->maps);
580 
581 		pci_fixancient(cfg);
582 		pci_hdrtypedata(pcib, b, s, f, cfg);
583 
584 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
585 			pci_read_cap(pcib, cfg);
586 
587 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
588 
589 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
590 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
591 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
592 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
593 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
594 
595 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
596 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
597 		devlist_entry->conf.pc_vendor = cfg->vendor;
598 		devlist_entry->conf.pc_device = cfg->device;
599 
600 		devlist_entry->conf.pc_class = cfg->baseclass;
601 		devlist_entry->conf.pc_subclass = cfg->subclass;
602 		devlist_entry->conf.pc_progif = cfg->progif;
603 		devlist_entry->conf.pc_revid = cfg->revid;
604 
605 		pci_numdevs++;
606 		pci_generation++;
607 	}
608 	return (devlist_entry);
609 #undef REG
610 }
611 
612 static void
613 pci_read_cap(device_t pcib, pcicfgregs *cfg)
614 {
615 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
616 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
617 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
618 	uint64_t addr;
619 #endif
620 	uint32_t val;
621 	int	ptr, nextptr, ptrptr;
622 
623 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
624 	case PCIM_HDRTYPE_NORMAL:
625 	case PCIM_HDRTYPE_BRIDGE:
626 		ptrptr = PCIR_CAP_PTR;
627 		break;
628 	case PCIM_HDRTYPE_CARDBUS:
629 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
630 		break;
631 	default:
632 		return;		/* no extended capabilities support */
633 	}
634 	nextptr = REG(ptrptr, 1);	/* sanity check? */
635 
636 	/*
637 	 * Read capability entries.
638 	 */
639 	while (nextptr != 0) {
640 		/* Sanity check */
641 		if (nextptr > 255) {
642 			printf("illegal PCI extended capability offset %d\n",
643 			    nextptr);
644 			return;
645 		}
646 		/* Find the next entry */
647 		ptr = nextptr;
648 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
649 
650 		/* Process this entry */
651 		switch (REG(ptr + PCICAP_ID, 1)) {
652 		case PCIY_PMG:		/* PCI power management */
653 			if (cfg->pp.pp_cap == 0) {
654 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
655 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
656 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
657 				if ((nextptr - ptr) > PCIR_POWER_DATA)
658 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
659 			}
660 			break;
661 		case PCIY_HT:		/* HyperTransport */
662 			/* Determine HT-specific capability type. */
663 			val = REG(ptr + PCIR_HT_COMMAND, 2);
664 
665 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
666 				cfg->ht.ht_slave = ptr;
667 
668 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
669 			switch (val & PCIM_HTCMD_CAP_MASK) {
670 			case PCIM_HTCAP_MSI_MAPPING:
671 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
672 					/* Sanity check the mapping window. */
673 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
674 					    4);
675 					addr <<= 32;
676 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
677 					    4);
678 					if (addr != MSI_INTEL_ADDR_BASE)
679 						device_printf(pcib,
680 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
681 						    cfg->domain, cfg->bus,
682 						    cfg->slot, cfg->func,
683 						    (long long)addr);
684 				} else
685 					addr = MSI_INTEL_ADDR_BASE;
686 
687 				cfg->ht.ht_msimap = ptr;
688 				cfg->ht.ht_msictrl = val;
689 				cfg->ht.ht_msiaddr = addr;
690 				break;
691 			}
692 #endif
693 			break;
694 		case PCIY_MSI:		/* PCI MSI */
695 			cfg->msi.msi_location = ptr;
696 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
697 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
698 						     PCIM_MSICTRL_MMC_MASK)>>1);
699 			break;
700 		case PCIY_MSIX:		/* PCI MSI-X */
701 			cfg->msix.msix_location = ptr;
702 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
703 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
704 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
705 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
706 			cfg->msix.msix_table_bar = PCIR_BAR(val &
707 			    PCIM_MSIX_BIR_MASK);
708 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
709 			val = REG(ptr + PCIR_MSIX_PBA, 4);
710 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
711 			    PCIM_MSIX_BIR_MASK);
712 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
713 			break;
714 		case PCIY_VPD:		/* PCI Vital Product Data */
715 			cfg->vpd.vpd_reg = ptr;
716 			break;
717 		case PCIY_SUBVENDOR:
718 			/* Should always be true. */
719 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
720 			    PCIM_HDRTYPE_BRIDGE) {
721 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
722 				cfg->subvendor = val & 0xffff;
723 				cfg->subdevice = val >> 16;
724 			}
725 			break;
726 		case PCIY_PCIX:		/* PCI-X */
727 			/*
728 			 * Assume we have a PCI-X chipset if we have
729 			 * at least one PCI-PCI bridge with a PCI-X
730 			 * capability.  Note that some systems with
731 			 * PCI-express or HT chipsets might match on
732 			 * this check as well.
733 			 */
734 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
735 			    PCIM_HDRTYPE_BRIDGE)
736 				pcix_chipset = 1;
737 			cfg->pcix.pcix_location = ptr;
738 			break;
739 		case PCIY_EXPRESS:	/* PCI-express */
740 			/*
741 			 * Assume we have a PCI-express chipset if we have
742 			 * at least one PCI-express device.
743 			 */
744 			pcie_chipset = 1;
745 			cfg->pcie.pcie_location = ptr;
746 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
747 			cfg->pcie.pcie_type = val & PCIM_EXP_FLAGS_TYPE;
748 			break;
749 		default:
750 			break;
751 		}
752 	}
753 
754 #if defined(__powerpc__)
755 	/*
756 	 * Enable the MSI mapping window for all HyperTransport
757 	 * slaves.  PCI-PCI bridges have their windows enabled via
758 	 * PCIB_MAP_MSI().
759 	 */
760 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
761 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
762 		device_printf(pcib,
763 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
764 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
765 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
766 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
767 		     2);
768 	}
769 #endif
770 /* REG and WREG use carry through to next functions */
771 }
772 
773 /*
774  * PCI Vital Product Data
775  */
776 
777 #define	PCI_VPD_TIMEOUT		1000000
778 
779 static int
780 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
781 {
782 	int count = PCI_VPD_TIMEOUT;
783 
784 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
785 
786 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
787 
788 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
789 		if (--count < 0)
790 			return (ENXIO);
791 		DELAY(1);	/* limit looping */
792 	}
793 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
794 
795 	return (0);
796 }
797 
798 #if 0
799 static int
800 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
801 {
802 	int count = PCI_VPD_TIMEOUT;
803 
804 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
805 
806 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
807 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
808 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
809 		if (--count < 0)
810 			return (ENXIO);
811 		DELAY(1);	/* limit looping */
812 	}
813 
814 	return (0);
815 }
816 #endif
817 
818 #undef PCI_VPD_TIMEOUT
819 
820 struct vpd_readstate {
821 	device_t	pcib;
822 	pcicfgregs	*cfg;
823 	uint32_t	val;
824 	int		bytesinval;
825 	int		off;
826 	uint8_t		cksum;
827 };
828 
829 static int
830 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
831 {
832 	uint32_t reg;
833 	uint8_t byte;
834 
835 	if (vrs->bytesinval == 0) {
836 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
837 			return (ENXIO);
838 		vrs->val = le32toh(reg);
839 		vrs->off += 4;
840 		byte = vrs->val & 0xff;
841 		vrs->bytesinval = 3;
842 	} else {
843 		vrs->val = vrs->val >> 8;
844 		byte = vrs->val & 0xff;
845 		vrs->bytesinval--;
846 	}
847 
848 	vrs->cksum += byte;
849 	*data = byte;
850 	return (0);
851 }
852 
853 static void
854 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
855 {
856 	struct vpd_readstate vrs;
857 	int state;
858 	int name;
859 	int remain;
860 	int i;
861 	int alloc, off;		/* alloc/off for RO/W arrays */
862 	int cksumvalid;
863 	int dflen;
864 	uint8_t byte;
865 	uint8_t byte2;
866 
867 	/* init vpd reader */
868 	vrs.bytesinval = 0;
869 	vrs.off = 0;
870 	vrs.pcib = pcib;
871 	vrs.cfg = cfg;
872 	vrs.cksum = 0;
873 
874 	state = 0;
875 	name = remain = i = 0;	/* shut up stupid gcc */
876 	alloc = off = 0;	/* shut up stupid gcc */
877 	dflen = 0;		/* shut up stupid gcc */
878 	cksumvalid = -1;
879 	while (state >= 0) {
880 		if (vpd_nextbyte(&vrs, &byte)) {
881 			state = -2;
882 			break;
883 		}
884 #if 0
885 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
886 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
887 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
888 #endif
889 		switch (state) {
890 		case 0:		/* item name */
891 			if (byte & 0x80) {
892 				if (vpd_nextbyte(&vrs, &byte2)) {
893 					state = -2;
894 					break;
895 				}
896 				remain = byte2;
897 				if (vpd_nextbyte(&vrs, &byte2)) {
898 					state = -2;
899 					break;
900 				}
901 				remain |= byte2 << 8;
902 				if (remain > (0x7f*4 - vrs.off)) {
903 					state = -1;
904 					pci_printf(cfg,
905 					    "invalid VPD data, remain %#x\n",
906 					    remain);
907 				}
908 				name = byte & 0x7f;
909 			} else {
910 				remain = byte & 0x7;
911 				name = (byte >> 3) & 0xf;
912 			}
913 			switch (name) {
914 			case 0x2:	/* String */
915 				cfg->vpd.vpd_ident = malloc(remain + 1,
916 				    M_DEVBUF, M_WAITOK);
917 				i = 0;
918 				state = 1;
919 				break;
920 			case 0xf:	/* End */
921 				state = -1;
922 				break;
923 			case 0x10:	/* VPD-R */
924 				alloc = 8;
925 				off = 0;
926 				cfg->vpd.vpd_ros = malloc(alloc *
927 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
928 				    M_WAITOK | M_ZERO);
929 				state = 2;
930 				break;
931 			case 0x11:	/* VPD-W */
932 				alloc = 8;
933 				off = 0;
934 				cfg->vpd.vpd_w = malloc(alloc *
935 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
936 				    M_WAITOK | M_ZERO);
937 				state = 5;
938 				break;
939 			default:	/* Invalid data, abort */
940 				state = -1;
941 				break;
942 			}
943 			break;
944 
945 		case 1:	/* Identifier String */
946 			cfg->vpd.vpd_ident[i++] = byte;
947 			remain--;
948 			if (remain == 0)  {
949 				cfg->vpd.vpd_ident[i] = '\0';
950 				state = 0;
951 			}
952 			break;
953 
954 		case 2:	/* VPD-R Keyword Header */
955 			if (off == alloc) {
956 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
957 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
958 				    M_DEVBUF, M_WAITOK | M_ZERO);
959 			}
960 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
961 			if (vpd_nextbyte(&vrs, &byte2)) {
962 				state = -2;
963 				break;
964 			}
965 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
966 			if (vpd_nextbyte(&vrs, &byte2)) {
967 				state = -2;
968 				break;
969 			}
970 			dflen = byte2;
971 			if (dflen == 0 &&
972 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
973 			    2) == 0) {
974 				/*
975 				 * if this happens, we can't trust the rest
976 				 * of the VPD.
977 				 */
978 				pci_printf(cfg, "bad keyword length: %d\n",
979 				    dflen);
980 				cksumvalid = 0;
981 				state = -1;
982 				break;
983 			} else if (dflen == 0) {
984 				cfg->vpd.vpd_ros[off].value = malloc(1 *
985 				    sizeof(*cfg->vpd.vpd_ros[off].value),
986 				    M_DEVBUF, M_WAITOK);
987 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
988 			} else
989 				cfg->vpd.vpd_ros[off].value = malloc(
990 				    (dflen + 1) *
991 				    sizeof(*cfg->vpd.vpd_ros[off].value),
992 				    M_DEVBUF, M_WAITOK);
993 			remain -= 3;
994 			i = 0;
995 			/* keep in sync w/ state 3's transistions */
996 			if (dflen == 0 && remain == 0)
997 				state = 0;
998 			else if (dflen == 0)
999 				state = 2;
1000 			else
1001 				state = 3;
1002 			break;
1003 
1004 		case 3:	/* VPD-R Keyword Value */
1005 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1006 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1007 			    "RV", 2) == 0 && cksumvalid == -1) {
1008 				if (vrs.cksum == 0)
1009 					cksumvalid = 1;
1010 				else {
1011 					if (bootverbose)
1012 						pci_printf(cfg,
1013 					    "bad VPD cksum, remain %hhu\n",
1014 						    vrs.cksum);
1015 					cksumvalid = 0;
1016 					state = -1;
1017 					break;
1018 				}
1019 			}
1020 			dflen--;
1021 			remain--;
1022 			/* keep in sync w/ state 2's transistions */
1023 			if (dflen == 0)
1024 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1025 			if (dflen == 0 && remain == 0) {
1026 				cfg->vpd.vpd_rocnt = off;
1027 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1028 				    off * sizeof(*cfg->vpd.vpd_ros),
1029 				    M_DEVBUF, M_WAITOK | M_ZERO);
1030 				state = 0;
1031 			} else if (dflen == 0)
1032 				state = 2;
1033 			break;
1034 
1035 		case 4:
1036 			remain--;
1037 			if (remain == 0)
1038 				state = 0;
1039 			break;
1040 
1041 		case 5:	/* VPD-W Keyword Header */
1042 			if (off == alloc) {
1043 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1044 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1045 				    M_DEVBUF, M_WAITOK | M_ZERO);
1046 			}
1047 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1048 			if (vpd_nextbyte(&vrs, &byte2)) {
1049 				state = -2;
1050 				break;
1051 			}
1052 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1053 			if (vpd_nextbyte(&vrs, &byte2)) {
1054 				state = -2;
1055 				break;
1056 			}
1057 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1058 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1059 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1060 			    sizeof(*cfg->vpd.vpd_w[off].value),
1061 			    M_DEVBUF, M_WAITOK);
1062 			remain -= 3;
1063 			i = 0;
1064 			/* keep in sync w/ state 6's transistions */
1065 			if (dflen == 0 && remain == 0)
1066 				state = 0;
1067 			else if (dflen == 0)
1068 				state = 5;
1069 			else
1070 				state = 6;
1071 			break;
1072 
1073 		case 6:	/* VPD-W Keyword Value */
1074 			cfg->vpd.vpd_w[off].value[i++] = byte;
1075 			dflen--;
1076 			remain--;
1077 			/* keep in sync w/ state 5's transistions */
1078 			if (dflen == 0)
1079 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1080 			if (dflen == 0 && remain == 0) {
1081 				cfg->vpd.vpd_wcnt = off;
1082 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1083 				    off * sizeof(*cfg->vpd.vpd_w),
1084 				    M_DEVBUF, M_WAITOK | M_ZERO);
1085 				state = 0;
1086 			} else if (dflen == 0)
1087 				state = 5;
1088 			break;
1089 
1090 		default:
1091 			pci_printf(cfg, "invalid state: %d\n", state);
1092 			state = -1;
1093 			break;
1094 		}
1095 	}
1096 
1097 	if (cksumvalid == 0 || state < -1) {
1098 		/* read-only data bad, clean up */
1099 		if (cfg->vpd.vpd_ros != NULL) {
1100 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1101 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1102 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1103 			cfg->vpd.vpd_ros = NULL;
1104 		}
1105 	}
1106 	if (state < -1) {
1107 		/* I/O error, clean up */
1108 		pci_printf(cfg, "failed to read VPD data.\n");
1109 		if (cfg->vpd.vpd_ident != NULL) {
1110 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1111 			cfg->vpd.vpd_ident = NULL;
1112 		}
1113 		if (cfg->vpd.vpd_w != NULL) {
1114 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1115 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1116 			free(cfg->vpd.vpd_w, M_DEVBUF);
1117 			cfg->vpd.vpd_w = NULL;
1118 		}
1119 	}
1120 	cfg->vpd.vpd_cached = 1;
1121 #undef REG
1122 #undef WREG
1123 }
1124 
1125 int
1126 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1127 {
1128 	struct pci_devinfo *dinfo = device_get_ivars(child);
1129 	pcicfgregs *cfg = &dinfo->cfg;
1130 
1131 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1132 		pci_read_vpd(device_get_parent(dev), cfg);
1133 
1134 	*identptr = cfg->vpd.vpd_ident;
1135 
1136 	if (*identptr == NULL)
1137 		return (ENXIO);
1138 
1139 	return (0);
1140 }
1141 
1142 int
1143 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1144 	const char **vptr)
1145 {
1146 	struct pci_devinfo *dinfo = device_get_ivars(child);
1147 	pcicfgregs *cfg = &dinfo->cfg;
1148 	int i;
1149 
1150 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1151 		pci_read_vpd(device_get_parent(dev), cfg);
1152 
1153 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1154 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1155 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1156 			*vptr = cfg->vpd.vpd_ros[i].value;
1157 			return (0);
1158 		}
1159 
1160 	*vptr = NULL;
1161 	return (ENXIO);
1162 }
1163 
1164 /*
1165  * Find the requested HyperTransport capability and return the offset
1166  * in configuration space via the pointer provided.  The function
1167  * returns 0 on success and an error code otherwise.
1168  */
1169 int
1170 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1171 {
1172 	int ptr, error;
1173 	uint16_t val;
1174 
1175 	error = pci_find_cap(child, PCIY_HT, &ptr);
1176 	if (error)
1177 		return (error);
1178 
1179 	/*
1180 	 * Traverse the capabilities list checking each HT capability
1181 	 * to see if it matches the requested HT capability.
1182 	 */
1183 	while (ptr != 0) {
1184 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1185 		if (capability == PCIM_HTCAP_SLAVE ||
1186 		    capability == PCIM_HTCAP_HOST)
1187 			val &= 0xe000;
1188 		else
1189 			val &= PCIM_HTCMD_CAP_MASK;
1190 		if (val == capability) {
1191 			if (capreg != NULL)
1192 				*capreg = ptr;
1193 			return (0);
1194 		}
1195 
1196 		/* Skip to the next HT capability. */
1197 		while (ptr != 0) {
1198 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1199 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1200 			    PCIY_HT)
1201 				break;
1202 		}
1203 	}
1204 	return (ENOENT);
1205 }
1206 
1207 /*
1208  * Find the requested capability and return the offset in
1209  * configuration space via the pointer provided.  The function returns
1210  * 0 on success and an error code otherwise.
1211  */
1212 int
1213 pci_find_cap_method(device_t dev, device_t child, int capability,
1214     int *capreg)
1215 {
1216 	struct pci_devinfo *dinfo = device_get_ivars(child);
1217 	pcicfgregs *cfg = &dinfo->cfg;
1218 	u_int32_t status;
1219 	u_int8_t ptr;
1220 
1221 	/*
1222 	 * Check the CAP_LIST bit of the PCI status register first.
1223 	 */
1224 	status = pci_read_config(child, PCIR_STATUS, 2);
1225 	if (!(status & PCIM_STATUS_CAPPRESENT))
1226 		return (ENXIO);
1227 
1228 	/*
1229 	 * Determine the start pointer of the capabilities list.
1230 	 */
1231 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1232 	case PCIM_HDRTYPE_NORMAL:
1233 	case PCIM_HDRTYPE_BRIDGE:
1234 		ptr = PCIR_CAP_PTR;
1235 		break;
1236 	case PCIM_HDRTYPE_CARDBUS:
1237 		ptr = PCIR_CAP_PTR_2;
1238 		break;
1239 	default:
1240 		/* XXX: panic? */
1241 		return (ENXIO);		/* no extended capabilities support */
1242 	}
1243 	ptr = pci_read_config(child, ptr, 1);
1244 
1245 	/*
1246 	 * Traverse the capabilities list.
1247 	 */
1248 	while (ptr != 0) {
1249 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1250 			if (capreg != NULL)
1251 				*capreg = ptr;
1252 			return (0);
1253 		}
1254 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1255 	}
1256 
1257 	return (ENOENT);
1258 }
1259 
1260 /*
1261  * Find the requested extended capability and return the offset in
1262  * configuration space via the pointer provided.  The function returns
1263  * 0 on success and an error code otherwise.
1264  */
1265 int
1266 pci_find_extcap_method(device_t dev, device_t child, int capability,
1267     int *capreg)
1268 {
1269 	struct pci_devinfo *dinfo = device_get_ivars(child);
1270 	pcicfgregs *cfg = &dinfo->cfg;
1271 	uint32_t ecap;
1272 	uint16_t ptr;
1273 
1274 	/* Only supported for PCI-express devices. */
1275 	if (cfg->pcie.pcie_location == 0)
1276 		return (ENXIO);
1277 
1278 	ptr = PCIR_EXTCAP;
1279 	ecap = pci_read_config(child, ptr, 4);
1280 	if (ecap == 0xffffffff || ecap == 0)
1281 		return (ENOENT);
1282 	for (;;) {
1283 		if (PCI_EXTCAP_ID(ecap) == capability) {
1284 			if (capreg != NULL)
1285 				*capreg = ptr;
1286 			return (0);
1287 		}
1288 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1289 		if (ptr == 0)
1290 			break;
1291 		ecap = pci_read_config(child, ptr, 4);
1292 	}
1293 
1294 	return (ENOENT);
1295 }
1296 
1297 /*
1298  * Support for MSI-X message interrupts.
1299  */
1300 void
1301 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1302 {
1303 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1304 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1305 	uint32_t offset;
1306 
1307 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1308 	offset = msix->msix_table_offset + index * 16;
1309 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1310 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1311 	bus_write_4(msix->msix_table_res, offset + 8, data);
1312 
1313 	/* Enable MSI -> HT mapping. */
1314 	pci_ht_map_msi(dev, address);
1315 }
1316 
1317 void
1318 pci_mask_msix(device_t dev, u_int index)
1319 {
1320 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1321 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1322 	uint32_t offset, val;
1323 
1324 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1325 	offset = msix->msix_table_offset + index * 16 + 12;
1326 	val = bus_read_4(msix->msix_table_res, offset);
1327 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1328 		val |= PCIM_MSIX_VCTRL_MASK;
1329 		bus_write_4(msix->msix_table_res, offset, val);
1330 	}
1331 }
1332 
1333 void
1334 pci_unmask_msix(device_t dev, u_int index)
1335 {
1336 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1337 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1338 	uint32_t offset, val;
1339 
1340 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1341 	offset = msix->msix_table_offset + index * 16 + 12;
1342 	val = bus_read_4(msix->msix_table_res, offset);
1343 	if (val & PCIM_MSIX_VCTRL_MASK) {
1344 		val &= ~PCIM_MSIX_VCTRL_MASK;
1345 		bus_write_4(msix->msix_table_res, offset, val);
1346 	}
1347 }
1348 
1349 int
1350 pci_pending_msix(device_t dev, u_int index)
1351 {
1352 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1353 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1354 	uint32_t offset, bit;
1355 
1356 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1357 	offset = msix->msix_pba_offset + (index / 32) * 4;
1358 	bit = 1 << index % 32;
1359 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1360 }
1361 
1362 /*
1363  * Restore MSI-X registers and table during resume.  If MSI-X is
1364  * enabled then walk the virtual table to restore the actual MSI-X
1365  * table.
1366  */
1367 static void
1368 pci_resume_msix(device_t dev)
1369 {
1370 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1371 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1372 	struct msix_table_entry *mte;
1373 	struct msix_vector *mv;
1374 	int i;
1375 
1376 	if (msix->msix_alloc > 0) {
1377 		/* First, mask all vectors. */
1378 		for (i = 0; i < msix->msix_msgnum; i++)
1379 			pci_mask_msix(dev, i);
1380 
1381 		/* Second, program any messages with at least one handler. */
1382 		for (i = 0; i < msix->msix_table_len; i++) {
1383 			mte = &msix->msix_table[i];
1384 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1385 				continue;
1386 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1387 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1388 			pci_unmask_msix(dev, i);
1389 		}
1390 	}
1391 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1392 	    msix->msix_ctrl, 2);
1393 }
1394 
1395 /*
1396  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1397  * returned in *count.  After this function returns, each message will be
1398  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1399  */
1400 int
1401 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1402 {
1403 	struct pci_devinfo *dinfo = device_get_ivars(child);
1404 	pcicfgregs *cfg = &dinfo->cfg;
1405 	struct resource_list_entry *rle;
1406 	int actual, error, i, irq, max;
1407 
1408 	/* Don't let count == 0 get us into trouble. */
1409 	if (*count == 0)
1410 		return (EINVAL);
1411 
1412 	/* If rid 0 is allocated, then fail. */
1413 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1414 	if (rle != NULL && rle->res != NULL)
1415 		return (ENXIO);
1416 
1417 	/* Already have allocated messages? */
1418 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1419 		return (ENXIO);
1420 
1421 	/* If MSI is blacklisted for this system, fail. */
1422 	if (pci_msi_blacklisted())
1423 		return (ENXIO);
1424 
1425 	/* MSI-X capability present? */
1426 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1427 		return (ENODEV);
1428 
1429 	/* Make sure the appropriate BARs are mapped. */
1430 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1431 	    cfg->msix.msix_table_bar);
1432 	if (rle == NULL || rle->res == NULL ||
1433 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1434 		return (ENXIO);
1435 	cfg->msix.msix_table_res = rle->res;
1436 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1437 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1438 		    cfg->msix.msix_pba_bar);
1439 		if (rle == NULL || rle->res == NULL ||
1440 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1441 			return (ENXIO);
1442 	}
1443 	cfg->msix.msix_pba_res = rle->res;
1444 
1445 	if (bootverbose)
1446 		device_printf(child,
1447 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1448 		    *count, cfg->msix.msix_msgnum);
1449 	max = min(*count, cfg->msix.msix_msgnum);
1450 	for (i = 0; i < max; i++) {
1451 		/* Allocate a message. */
1452 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1453 		if (error) {
1454 			if (i == 0)
1455 				return (error);
1456 			break;
1457 		}
1458 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1459 		    irq, 1);
1460 	}
1461 	actual = i;
1462 
1463 	if (bootverbose) {
1464 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1465 		if (actual == 1)
1466 			device_printf(child, "using IRQ %lu for MSI-X\n",
1467 			    rle->start);
1468 		else {
1469 			int run;
1470 
1471 			/*
1472 			 * Be fancy and try to print contiguous runs of
1473 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1474 			 * 'run' is true if we are in a range.
1475 			 */
1476 			device_printf(child, "using IRQs %lu", rle->start);
1477 			irq = rle->start;
1478 			run = 0;
1479 			for (i = 1; i < actual; i++) {
1480 				rle = resource_list_find(&dinfo->resources,
1481 				    SYS_RES_IRQ, i + 1);
1482 
1483 				/* Still in a run? */
1484 				if (rle->start == irq + 1) {
1485 					run = 1;
1486 					irq++;
1487 					continue;
1488 				}
1489 
1490 				/* Finish previous range. */
1491 				if (run) {
1492 					printf("-%d", irq);
1493 					run = 0;
1494 				}
1495 
1496 				/* Start new range. */
1497 				printf(",%lu", rle->start);
1498 				irq = rle->start;
1499 			}
1500 
1501 			/* Unfinished range? */
1502 			if (run)
1503 				printf("-%d", irq);
1504 			printf(" for MSI-X\n");
1505 		}
1506 	}
1507 
1508 	/* Mask all vectors. */
1509 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1510 		pci_mask_msix(child, i);
1511 
1512 	/* Allocate and initialize vector data and virtual table. */
1513 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1514 	    M_DEVBUF, M_WAITOK | M_ZERO);
1515 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1516 	    M_DEVBUF, M_WAITOK | M_ZERO);
1517 	for (i = 0; i < actual; i++) {
1518 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1519 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1520 		cfg->msix.msix_table[i].mte_vector = i + 1;
1521 	}
1522 
1523 	/* Update control register to enable MSI-X. */
1524 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1525 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1526 	    cfg->msix.msix_ctrl, 2);
1527 
1528 	/* Update counts of alloc'd messages. */
1529 	cfg->msix.msix_alloc = actual;
1530 	cfg->msix.msix_table_len = actual;
1531 	*count = actual;
1532 	return (0);
1533 }
1534 
1535 /*
1536  * By default, pci_alloc_msix() will assign the allocated IRQ
1537  * resources consecutively to the first N messages in the MSI-X table.
1538  * However, device drivers may want to use different layouts if they
1539  * either receive fewer messages than they asked for, or they wish to
1540  * populate the MSI-X table sparsely.  This method allows the driver
1541  * to specify what layout it wants.  It must be called after a
1542  * successful pci_alloc_msix() but before any of the associated
1543  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1544  *
1545  * The 'vectors' array contains 'count' message vectors.  The array
1546  * maps directly to the MSI-X table in that index 0 in the array
1547  * specifies the vector for the first message in the MSI-X table, etc.
1548  * The vector value in each array index can either be 0 to indicate
1549  * that no vector should be assigned to a message slot, or it can be a
1550  * number from 1 to N (where N is the count returned from a
1551  * succcessful call to pci_alloc_msix()) to indicate which message
1552  * vector (IRQ) to be used for the corresponding message.
1553  *
1554  * On successful return, each message with a non-zero vector will have
1555  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1556  * 1.  Additionally, if any of the IRQs allocated via the previous
1557  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1558  * will be freed back to the system automatically.
1559  *
1560  * For example, suppose a driver has a MSI-X table with 6 messages and
1561  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1562  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1563  * C.  After the call to pci_alloc_msix(), the device will be setup to
1564  * have an MSI-X table of ABC--- (where - means no vector assigned).
1565  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1566  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1567  * be freed back to the system.  This device will also have valid
1568  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1569  *
1570  * In any case, the SYS_RES_IRQ rid X will always map to the message
1571  * at MSI-X table index X - 1 and will only be valid if a vector is
1572  * assigned to that table entry.
1573  */
1574 int
1575 pci_remap_msix_method(device_t dev, device_t child, int count,
1576     const u_int *vectors)
1577 {
1578 	struct pci_devinfo *dinfo = device_get_ivars(child);
1579 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1580 	struct resource_list_entry *rle;
1581 	int i, irq, j, *used;
1582 
1583 	/*
1584 	 * Have to have at least one message in the table but the
1585 	 * table can't be bigger than the actual MSI-X table in the
1586 	 * device.
1587 	 */
1588 	if (count == 0 || count > msix->msix_msgnum)
1589 		return (EINVAL);
1590 
1591 	/* Sanity check the vectors. */
1592 	for (i = 0; i < count; i++)
1593 		if (vectors[i] > msix->msix_alloc)
1594 			return (EINVAL);
1595 
1596 	/*
1597 	 * Make sure there aren't any holes in the vectors to be used.
1598 	 * It's a big pain to support it, and it doesn't really make
1599 	 * sense anyway.  Also, at least one vector must be used.
1600 	 */
1601 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1602 	    M_ZERO);
1603 	for (i = 0; i < count; i++)
1604 		if (vectors[i] != 0)
1605 			used[vectors[i] - 1] = 1;
1606 	for (i = 0; i < msix->msix_alloc - 1; i++)
1607 		if (used[i] == 0 && used[i + 1] == 1) {
1608 			free(used, M_DEVBUF);
1609 			return (EINVAL);
1610 		}
1611 	if (used[0] != 1) {
1612 		free(used, M_DEVBUF);
1613 		return (EINVAL);
1614 	}
1615 
1616 	/* Make sure none of the resources are allocated. */
1617 	for (i = 0; i < msix->msix_table_len; i++) {
1618 		if (msix->msix_table[i].mte_vector == 0)
1619 			continue;
1620 		if (msix->msix_table[i].mte_handlers > 0)
1621 			return (EBUSY);
1622 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1623 		KASSERT(rle != NULL, ("missing resource"));
1624 		if (rle->res != NULL)
1625 			return (EBUSY);
1626 	}
1627 
1628 	/* Free the existing resource list entries. */
1629 	for (i = 0; i < msix->msix_table_len; i++) {
1630 		if (msix->msix_table[i].mte_vector == 0)
1631 			continue;
1632 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1633 	}
1634 
1635 	/*
1636 	 * Build the new virtual table keeping track of which vectors are
1637 	 * used.
1638 	 */
1639 	free(msix->msix_table, M_DEVBUF);
1640 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1641 	    M_DEVBUF, M_WAITOK | M_ZERO);
1642 	for (i = 0; i < count; i++)
1643 		msix->msix_table[i].mte_vector = vectors[i];
1644 	msix->msix_table_len = count;
1645 
1646 	/* Free any unused IRQs and resize the vectors array if necessary. */
1647 	j = msix->msix_alloc - 1;
1648 	if (used[j] == 0) {
1649 		struct msix_vector *vec;
1650 
1651 		while (used[j] == 0) {
1652 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1653 			    msix->msix_vectors[j].mv_irq);
1654 			j--;
1655 		}
1656 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1657 		    M_WAITOK);
1658 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1659 		    (j + 1));
1660 		free(msix->msix_vectors, M_DEVBUF);
1661 		msix->msix_vectors = vec;
1662 		msix->msix_alloc = j + 1;
1663 	}
1664 	free(used, M_DEVBUF);
1665 
1666 	/* Map the IRQs onto the rids. */
1667 	for (i = 0; i < count; i++) {
1668 		if (vectors[i] == 0)
1669 			continue;
1670 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1671 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1672 		    irq, 1);
1673 	}
1674 
1675 	if (bootverbose) {
1676 		device_printf(child, "Remapped MSI-X IRQs as: ");
1677 		for (i = 0; i < count; i++) {
1678 			if (i != 0)
1679 				printf(", ");
1680 			if (vectors[i] == 0)
1681 				printf("---");
1682 			else
1683 				printf("%d",
1684 				    msix->msix_vectors[vectors[i]].mv_irq);
1685 		}
1686 		printf("\n");
1687 	}
1688 
1689 	return (0);
1690 }
1691 
1692 static int
1693 pci_release_msix(device_t dev, device_t child)
1694 {
1695 	struct pci_devinfo *dinfo = device_get_ivars(child);
1696 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1697 	struct resource_list_entry *rle;
1698 	int i;
1699 
1700 	/* Do we have any messages to release? */
1701 	if (msix->msix_alloc == 0)
1702 		return (ENODEV);
1703 
1704 	/* Make sure none of the resources are allocated. */
1705 	for (i = 0; i < msix->msix_table_len; i++) {
1706 		if (msix->msix_table[i].mte_vector == 0)
1707 			continue;
1708 		if (msix->msix_table[i].mte_handlers > 0)
1709 			return (EBUSY);
1710 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1711 		KASSERT(rle != NULL, ("missing resource"));
1712 		if (rle->res != NULL)
1713 			return (EBUSY);
1714 	}
1715 
1716 	/* Update control register to disable MSI-X. */
1717 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1718 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1719 	    msix->msix_ctrl, 2);
1720 
1721 	/* Free the resource list entries. */
1722 	for (i = 0; i < msix->msix_table_len; i++) {
1723 		if (msix->msix_table[i].mte_vector == 0)
1724 			continue;
1725 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1726 	}
1727 	free(msix->msix_table, M_DEVBUF);
1728 	msix->msix_table_len = 0;
1729 
1730 	/* Release the IRQs. */
1731 	for (i = 0; i < msix->msix_alloc; i++)
1732 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1733 		    msix->msix_vectors[i].mv_irq);
1734 	free(msix->msix_vectors, M_DEVBUF);
1735 	msix->msix_alloc = 0;
1736 	return (0);
1737 }
1738 
1739 /*
1740  * Return the max supported MSI-X messages this device supports.
1741  * Basically, assuming the MD code can alloc messages, this function
1742  * should return the maximum value that pci_alloc_msix() can return.
1743  * Thus, it is subject to the tunables, etc.
1744  */
1745 int
1746 pci_msix_count_method(device_t dev, device_t child)
1747 {
1748 	struct pci_devinfo *dinfo = device_get_ivars(child);
1749 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1750 
1751 	if (pci_do_msix && msix->msix_location != 0)
1752 		return (msix->msix_msgnum);
1753 	return (0);
1754 }
1755 
1756 /*
1757  * HyperTransport MSI mapping control
1758  */
1759 void
1760 pci_ht_map_msi(device_t dev, uint64_t addr)
1761 {
1762 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1763 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1764 
1765 	if (!ht->ht_msimap)
1766 		return;
1767 
1768 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1769 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1770 		/* Enable MSI -> HT mapping. */
1771 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1772 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1773 		    ht->ht_msictrl, 2);
1774 	}
1775 
1776 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1777 		/* Disable MSI -> HT mapping. */
1778 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1779 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1780 		    ht->ht_msictrl, 2);
1781 	}
1782 }
1783 
1784 int
1785 pci_get_max_read_req(device_t dev)
1786 {
1787 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1788 	int cap;
1789 	uint16_t val;
1790 
1791 	cap = dinfo->cfg.pcie.pcie_location;
1792 	if (cap == 0)
1793 		return (0);
1794 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1795 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1796 	val >>= 12;
1797 	return (1 << (val + 7));
1798 }
1799 
1800 int
1801 pci_set_max_read_req(device_t dev, int size)
1802 {
1803 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1804 	int cap;
1805 	uint16_t val;
1806 
1807 	cap = dinfo->cfg.pcie.pcie_location;
1808 	if (cap == 0)
1809 		return (0);
1810 	if (size < 128)
1811 		size = 128;
1812 	if (size > 4096)
1813 		size = 4096;
1814 	size = (1 << (fls(size) - 1));
1815 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1816 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1817 	val |= (fls(size) - 8) << 12;
1818 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1819 	return (size);
1820 }
1821 
1822 /*
1823  * Support for MSI message signalled interrupts.
1824  */
1825 void
1826 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1827 {
1828 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1829 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1830 
1831 	/* Write data and address values. */
1832 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1833 	    address & 0xffffffff, 4);
1834 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1835 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1836 		    address >> 32, 4);
1837 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1838 		    data, 2);
1839 	} else
1840 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1841 		    2);
1842 
1843 	/* Enable MSI in the control register. */
1844 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1845 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1846 	    2);
1847 
1848 	/* Enable MSI -> HT mapping. */
1849 	pci_ht_map_msi(dev, address);
1850 }
1851 
1852 void
1853 pci_disable_msi(device_t dev)
1854 {
1855 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1856 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1857 
1858 	/* Disable MSI -> HT mapping. */
1859 	pci_ht_map_msi(dev, 0);
1860 
1861 	/* Disable MSI in the control register. */
1862 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1863 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1864 	    2);
1865 }
1866 
1867 /*
1868  * Restore MSI registers during resume.  If MSI is enabled then
1869  * restore the data and address registers in addition to the control
1870  * register.
1871  */
1872 static void
1873 pci_resume_msi(device_t dev)
1874 {
1875 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1876 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1877 	uint64_t address;
1878 	uint16_t data;
1879 
1880 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1881 		address = msi->msi_addr;
1882 		data = msi->msi_data;
1883 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1884 		    address & 0xffffffff, 4);
1885 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1886 			pci_write_config(dev, msi->msi_location +
1887 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1888 			pci_write_config(dev, msi->msi_location +
1889 			    PCIR_MSI_DATA_64BIT, data, 2);
1890 		} else
1891 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1892 			    data, 2);
1893 	}
1894 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1895 	    2);
1896 }
1897 
1898 static int
1899 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1900 {
1901 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1902 	pcicfgregs *cfg = &dinfo->cfg;
1903 	struct resource_list_entry *rle;
1904 	struct msix_table_entry *mte;
1905 	struct msix_vector *mv;
1906 	uint64_t addr;
1907 	uint32_t data;
1908 	int error, i, j;
1909 
1910 	/*
1911 	 * Handle MSI first.  We try to find this IRQ among our list
1912 	 * of MSI IRQs.  If we find it, we request updated address and
1913 	 * data registers and apply the results.
1914 	 */
1915 	if (cfg->msi.msi_alloc > 0) {
1916 
1917 		/* If we don't have any active handlers, nothing to do. */
1918 		if (cfg->msi.msi_handlers == 0)
1919 			return (0);
1920 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1921 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1922 			    i + 1);
1923 			if (rle->start == irq) {
1924 				error = PCIB_MAP_MSI(device_get_parent(bus),
1925 				    dev, irq, &addr, &data);
1926 				if (error)
1927 					return (error);
1928 				pci_disable_msi(dev);
1929 				dinfo->cfg.msi.msi_addr = addr;
1930 				dinfo->cfg.msi.msi_data = data;
1931 				pci_enable_msi(dev, addr, data);
1932 				return (0);
1933 			}
1934 		}
1935 		return (ENOENT);
1936 	}
1937 
1938 	/*
1939 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1940 	 * we request the updated mapping info.  If that works, we go
1941 	 * through all the slots that use this IRQ and update them.
1942 	 */
1943 	if (cfg->msix.msix_alloc > 0) {
1944 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1945 			mv = &cfg->msix.msix_vectors[i];
1946 			if (mv->mv_irq == irq) {
1947 				error = PCIB_MAP_MSI(device_get_parent(bus),
1948 				    dev, irq, &addr, &data);
1949 				if (error)
1950 					return (error);
1951 				mv->mv_address = addr;
1952 				mv->mv_data = data;
1953 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1954 					mte = &cfg->msix.msix_table[j];
1955 					if (mte->mte_vector != i + 1)
1956 						continue;
1957 					if (mte->mte_handlers == 0)
1958 						continue;
1959 					pci_mask_msix(dev, j);
1960 					pci_enable_msix(dev, j, addr, data);
1961 					pci_unmask_msix(dev, j);
1962 				}
1963 			}
1964 		}
1965 		return (ENOENT);
1966 	}
1967 
1968 	return (ENOENT);
1969 }
1970 
1971 /*
1972  * Returns true if the specified device is blacklisted because MSI
1973  * doesn't work.
1974  */
1975 int
1976 pci_msi_device_blacklisted(device_t dev)
1977 {
1978 	const struct pci_quirk *q;
1979 
1980 	if (!pci_honor_msi_blacklist)
1981 		return (0);
1982 
1983 	for (q = &pci_quirks[0]; q->devid; q++) {
1984 		if (q->devid == pci_get_devid(dev) &&
1985 		    q->type == PCI_QUIRK_DISABLE_MSI)
1986 			return (1);
1987 	}
1988 	return (0);
1989 }
1990 
1991 /*
1992  * Returns true if a specified chipset supports MSI when it is
1993  * emulated hardware in a virtual machine.
1994  */
1995 static int
1996 pci_msi_vm_chipset(device_t dev)
1997 {
1998 	const struct pci_quirk *q;
1999 
2000 	for (q = &pci_quirks[0]; q->devid; q++) {
2001 		if (q->devid == pci_get_devid(dev) &&
2002 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2003 			return (1);
2004 	}
2005 	return (0);
2006 }
2007 
2008 /*
2009  * Determine if MSI is blacklisted globally on this sytem.  Currently,
2010  * we just check for blacklisted chipsets as represented by the
2011  * host-PCI bridge at device 0:0:0.  In the future, it may become
2012  * necessary to check other system attributes, such as the kenv values
2013  * that give the motherboard manufacturer and model number.
2014  */
2015 static int
2016 pci_msi_blacklisted(void)
2017 {
2018 	device_t dev;
2019 
2020 	if (!pci_honor_msi_blacklist)
2021 		return (0);
2022 
2023 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2024 	if (!(pcie_chipset || pcix_chipset)) {
2025 		if (vm_guest != VM_GUEST_NO) {
2026 			dev = pci_find_bsf(0, 0, 0);
2027 			if (dev != NULL)
2028 				return (pci_msi_vm_chipset(dev) == 0);
2029 		}
2030 		return (1);
2031 	}
2032 
2033 	dev = pci_find_bsf(0, 0, 0);
2034 	if (dev != NULL)
2035 		return (pci_msi_device_blacklisted(dev));
2036 	return (0);
2037 }
2038 
2039 /*
2040  * Attempt to allocate *count MSI messages.  The actual number allocated is
2041  * returned in *count.  After this function returns, each message will be
2042  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2043  */
2044 int
2045 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2046 {
2047 	struct pci_devinfo *dinfo = device_get_ivars(child);
2048 	pcicfgregs *cfg = &dinfo->cfg;
2049 	struct resource_list_entry *rle;
2050 	int actual, error, i, irqs[32];
2051 	uint16_t ctrl;
2052 
2053 	/* Don't let count == 0 get us into trouble. */
2054 	if (*count == 0)
2055 		return (EINVAL);
2056 
2057 	/* If rid 0 is allocated, then fail. */
2058 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2059 	if (rle != NULL && rle->res != NULL)
2060 		return (ENXIO);
2061 
2062 	/* Already have allocated messages? */
2063 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2064 		return (ENXIO);
2065 
2066 	/* If MSI is blacklisted for this system, fail. */
2067 	if (pci_msi_blacklisted())
2068 		return (ENXIO);
2069 
2070 	/* MSI capability present? */
2071 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2072 		return (ENODEV);
2073 
2074 	if (bootverbose)
2075 		device_printf(child,
2076 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2077 		    *count, cfg->msi.msi_msgnum);
2078 
2079 	/* Don't ask for more than the device supports. */
2080 	actual = min(*count, cfg->msi.msi_msgnum);
2081 
2082 	/* Don't ask for more than 32 messages. */
2083 	actual = min(actual, 32);
2084 
2085 	/* MSI requires power of 2 number of messages. */
2086 	if (!powerof2(actual))
2087 		return (EINVAL);
2088 
2089 	for (;;) {
2090 		/* Try to allocate N messages. */
2091 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2092 		    actual, irqs);
2093 		if (error == 0)
2094 			break;
2095 		if (actual == 1)
2096 			return (error);
2097 
2098 		/* Try N / 2. */
2099 		actual >>= 1;
2100 	}
2101 
2102 	/*
2103 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2104 	 * resources in the irqs[] array, so add new resources
2105 	 * starting at rid 1.
2106 	 */
2107 	for (i = 0; i < actual; i++)
2108 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2109 		    irqs[i], irqs[i], 1);
2110 
2111 	if (bootverbose) {
2112 		if (actual == 1)
2113 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2114 		else {
2115 			int run;
2116 
2117 			/*
2118 			 * Be fancy and try to print contiguous runs
2119 			 * of IRQ values as ranges.  'run' is true if
2120 			 * we are in a range.
2121 			 */
2122 			device_printf(child, "using IRQs %d", irqs[0]);
2123 			run = 0;
2124 			for (i = 1; i < actual; i++) {
2125 
2126 				/* Still in a run? */
2127 				if (irqs[i] == irqs[i - 1] + 1) {
2128 					run = 1;
2129 					continue;
2130 				}
2131 
2132 				/* Finish previous range. */
2133 				if (run) {
2134 					printf("-%d", irqs[i - 1]);
2135 					run = 0;
2136 				}
2137 
2138 				/* Start new range. */
2139 				printf(",%d", irqs[i]);
2140 			}
2141 
2142 			/* Unfinished range? */
2143 			if (run)
2144 				printf("-%d", irqs[actual - 1]);
2145 			printf(" for MSI\n");
2146 		}
2147 	}
2148 
2149 	/* Update control register with actual count. */
2150 	ctrl = cfg->msi.msi_ctrl;
2151 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2152 	ctrl |= (ffs(actual) - 1) << 4;
2153 	cfg->msi.msi_ctrl = ctrl;
2154 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2155 
2156 	/* Update counts of alloc'd messages. */
2157 	cfg->msi.msi_alloc = actual;
2158 	cfg->msi.msi_handlers = 0;
2159 	*count = actual;
2160 	return (0);
2161 }
2162 
2163 /* Release the MSI messages associated with this device. */
2164 int
2165 pci_release_msi_method(device_t dev, device_t child)
2166 {
2167 	struct pci_devinfo *dinfo = device_get_ivars(child);
2168 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2169 	struct resource_list_entry *rle;
2170 	int error, i, irqs[32];
2171 
2172 	/* Try MSI-X first. */
2173 	error = pci_release_msix(dev, child);
2174 	if (error != ENODEV)
2175 		return (error);
2176 
2177 	/* Do we have any messages to release? */
2178 	if (msi->msi_alloc == 0)
2179 		return (ENODEV);
2180 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2181 
2182 	/* Make sure none of the resources are allocated. */
2183 	if (msi->msi_handlers > 0)
2184 		return (EBUSY);
2185 	for (i = 0; i < msi->msi_alloc; i++) {
2186 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2187 		KASSERT(rle != NULL, ("missing MSI resource"));
2188 		if (rle->res != NULL)
2189 			return (EBUSY);
2190 		irqs[i] = rle->start;
2191 	}
2192 
2193 	/* Update control register with 0 count. */
2194 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2195 	    ("%s: MSI still enabled", __func__));
2196 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2197 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2198 	    msi->msi_ctrl, 2);
2199 
2200 	/* Release the messages. */
2201 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2202 	for (i = 0; i < msi->msi_alloc; i++)
2203 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2204 
2205 	/* Update alloc count. */
2206 	msi->msi_alloc = 0;
2207 	msi->msi_addr = 0;
2208 	msi->msi_data = 0;
2209 	return (0);
2210 }
2211 
2212 /*
2213  * Return the max supported MSI messages this device supports.
2214  * Basically, assuming the MD code can alloc messages, this function
2215  * should return the maximum value that pci_alloc_msi() can return.
2216  * Thus, it is subject to the tunables, etc.
2217  */
2218 int
2219 pci_msi_count_method(device_t dev, device_t child)
2220 {
2221 	struct pci_devinfo *dinfo = device_get_ivars(child);
2222 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2223 
2224 	if (pci_do_msi && msi->msi_location != 0)
2225 		return (msi->msi_msgnum);
2226 	return (0);
2227 }
2228 
2229 /* free pcicfgregs structure and all depending data structures */
2230 
2231 int
2232 pci_freecfg(struct pci_devinfo *dinfo)
2233 {
2234 	struct devlist *devlist_head;
2235 	struct pci_map *pm, *next;
2236 	int i;
2237 
2238 	devlist_head = &pci_devq;
2239 
2240 	if (dinfo->cfg.vpd.vpd_reg) {
2241 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2242 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2243 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2244 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2245 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2246 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2247 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2248 	}
2249 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2250 		free(pm, M_DEVBUF);
2251 	}
2252 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2253 	free(dinfo, M_DEVBUF);
2254 
2255 	/* increment the generation count */
2256 	pci_generation++;
2257 
2258 	/* we're losing one device */
2259 	pci_numdevs--;
2260 	return (0);
2261 }
2262 
2263 /*
 * PCI power management
2265  */
2266 int
2267 pci_set_powerstate_method(device_t dev, device_t child, int state)
2268 {
2269 	struct pci_devinfo *dinfo = device_get_ivars(child);
2270 	pcicfgregs *cfg = &dinfo->cfg;
2271 	uint16_t status;
2272 	int result, oldstate, highest, delay;
2273 
2274 	if (cfg->pp.pp_cap == 0)
2275 		return (EOPNOTSUPP);
2276 
2277 	/*
2278 	 * Optimize a no state change request away.  While it would be OK to
2279 	 * write to the hardware in theory, some devices have shown odd
2280 	 * behavior when going from D3 -> D3.
2281 	 */
2282 	oldstate = pci_get_powerstate(child);
2283 	if (oldstate == state)
2284 		return (0);
2285 
2286 	/*
2287 	 * The PCI power management specification states that after a state
2288 	 * transition between PCI power states, system software must
2289 	 * guarantee a minimal delay before the function accesses the device.
2290 	 * Compute the worst case delay that we need to guarantee before we
2291 	 * access the device.  Many devices will be responsive much more
2292 	 * quickly than this delay, but there are some that don't respond
2293 	 * instantly to state changes.  Transitions to/from D3 state require
2294 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2295 	 * is done below with DELAY rather than a sleeper function because
2296 	 * this function can be called from contexts where we cannot sleep.
2297 	 */
2298 	highest = (oldstate > state) ? oldstate : state;
2299 	if (highest == PCI_POWERSTATE_D3)
2300 	    delay = 10000;
2301 	else if (highest == PCI_POWERSTATE_D2)
2302 	    delay = 200;
2303 	else
2304 	    delay = 0;
2305 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2306 	    & ~PCIM_PSTAT_DMASK;
2307 	result = 0;
2308 	switch (state) {
2309 	case PCI_POWERSTATE_D0:
2310 		status |= PCIM_PSTAT_D0;
2311 		break;
2312 	case PCI_POWERSTATE_D1:
2313 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2314 			return (EOPNOTSUPP);
2315 		status |= PCIM_PSTAT_D1;
2316 		break;
2317 	case PCI_POWERSTATE_D2:
2318 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2319 			return (EOPNOTSUPP);
2320 		status |= PCIM_PSTAT_D2;
2321 		break;
2322 	case PCI_POWERSTATE_D3:
2323 		status |= PCIM_PSTAT_D3;
2324 		break;
2325 	default:
2326 		return (EINVAL);
2327 	}
2328 
2329 	if (bootverbose)
2330 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2331 		    state);
2332 
2333 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2334 	if (delay)
2335 		DELAY(delay);
2336 	return (0);
2337 }
2338 
2339 int
2340 pci_get_powerstate_method(device_t dev, device_t child)
2341 {
2342 	struct pci_devinfo *dinfo = device_get_ivars(child);
2343 	pcicfgregs *cfg = &dinfo->cfg;
2344 	uint16_t status;
2345 	int result;
2346 
2347 	if (cfg->pp.pp_cap != 0) {
2348 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2349 		switch (status & PCIM_PSTAT_DMASK) {
2350 		case PCIM_PSTAT_D0:
2351 			result = PCI_POWERSTATE_D0;
2352 			break;
2353 		case PCIM_PSTAT_D1:
2354 			result = PCI_POWERSTATE_D1;
2355 			break;
2356 		case PCIM_PSTAT_D2:
2357 			result = PCI_POWERSTATE_D2;
2358 			break;
2359 		case PCIM_PSTAT_D3:
2360 			result = PCI_POWERSTATE_D3;
2361 			break;
2362 		default:
2363 			result = PCI_POWERSTATE_UNKNOWN;
2364 			break;
2365 		}
2366 	} else {
2367 		/* No support, device is always at D0 */
2368 		result = PCI_POWERSTATE_D0;
2369 	}
2370 	return (result);
2371 }
2372 
2373 /*
2374  * Some convenience functions for PCI device drivers.
2375  */
2376 
2377 static __inline void
2378 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2379 {
2380 	uint16_t	command;
2381 
2382 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2383 	command |= bit;
2384 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2385 }
2386 
2387 static __inline void
2388 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2389 {
2390 	uint16_t	command;
2391 
2392 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2393 	command &= ~bit;
2394 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2395 }
2396 
/*
 * Set the bus master enable bit (PCIM_CMD_BUSMASTEREN) in the child's
 * command register.  Always returns 0.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2403 
/*
 * Clear the bus master enable bit (PCIM_CMD_BUSMASTEREN) in the child's
 * command register.  Always returns 0.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2410 
2411 int
2412 pci_enable_io_method(device_t dev, device_t child, int space)
2413 {
2414 	uint16_t bit;
2415 
2416 	switch(space) {
2417 	case SYS_RES_IOPORT:
2418 		bit = PCIM_CMD_PORTEN;
2419 		break;
2420 	case SYS_RES_MEMORY:
2421 		bit = PCIM_CMD_MEMEN;
2422 		break;
2423 	default:
2424 		return (EINVAL);
2425 	}
2426 	pci_set_command_bit(dev, child, bit);
2427 	return (0);
2428 }
2429 
2430 int
2431 pci_disable_io_method(device_t dev, device_t child, int space)
2432 {
2433 	uint16_t bit;
2434 
2435 	switch(space) {
2436 	case SYS_RES_IOPORT:
2437 		bit = PCIM_CMD_PORTEN;
2438 		break;
2439 	case SYS_RES_MEMORY:
2440 		bit = PCIM_CMD_MEMEN;
2441 		break;
2442 	default:
2443 		return (EINVAL);
2444 	}
2445 	pci_clear_command_bit(dev, child, bit);
2446 	return (0);
2447 }
2448 
2449 /*
2450  * New style pci driver.  Parent device is either a pci-host-bridge or a
2451  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2452  */
2453 
2454 void
2455 pci_print_verbose(struct pci_devinfo *dinfo)
2456 {
2457 
2458 	if (bootverbose) {
2459 		pcicfgregs *cfg = &dinfo->cfg;
2460 
2461 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2462 		    cfg->vendor, cfg->device, cfg->revid);
2463 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2464 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2465 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2466 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2467 		    cfg->mfdev);
2468 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2469 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2470 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2471 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2472 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2473 		if (cfg->intpin > 0)
2474 			printf("\tintpin=%c, irq=%d\n",
2475 			    cfg->intpin +'a' -1, cfg->intline);
2476 		if (cfg->pp.pp_cap) {
2477 			uint16_t status;
2478 
2479 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2480 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2481 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2482 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2483 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2484 			    status & PCIM_PSTAT_DMASK);
2485 		}
2486 		if (cfg->msi.msi_location) {
2487 			int ctrl;
2488 
2489 			ctrl = cfg->msi.msi_ctrl;
2490 			printf("\tMSI supports %d message%s%s%s\n",
2491 			    cfg->msi.msi_msgnum,
2492 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2493 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2494 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2495 		}
2496 		if (cfg->msix.msix_location) {
2497 			printf("\tMSI-X supports %d message%s ",
2498 			    cfg->msix.msix_msgnum,
2499 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2500 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2501 				printf("in map 0x%x\n",
2502 				    cfg->msix.msix_table_bar);
2503 			else
2504 				printf("in maps 0x%x and 0x%x\n",
2505 				    cfg->msix.msix_table_bar,
2506 				    cfg->msix.msix_pba_bar);
2507 		}
2508 	}
2509 }
2510 
2511 static int
2512 pci_porten(device_t dev)
2513 {
2514 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2515 }
2516 
2517 static int
2518 pci_memen(device_t dev)
2519 {
2520 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2521 }
2522 
/*
 * Read a BAR and probe it for sizing.
 *
 * On return *mapp holds the BAR's original contents and *testvalp the
 * value read back after writing all 1's to it (0xfffffffe for the device
 * ROM BAR).  For a 64-bit BAR both values include the upper dword at
 * reg + 4.  The original BAR contents and command register are restored
 * before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	/* Fetch the current value, including the high dword if 64-bit. */
	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding exactly as it was before sizing. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2586 
2587 static void
2588 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2589 {
2590 	struct pci_devinfo *dinfo;
2591 	int ln2range;
2592 
2593 	/* The device ROM BAR is always a 32-bit memory BAR. */
2594 	dinfo = device_get_ivars(dev);
2595 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2596 		ln2range = 32;
2597 	else
2598 		ln2range = pci_maprange(pm->pm_value);
2599 	pci_write_config(dev, pm->pm_reg, base, 4);
2600 	if (ln2range == 64)
2601 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2602 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2603 	if (ln2range == 64)
2604 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2605 		    pm->pm_reg + 4, 4) << 32;
2606 }
2607 
2608 struct pci_map *
2609 pci_find_bar(device_t dev, int reg)
2610 {
2611 	struct pci_devinfo *dinfo;
2612 	struct pci_map *pm;
2613 
2614 	dinfo = device_get_ivars(dev);
2615 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2616 		if (pm->pm_reg == reg)
2617 			return (pm);
2618 	}
2619 	return (NULL);
2620 }
2621 
2622 int
2623 pci_bar_enabled(device_t dev, struct pci_map *pm)
2624 {
2625 	struct pci_devinfo *dinfo;
2626 	uint16_t cmd;
2627 
2628 	dinfo = device_get_ivars(dev);
2629 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2630 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2631 		return (0);
2632 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2633 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2634 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2635 	else
2636 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2637 }
2638 
2639 static struct pci_map *
2640 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2641 {
2642 	struct pci_devinfo *dinfo;
2643 	struct pci_map *pm, *prev;
2644 
2645 	dinfo = device_get_ivars(dev);
2646 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2647 	pm->pm_reg = reg;
2648 	pm->pm_value = value;
2649 	pm->pm_size = size;
2650 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2651 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2652 		    reg));
2653 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2654 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2655 			break;
2656 	}
2657 	if (prev != NULL)
2658 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2659 	else
2660 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2661 	return (pm);
2662 }
2663 
2664 static void
2665 pci_restore_bars(device_t dev)
2666 {
2667 	struct pci_devinfo *dinfo;
2668 	struct pci_map *pm;
2669 	int ln2range;
2670 
2671 	dinfo = device_get_ivars(dev);
2672 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2673 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2674 			ln2range = 32;
2675 		else
2676 			ln2range = pci_maprange(pm->pm_value);
2677 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2678 		if (ln2range == 64)
2679 			pci_write_config(dev, pm->pm_reg + 4,
2680 			    pm->pm_value >> 32, 4);
2681 	}
2682 }
2683 
2684 /*
2685  * Add a resource based on a pci map register. Return 1 if the map
2686  * register is a 32bit map register or 2 if it is a 64bit register.
2687  */
2688 static int
2689 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2690     int force, int prefetch)
2691 {
2692 	struct pci_map *pm;
2693 	pci_addr_t base, map, testval;
2694 	pci_addr_t start, end, count;
2695 	int barlen, basezero, maprange, mapsize, type;
2696 	uint16_t cmd;
2697 	struct resource *res;
2698 
2699 	/*
2700 	 * The BAR may already exist if the device is a CardBus card
2701 	 * whose CIS is stored in this BAR.
2702 	 */
2703 	pm = pci_find_bar(dev, reg);
2704 	if (pm != NULL) {
2705 		maprange = pci_maprange(pm->pm_value);
2706 		barlen = maprange == 64 ? 2 : 1;
2707 		return (barlen);
2708 	}
2709 
2710 	pci_read_bar(dev, reg, &map, &testval);
2711 	if (PCI_BAR_MEM(map)) {
2712 		type = SYS_RES_MEMORY;
2713 		if (map & PCIM_BAR_MEM_PREFETCH)
2714 			prefetch = 1;
2715 	} else
2716 		type = SYS_RES_IOPORT;
2717 	mapsize = pci_mapsize(testval);
2718 	base = pci_mapbase(map);
2719 #ifdef __PCI_BAR_ZERO_VALID
2720 	basezero = 0;
2721 #else
2722 	basezero = base == 0;
2723 #endif
2724 	maprange = pci_maprange(map);
2725 	barlen = maprange == 64 ? 2 : 1;
2726 
2727 	/*
2728 	 * For I/O registers, if bottom bit is set, and the next bit up
2729 	 * isn't clear, we know we have a BAR that doesn't conform to the
2730 	 * spec, so ignore it.  Also, sanity check the size of the data
2731 	 * areas to the type of memory involved.  Memory must be at least
2732 	 * 16 bytes in size, while I/O ranges must be at least 4.
2733 	 */
2734 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2735 		return (barlen);
2736 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2737 	    (type == SYS_RES_IOPORT && mapsize < 2))
2738 		return (barlen);
2739 
2740 	/* Save a record of this BAR. */
2741 	pm = pci_add_bar(dev, reg, map, mapsize);
2742 	if (bootverbose) {
2743 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2744 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2745 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2746 			printf(", port disabled\n");
2747 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2748 			printf(", memory disabled\n");
2749 		else
2750 			printf(", enabled\n");
2751 	}
2752 
2753 	/*
2754 	 * If base is 0, then we have problems if this architecture does
2755 	 * not allow that.  It is best to ignore such entries for the
2756 	 * moment.  These will be allocated later if the driver specifically
2757 	 * requests them.  However, some removable busses look better when
2758 	 * all resources are allocated, so allow '0' to be overriden.
2759 	 *
2760 	 * Similarly treat maps whose values is the same as the test value
2761 	 * read back.  These maps have had all f's written to them by the
2762 	 * BIOS in an attempt to disable the resources.
2763 	 */
2764 	if (!force && (basezero || map == testval))
2765 		return (barlen);
2766 	if ((u_long)base != base) {
2767 		device_printf(bus,
2768 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2769 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2770 		    pci_get_function(dev), reg);
2771 		return (barlen);
2772 	}
2773 
2774 	/*
2775 	 * This code theoretically does the right thing, but has
2776 	 * undesirable side effects in some cases where peripherals
2777 	 * respond oddly to having these bits enabled.  Let the user
2778 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2779 	 * default).
2780 	 */
2781 	if (pci_enable_io_modes) {
2782 		/* Turn on resources that have been left off by a lazy BIOS */
2783 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2784 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2785 			cmd |= PCIM_CMD_PORTEN;
2786 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2787 		}
2788 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2789 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2790 			cmd |= PCIM_CMD_MEMEN;
2791 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2792 		}
2793 	} else {
2794 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2795 			return (barlen);
2796 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2797 			return (barlen);
2798 	}
2799 
2800 	count = (pci_addr_t)1 << mapsize;
2801 	if (basezero || base == pci_mapbase(testval)) {
2802 		start = 0;	/* Let the parent decide. */
2803 		end = ~0ul;
2804 	} else {
2805 		start = base;
2806 		end = base + count - 1;
2807 	}
2808 	resource_list_add(rl, type, reg, start, end, count);
2809 
2810 	/*
2811 	 * Try to allocate the resource for this BAR from our parent
2812 	 * so that this resource range is already reserved.  The
2813 	 * driver for this device will later inherit this resource in
2814 	 * pci_alloc_resource().
2815 	 */
2816 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2817 	    prefetch ? RF_PREFETCHABLE : 0);
2818 	if (res == NULL) {
2819 		/*
2820 		 * If the allocation fails, delete the resource list entry
2821 		 * to force pci_alloc_resource() to allocate resources
2822 		 * from the parent.
2823 		 */
2824 		resource_list_delete(rl, type, reg);
2825 	} else {
2826 		start = rman_get_start(res);
2827 		pci_write_bar(dev, pm, start);
2828 	}
2829 	return (barlen);
2830 }
2831 
2832 /*
2833  * For ATA devices we need to decide early what addressing mode to use.
2834  * Legacy demands that the primary and secondary ATA ports sits on the
2835  * same addresses that old ISA hardware did. This dictates that we use
2836  * those addresses and ignore the BAR's if we cannot set PCI native
2837  * addressing mode.
2838  */
2839 static void
2840 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2841     uint32_t prefetchmask)
2842 {
2843 	struct resource *r;
2844 	int rid, type, progif;
2845 #if 0
2846 	/* if this device supports PCI native addressing use it */
2847 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2848 	if ((progif & 0x8a) == 0x8a) {
2849 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2850 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2851 			printf("Trying ATA native PCI addressing mode\n");
2852 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2853 		}
2854 	}
2855 #endif
2856 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2857 	type = SYS_RES_IOPORT;
2858 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2859 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2860 		    prefetchmask & (1 << 0));
2861 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2862 		    prefetchmask & (1 << 1));
2863 	} else {
2864 		rid = PCIR_BAR(0);
2865 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2866 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2867 		    0x1f7, 8, 0);
2868 		rid = PCIR_BAR(1);
2869 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2870 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2871 		    0x3f6, 1, 0);
2872 	}
2873 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2874 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2875 		    prefetchmask & (1 << 2));
2876 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2877 		    prefetchmask & (1 << 3));
2878 	} else {
2879 		rid = PCIR_BAR(2);
2880 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2881 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2882 		    0x177, 8, 0);
2883 		rid = PCIR_BAR(3);
2884 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2885 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2886 		    0x376, 1, 0);
2887 	}
2888 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2889 	    prefetchmask & (1 << 4));
2890 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2891 	    prefetchmask & (1 << 5));
2892 }
2893 
2894 static void
2895 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2896 {
2897 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2898 	pcicfgregs *cfg = &dinfo->cfg;
2899 	char tunable_name[64];
2900 	int irq;
2901 
2902 	/* Has to have an intpin to have an interrupt. */
2903 	if (cfg->intpin == 0)
2904 		return;
2905 
2906 	/* Let the user override the IRQ with a tunable. */
2907 	irq = PCI_INVALID_IRQ;
2908 	snprintf(tunable_name, sizeof(tunable_name),
2909 	    "hw.pci%d.%d.%d.INT%c.irq",
2910 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2911 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2912 		irq = PCI_INVALID_IRQ;
2913 
2914 	/*
2915 	 * If we didn't get an IRQ via the tunable, then we either use the
2916 	 * IRQ value in the intline register or we ask the bus to route an
2917 	 * interrupt for us.  If force_route is true, then we only use the
2918 	 * value in the intline register if the bus was unable to assign an
2919 	 * IRQ.
2920 	 */
2921 	if (!PCI_INTERRUPT_VALID(irq)) {
2922 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2923 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2924 		if (!PCI_INTERRUPT_VALID(irq))
2925 			irq = cfg->intline;
2926 	}
2927 
2928 	/* If after all that we don't have an IRQ, just bail. */
2929 	if (!PCI_INTERRUPT_VALID(irq))
2930 		return;
2931 
2932 	/* Update the config register if it changed. */
2933 	if (irq != cfg->intline) {
2934 		cfg->intline = irq;
2935 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2936 	}
2937 
2938 	/* Add this IRQ as rid 0 interrupt resource. */
2939 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2940 }
2941 
2942 /* Perform early OHCI takeover from SMM. */
2943 static void
2944 ohci_early_takeover(device_t self)
2945 {
2946 	struct resource *res;
2947 	uint32_t ctl;
2948 	int rid;
2949 	int i;
2950 
2951 	rid = PCIR_BAR(0);
2952 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2953 	if (res == NULL)
2954 		return;
2955 
2956 	ctl = bus_read_4(res, OHCI_CONTROL);
2957 	if (ctl & OHCI_IR) {
2958 		if (bootverbose)
2959 			printf("ohci early: "
2960 			    "SMM active, request owner change\n");
2961 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2962 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2963 			DELAY(1000);
2964 			ctl = bus_read_4(res, OHCI_CONTROL);
2965 		}
2966 		if (ctl & OHCI_IR) {
2967 			if (bootverbose)
2968 				printf("ohci early: "
2969 				    "SMM does not respond, resetting\n");
2970 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2971 		}
2972 		/* Disable interrupts */
2973 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2974 	}
2975 
2976 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2977 }
2978 
2979 /* Perform early UHCI takeover from SMM. */
2980 static void
2981 uhci_early_takeover(device_t self)
2982 {
2983 	struct resource *res;
2984 	int rid;
2985 
2986 	/*
2987 	 * Set the PIRQD enable bit and switch off all the others. We don't
2988 	 * want legacy support to interfere with us XXX Does this also mean
2989 	 * that the BIOS won't touch the keyboard anymore if it is connected
2990 	 * to the ports of the root hub?
2991 	 */
2992 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2993 
2994 	/* Disable interrupts */
2995 	rid = PCI_UHCI_BASE_REG;
2996 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2997 	if (res != NULL) {
2998 		bus_write_2(res, UHCI_INTR, 0);
2999 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3000 	}
3001 }
3002 
3003 /* Perform early EHCI takeover from SMM. */
3004 static void
3005 ehci_early_takeover(device_t self)
3006 {
3007 	struct resource *res;
3008 	uint32_t cparams;
3009 	uint32_t eec;
3010 	uint8_t eecp;
3011 	uint8_t bios_sem;
3012 	uint8_t offs;
3013 	int rid;
3014 	int i;
3015 
3016 	rid = PCIR_BAR(0);
3017 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3018 	if (res == NULL)
3019 		return;
3020 
3021 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3022 
3023 	/* Synchronise with the BIOS if it owns the controller. */
3024 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3025 	    eecp = EHCI_EECP_NEXT(eec)) {
3026 		eec = pci_read_config(self, eecp, 4);
3027 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3028 			continue;
3029 		}
3030 		bios_sem = pci_read_config(self, eecp +
3031 		    EHCI_LEGSUP_BIOS_SEM, 1);
3032 		if (bios_sem == 0) {
3033 			continue;
3034 		}
3035 		if (bootverbose)
3036 			printf("ehci early: "
3037 			    "SMM active, request owner change\n");
3038 
3039 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3040 
3041 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3042 			DELAY(1000);
3043 			bios_sem = pci_read_config(self, eecp +
3044 			    EHCI_LEGSUP_BIOS_SEM, 1);
3045 		}
3046 
3047 		if (bios_sem != 0) {
3048 			if (bootverbose)
3049 				printf("ehci early: "
3050 				    "SMM does not respond\n");
3051 		}
3052 		/* Disable interrupts */
3053 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3054 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3055 	}
3056 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3057 }
3058 
3059 /* Perform early XHCI takeover from SMM. */
3060 static void
3061 xhci_early_takeover(device_t self)
3062 {
3063 	struct resource *res;
3064 	uint32_t cparams;
3065 	uint32_t eec;
3066 	uint8_t eecp;
3067 	uint8_t bios_sem;
3068 	uint8_t offs;
3069 	int rid;
3070 	int i;
3071 
3072 	rid = PCIR_BAR(0);
3073 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3074 	if (res == NULL)
3075 		return;
3076 
3077 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3078 
3079 	eec = -1;
3080 
3081 	/* Synchronise with the BIOS if it owns the controller. */
3082 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3083 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3084 		eec = bus_read_4(res, eecp);
3085 
3086 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3087 			continue;
3088 
3089 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3090 		if (bios_sem == 0)
3091 			continue;
3092 
3093 		if (bootverbose)
3094 			printf("xhci early: "
3095 			    "SMM active, request owner change\n");
3096 
3097 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3098 
3099 		/* wait a maximum of 5 second */
3100 
3101 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3102 			DELAY(1000);
3103 			bios_sem = bus_read_1(res, eecp +
3104 			    XHCI_XECP_BIOS_SEM);
3105 		}
3106 
3107 		if (bios_sem != 0) {
3108 			if (bootverbose)
3109 				printf("xhci early: "
3110 				    "SMM does not respond\n");
3111 		}
3112 
3113 		/* Disable interrupts */
3114 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3115 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3116 		bus_read_4(res, offs + XHCI_USBSTS);
3117 	}
3118 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3119 }
3120 
3121 void
3122 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3123 {
3124 	struct pci_devinfo *dinfo;
3125 	pcicfgregs *cfg;
3126 	struct resource_list *rl;
3127 	const struct pci_quirk *q;
3128 	uint32_t devid;
3129 	int i;
3130 
3131 	dinfo = device_get_ivars(dev);
3132 	cfg = &dinfo->cfg;
3133 	rl = &dinfo->resources;
3134 	devid = (cfg->device << 16) | cfg->vendor;
3135 
3136 	/* ATA devices needs special map treatment */
3137 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3138 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3139 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3140 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3141 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3142 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3143 	else
3144 		for (i = 0; i < cfg->nummaps;) {
3145 			/*
3146 			 * Skip quirked resources.
3147 			 */
3148 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3149 				if (q->devid == devid &&
3150 				    q->type == PCI_QUIRK_UNMAP_REG &&
3151 				    q->arg1 == PCIR_BAR(i))
3152 					break;
3153 			if (q->devid != 0) {
3154 				i++;
3155 				continue;
3156 			}
3157 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3158 			    prefetchmask & (1 << i));
3159 		}
3160 
3161 	/*
3162 	 * Add additional, quirked resources.
3163 	 */
3164 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3165 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3166 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3167 
3168 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3169 #ifdef __PCI_REROUTE_INTERRUPT
3170 		/*
3171 		 * Try to re-route interrupts. Sometimes the BIOS or
3172 		 * firmware may leave bogus values in these registers.
3173 		 * If the re-route fails, then just stick with what we
3174 		 * have.
3175 		 */
3176 		pci_assign_interrupt(bus, dev, 1);
3177 #else
3178 		pci_assign_interrupt(bus, dev, 0);
3179 #endif
3180 	}
3181 
3182 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3183 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3184 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3185 			xhci_early_takeover(dev);
3186 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3187 			ehci_early_takeover(dev);
3188 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3189 			ohci_early_takeover(dev);
3190 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3191 			uhci_early_takeover(dev);
3192 	}
3193 }
3194 
3195 void
3196 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3197 {
3198 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3199 	device_t pcib = device_get_parent(dev);
3200 	struct pci_devinfo *dinfo;
3201 	int maxslots;
3202 	int s, f, pcifunchigh;
3203 	uint8_t hdrtype;
3204 
3205 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3206 	    ("dinfo_size too small"));
3207 	maxslots = PCIB_MAXSLOTS(pcib);
3208 	for (s = 0; s <= maxslots; s++) {
3209 		pcifunchigh = 0;
3210 		f = 0;
3211 		DELAY(1);
3212 		hdrtype = REG(PCIR_HDRTYPE, 1);
3213 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3214 			continue;
3215 		if (hdrtype & PCIM_MFDEV)
3216 			pcifunchigh = PCI_FUNCMAX;
3217 		for (f = 0; f <= pcifunchigh; f++) {
3218 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3219 			    dinfo_size);
3220 			if (dinfo != NULL) {
3221 				pci_add_child(dev, dinfo);
3222 			}
3223 		}
3224 	}
3225 #undef REG
3226 }
3227 
3228 void
3229 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3230 {
3231 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3232 	device_set_ivars(dinfo->cfg.dev, dinfo);
3233 	resource_list_init(&dinfo->resources);
3234 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3235 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3236 	pci_print_verbose(dinfo);
3237 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3238 }
3239 
3240 static int
3241 pci_probe(device_t dev)
3242 {
3243 
3244 	device_set_desc(dev, "PCI bus");
3245 
3246 	/* Allow other subclasses to override this driver. */
3247 	return (BUS_PROBE_GENERIC);
3248 }
3249 
3250 int
3251 pci_attach_common(device_t dev)
3252 {
3253 	struct pci_softc *sc;
3254 	int busno, domain;
3255 #ifdef PCI_DMA_BOUNDARY
3256 	int error, tag_valid;
3257 #endif
3258 
3259 	sc = device_get_softc(dev);
3260 	domain = pcib_get_domain(dev);
3261 	busno = pcib_get_bus(dev);
3262 	if (bootverbose)
3263 		device_printf(dev, "domain=%d, physical bus=%d\n",
3264 		    domain, busno);
3265 #ifdef PCI_DMA_BOUNDARY
3266 	tag_valid = 0;
3267 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
3268 	    devclass_find("pci")) {
3269 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
3270 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3271 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
3272 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
3273 		if (error)
3274 			device_printf(dev, "Failed to create DMA tag: %d\n",
3275 			    error);
3276 		else
3277 			tag_valid = 1;
3278 	}
3279 	if (!tag_valid)
3280 #endif
3281 		sc->sc_dma_tag = bus_get_dma_tag(dev);
3282 	return (0);
3283 }
3284 
3285 static int
3286 pci_attach(device_t dev)
3287 {
3288 	int busno, domain, error;
3289 
3290 	error = pci_attach_common(dev);
3291 	if (error)
3292 		return (error);
3293 
3294 	/*
3295 	 * Since there can be multiple independantly numbered PCI
3296 	 * busses on systems with multiple PCI domains, we can't use
3297 	 * the unit number to decide which bus we are probing. We ask
3298 	 * the parent pcib what our domain and bus numbers are.
3299 	 */
3300 	domain = pcib_get_domain(dev);
3301 	busno = pcib_get_bus(dev);
3302 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3303 	return (bus_generic_attach(dev));
3304 }
3305 
3306 static void
3307 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3308     int state)
3309 {
3310 	device_t child, pcib;
3311 	struct pci_devinfo *dinfo;
3312 	int dstate, i;
3313 
3314 	/*
3315 	 * Set the device to the given state.  If the firmware suggests
3316 	 * a different power state, use it instead.  If power management
3317 	 * is not present, the firmware is responsible for managing
3318 	 * device power.  Skip children who aren't attached since they
3319 	 * are handled separately.
3320 	 */
3321 	pcib = device_get_parent(dev);
3322 	for (i = 0; i < numdevs; i++) {
3323 		child = devlist[i];
3324 		dinfo = device_get_ivars(child);
3325 		dstate = state;
3326 		if (device_is_attached(child) &&
3327 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3328 			pci_set_powerstate(child, dstate);
3329 	}
3330 }
3331 
3332 int
3333 pci_suspend(device_t dev)
3334 {
3335 	device_t child, *devlist;
3336 	struct pci_devinfo *dinfo;
3337 	int error, i, numdevs;
3338 
3339 	/*
3340 	 * Save the PCI configuration space for each child and set the
3341 	 * device in the appropriate power state for this sleep state.
3342 	 */
3343 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3344 		return (error);
3345 	for (i = 0; i < numdevs; i++) {
3346 		child = devlist[i];
3347 		dinfo = device_get_ivars(child);
3348 		pci_cfg_save(child, dinfo, 0);
3349 	}
3350 
3351 	/* Suspend devices before potentially powering them down. */
3352 	error = bus_generic_suspend(dev);
3353 	if (error) {
3354 		free(devlist, M_TEMP);
3355 		return (error);
3356 	}
3357 	if (pci_do_power_suspend)
3358 		pci_set_power_children(dev, devlist, numdevs,
3359 		    PCI_POWERSTATE_D3);
3360 	free(devlist, M_TEMP);
3361 	return (0);
3362 }
3363 
3364 int
3365 pci_resume(device_t dev)
3366 {
3367 	device_t child, *devlist;
3368 	struct pci_devinfo *dinfo;
3369 	int error, i, numdevs;
3370 
3371 	/*
3372 	 * Set each child to D0 and restore its PCI configuration space.
3373 	 */
3374 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3375 		return (error);
3376 	if (pci_do_power_resume)
3377 		pci_set_power_children(dev, devlist, numdevs,
3378 		    PCI_POWERSTATE_D0);
3379 
3380 	/* Now the device is powered up, restore its config space. */
3381 	for (i = 0; i < numdevs; i++) {
3382 		child = devlist[i];
3383 		dinfo = device_get_ivars(child);
3384 
3385 		pci_cfg_restore(child, dinfo);
3386 		if (!device_is_attached(child))
3387 			pci_cfg_save(child, dinfo, 1);
3388 	}
3389 
3390 	/*
3391 	 * Resume critical devices first, then everything else later.
3392 	 */
3393 	for (i = 0; i < numdevs; i++) {
3394 		child = devlist[i];
3395 		switch (pci_get_class(child)) {
3396 		case PCIC_DISPLAY:
3397 		case PCIC_MEMORY:
3398 		case PCIC_BRIDGE:
3399 		case PCIC_BASEPERIPH:
3400 			DEVICE_RESUME(child);
3401 			break;
3402 		}
3403 	}
3404 	for (i = 0; i < numdevs; i++) {
3405 		child = devlist[i];
3406 		switch (pci_get_class(child)) {
3407 		case PCIC_DISPLAY:
3408 		case PCIC_MEMORY:
3409 		case PCIC_BRIDGE:
3410 		case PCIC_BASEPERIPH:
3411 			break;
3412 		default:
3413 			DEVICE_RESUME(child);
3414 		}
3415 	}
3416 	free(devlist, M_TEMP);
3417 	return (0);
3418 }
3419 
3420 static void
3421 pci_load_vendor_data(void)
3422 {
3423 	caddr_t data;
3424 	void *ptr;
3425 	size_t sz;
3426 
3427 	data = preload_search_by_type("pci_vendor_data");
3428 	if (data != NULL) {
3429 		ptr = preload_fetch_addr(data);
3430 		sz = preload_fetch_size(data);
3431 		if (ptr != NULL && sz != 0) {
3432 			pci_vendordata = ptr;
3433 			pci_vendordata_size = sz;
3434 			/* terminate the database */
3435 			pci_vendordata[pci_vendordata_size] = '\n';
3436 		}
3437 	}
3438 }
3439 
3440 void
3441 pci_driver_added(device_t dev, driver_t *driver)
3442 {
3443 	int numdevs;
3444 	device_t *devlist;
3445 	device_t child;
3446 	struct pci_devinfo *dinfo;
3447 	int i;
3448 
3449 	if (bootverbose)
3450 		device_printf(dev, "driver added\n");
3451 	DEVICE_IDENTIFY(driver, dev);
3452 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3453 		return;
3454 	for (i = 0; i < numdevs; i++) {
3455 		child = devlist[i];
3456 		if (device_get_state(child) != DS_NOTPRESENT)
3457 			continue;
3458 		dinfo = device_get_ivars(child);
3459 		pci_print_verbose(dinfo);
3460 		if (bootverbose)
3461 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3462 		pci_cfg_restore(child, dinfo);
3463 		if (device_probe_and_attach(child) != 0)
3464 			pci_cfg_save(child, dinfo, 1);
3465 	}
3466 	free(devlist, M_TEMP);
3467 }
3468 
3469 int
3470 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3471     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3472 {
3473 	struct pci_devinfo *dinfo;
3474 	struct msix_table_entry *mte;
3475 	struct msix_vector *mv;
3476 	uint64_t addr;
3477 	uint32_t data;
3478 	void *cookie;
3479 	int error, rid;
3480 
3481 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3482 	    arg, &cookie);
3483 	if (error)
3484 		return (error);
3485 
3486 	/* If this is not a direct child, just bail out. */
3487 	if (device_get_parent(child) != dev) {
3488 		*cookiep = cookie;
3489 		return(0);
3490 	}
3491 
3492 	rid = rman_get_rid(irq);
3493 	if (rid == 0) {
3494 		/* Make sure that INTx is enabled */
3495 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3496 	} else {
3497 		/*
3498 		 * Check to see if the interrupt is MSI or MSI-X.
3499 		 * Ask our parent to map the MSI and give
3500 		 * us the address and data register values.
3501 		 * If we fail for some reason, teardown the
3502 		 * interrupt handler.
3503 		 */
3504 		dinfo = device_get_ivars(child);
3505 		if (dinfo->cfg.msi.msi_alloc > 0) {
3506 			if (dinfo->cfg.msi.msi_addr == 0) {
3507 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3508 			    ("MSI has handlers, but vectors not mapped"));
3509 				error = PCIB_MAP_MSI(device_get_parent(dev),
3510 				    child, rman_get_start(irq), &addr, &data);
3511 				if (error)
3512 					goto bad;
3513 				dinfo->cfg.msi.msi_addr = addr;
3514 				dinfo->cfg.msi.msi_data = data;
3515 			}
3516 			if (dinfo->cfg.msi.msi_handlers == 0)
3517 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3518 				    dinfo->cfg.msi.msi_data);
3519 			dinfo->cfg.msi.msi_handlers++;
3520 		} else {
3521 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3522 			    ("No MSI or MSI-X interrupts allocated"));
3523 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3524 			    ("MSI-X index too high"));
3525 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3526 			KASSERT(mte->mte_vector != 0, ("no message vector"));
3527 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3528 			KASSERT(mv->mv_irq == rman_get_start(irq),
3529 			    ("IRQ mismatch"));
3530 			if (mv->mv_address == 0) {
3531 				KASSERT(mte->mte_handlers == 0,
3532 		    ("MSI-X table entry has handlers, but vector not mapped"));
3533 				error = PCIB_MAP_MSI(device_get_parent(dev),
3534 				    child, rman_get_start(irq), &addr, &data);
3535 				if (error)
3536 					goto bad;
3537 				mv->mv_address = addr;
3538 				mv->mv_data = data;
3539 			}
3540 			if (mte->mte_handlers == 0) {
3541 				pci_enable_msix(child, rid - 1, mv->mv_address,
3542 				    mv->mv_data);
3543 				pci_unmask_msix(child, rid - 1);
3544 			}
3545 			mte->mte_handlers++;
3546 		}
3547 
3548 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
3549 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3550 	bad:
3551 		if (error) {
3552 			(void)bus_generic_teardown_intr(dev, child, irq,
3553 			    cookie);
3554 			return (error);
3555 		}
3556 	}
3557 	*cookiep = cookie;
3558 	return (0);
3559 }
3560 
3561 int
3562 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3563     void *cookie)
3564 {
3565 	struct msix_table_entry *mte;
3566 	struct resource_list_entry *rle;
3567 	struct pci_devinfo *dinfo;
3568 	int error, rid;
3569 
3570 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3571 		return (EINVAL);
3572 
3573 	/* If this isn't a direct child, just bail out */
3574 	if (device_get_parent(child) != dev)
3575 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3576 
3577 	rid = rman_get_rid(irq);
3578 	if (rid == 0) {
3579 		/* Mask INTx */
3580 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3581 	} else {
3582 		/*
3583 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3584 		 * decrement the appropriate handlers count and mask the
3585 		 * MSI-X message, or disable MSI messages if the count
3586 		 * drops to 0.
3587 		 */
3588 		dinfo = device_get_ivars(child);
3589 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3590 		if (rle->res != irq)
3591 			return (EINVAL);
3592 		if (dinfo->cfg.msi.msi_alloc > 0) {
3593 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3594 			    ("MSI-X index too high"));
3595 			if (dinfo->cfg.msi.msi_handlers == 0)
3596 				return (EINVAL);
3597 			dinfo->cfg.msi.msi_handlers--;
3598 			if (dinfo->cfg.msi.msi_handlers == 0)
3599 				pci_disable_msi(child);
3600 		} else {
3601 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3602 			    ("No MSI or MSI-X interrupts allocated"));
3603 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3604 			    ("MSI-X index too high"));
3605 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3606 			if (mte->mte_handlers == 0)
3607 				return (EINVAL);
3608 			mte->mte_handlers--;
3609 			if (mte->mte_handlers == 0)
3610 				pci_mask_msix(child, rid - 1);
3611 		}
3612 	}
3613 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3614 	if (rid > 0)
3615 		KASSERT(error == 0,
3616 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3617 	return (error);
3618 }
3619 
3620 int
3621 pci_print_child(device_t dev, device_t child)
3622 {
3623 	struct pci_devinfo *dinfo;
3624 	struct resource_list *rl;
3625 	int retval = 0;
3626 
3627 	dinfo = device_get_ivars(child);
3628 	rl = &dinfo->resources;
3629 
3630 	retval += bus_print_child_header(dev, child);
3631 
3632 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3633 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3634 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3635 	if (device_get_flags(dev))
3636 		retval += printf(" flags %#x", device_get_flags(dev));
3637 
3638 	retval += printf(" at device %d.%d", pci_get_slot(child),
3639 	    pci_get_function(child));
3640 
3641 	retval += bus_print_child_footer(dev, child);
3642 
3643 	return (retval);
3644 }
3645 
3646 static struct
3647 {
3648 	int	class;
3649 	int	subclass;
3650 	char	*desc;
3651 } pci_nomatch_tab[] = {
3652 	{PCIC_OLD,		-1,			"old"},
3653 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3654 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3655 	{PCIC_STORAGE,		-1,			"mass storage"},
3656 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3657 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3658 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3659 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3660 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3661 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3662 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3663 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3664 	{PCIC_NETWORK,		-1,			"network"},
3665 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3666 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3667 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3668 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3669 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3670 	{PCIC_DISPLAY,		-1,			"display"},
3671 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3672 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3673 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3674 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3675 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3676 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3677 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3678 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3679 	{PCIC_MEMORY,		-1,			"memory"},
3680 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3681 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3682 	{PCIC_BRIDGE,		-1,			"bridge"},
3683 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3684 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3685 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3686 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3687 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3688 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3689 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3690 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3691 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3692 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3693 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3694 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3695 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3696 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3697 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3698 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3699 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3700 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3701 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3702 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3703 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3704 	{PCIC_INPUTDEV,		-1,			"input device"},
3705 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3706 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3707 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3708 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3709 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3710 	{PCIC_DOCKING,		-1,			"docking station"},
3711 	{PCIC_PROCESSOR,	-1,			"processor"},
3712 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3713 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3714 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3715 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3716 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3717 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3718 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3719 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3720 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3721 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3722 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3723 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3724 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3725 	{PCIC_SATCOM,		-1,			"satellite communication"},
3726 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3727 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3728 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3729 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3730 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3731 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3732 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3733 	{PCIC_DASP,		-1,			"dasp"},
3734 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3735 	{0, 0,		NULL}
3736 };
3737 
3738 void
3739 pci_probe_nomatch(device_t dev, device_t child)
3740 {
3741 	int	i;
3742 	char	*cp, *scp, *device;
3743 
3744 	/*
3745 	 * Look for a listing for this device in a loaded device database.
3746 	 */
3747 	if ((device = pci_describe_device(child)) != NULL) {
3748 		device_printf(dev, "<%s>", device);
3749 		free(device, M_DEVBUF);
3750 	} else {
3751 		/*
3752 		 * Scan the class/subclass descriptions for a general
3753 		 * description.
3754 		 */
3755 		cp = "unknown";
3756 		scp = NULL;
3757 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3758 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3759 				if (pci_nomatch_tab[i].subclass == -1) {
3760 					cp = pci_nomatch_tab[i].desc;
3761 				} else if (pci_nomatch_tab[i].subclass ==
3762 				    pci_get_subclass(child)) {
3763 					scp = pci_nomatch_tab[i].desc;
3764 				}
3765 			}
3766 		}
3767 		device_printf(dev, "<%s%s%s>",
3768 		    cp ? cp : "",
3769 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3770 		    scp ? scp : "");
3771 	}
3772 	printf(" at device %d.%d (no driver attached)\n",
3773 	    pci_get_slot(child), pci_get_function(child));
3774 	pci_cfg_save(child, device_get_ivars(child), 1);
3775 	return;
3776 }
3777 
3778 /*
3779  * Parse the PCI device database, if loaded, and return a pointer to a
3780  * description of the device.
3781  *
3782  * The database is flat text formatted as follows:
3783  *
3784  * Any line not in a valid format is ignored.
3785  * Lines are terminated with newline '\n' characters.
3786  *
3787  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3788  * the vendor name.
3789  *
3790  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3791  * - devices cannot be listed without a corresponding VENDOR line.
3792  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3793  * another TAB, then the device name.
3794  */
3795 
3796 /*
3797  * Assuming (ptr) points to the beginning of a line in the database,
3798  * return the vendor or device and description of the next entry.
3799  * The value of (vendor) or (device) inappropriate for the entry type
3800  * is set to -1.  Returns nonzero at the end of the database.
3801  *
3802  * Note that this is slightly unrobust in the face of corrupt data;
3803  * we attempt to safeguard against this by spamming the end of the
3804  * database with a newline when we initialise.
3805  */
3806 static int
3807 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3808 {
3809 	char	*cp = *ptr;
3810 	int	left;
3811 
3812 	*device = -1;
3813 	*vendor = -1;
3814 	**desc = '\0';
3815 	for (;;) {
3816 		left = pci_vendordata_size - (cp - pci_vendordata);
3817 		if (left <= 0) {
3818 			*ptr = cp;
3819 			return(1);
3820 		}
3821 
3822 		/* vendor entry? */
3823 		if (*cp != '\t' &&
3824 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3825 			break;
3826 		/* device entry? */
3827 		if (*cp == '\t' &&
3828 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3829 			break;
3830 
3831 		/* skip to next line */
3832 		while (*cp != '\n' && left > 0) {
3833 			cp++;
3834 			left--;
3835 		}
3836 		if (*cp == '\n') {
3837 			cp++;
3838 			left--;
3839 		}
3840 	}
3841 	/* skip to next line */
3842 	while (*cp != '\n' && left > 0) {
3843 		cp++;
3844 		left--;
3845 	}
3846 	if (*cp == '\n' && left > 0)
3847 		cp++;
3848 	*ptr = cp;
3849 	return(0);
3850 }
3851 
3852 static char *
3853 pci_describe_device(device_t dev)
3854 {
3855 	int	vendor, device;
3856 	char	*desc, *vp, *dp, *line;
3857 
3858 	desc = vp = dp = NULL;
3859 
3860 	/*
3861 	 * If we have no vendor data, we can't do anything.
3862 	 */
3863 	if (pci_vendordata == NULL)
3864 		goto out;
3865 
3866 	/*
3867 	 * Scan the vendor data looking for this device
3868 	 */
3869 	line = pci_vendordata;
3870 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3871 		goto out;
3872 	for (;;) {
3873 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3874 			goto out;
3875 		if (vendor == pci_get_vendor(dev))
3876 			break;
3877 	}
3878 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3879 		goto out;
3880 	for (;;) {
3881 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3882 			*dp = 0;
3883 			break;
3884 		}
3885 		if (vendor != -1) {
3886 			*dp = 0;
3887 			break;
3888 		}
3889 		if (device == pci_get_device(dev))
3890 			break;
3891 	}
3892 	if (dp[0] == '\0')
3893 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3894 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3895 	    NULL)
3896 		sprintf(desc, "%s, %s", vp, dp);
3897 out:
3898 	if (vp != NULL)
3899 		free(vp, M_DEVBUF);
3900 	if (dp != NULL)
3901 		free(dp, M_DEVBUF);
3902 	return(desc);
3903 }
3904 
3905 int
3906 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3907 {
3908 	struct pci_devinfo *dinfo;
3909 	pcicfgregs *cfg;
3910 
3911 	dinfo = device_get_ivars(child);
3912 	cfg = &dinfo->cfg;
3913 
3914 	switch (which) {
3915 	case PCI_IVAR_ETHADDR:
3916 		/*
3917 		 * The generic accessor doesn't deal with failure, so
3918 		 * we set the return value, then return an error.
3919 		 */
3920 		*((uint8_t **) result) = NULL;
3921 		return (EINVAL);
3922 	case PCI_IVAR_SUBVENDOR:
3923 		*result = cfg->subvendor;
3924 		break;
3925 	case PCI_IVAR_SUBDEVICE:
3926 		*result = cfg->subdevice;
3927 		break;
3928 	case PCI_IVAR_VENDOR:
3929 		*result = cfg->vendor;
3930 		break;
3931 	case PCI_IVAR_DEVICE:
3932 		*result = cfg->device;
3933 		break;
3934 	case PCI_IVAR_DEVID:
3935 		*result = (cfg->device << 16) | cfg->vendor;
3936 		break;
3937 	case PCI_IVAR_CLASS:
3938 		*result = cfg->baseclass;
3939 		break;
3940 	case PCI_IVAR_SUBCLASS:
3941 		*result = cfg->subclass;
3942 		break;
3943 	case PCI_IVAR_PROGIF:
3944 		*result = cfg->progif;
3945 		break;
3946 	case PCI_IVAR_REVID:
3947 		*result = cfg->revid;
3948 		break;
3949 	case PCI_IVAR_INTPIN:
3950 		*result = cfg->intpin;
3951 		break;
3952 	case PCI_IVAR_IRQ:
3953 		*result = cfg->intline;
3954 		break;
3955 	case PCI_IVAR_DOMAIN:
3956 		*result = cfg->domain;
3957 		break;
3958 	case PCI_IVAR_BUS:
3959 		*result = cfg->bus;
3960 		break;
3961 	case PCI_IVAR_SLOT:
3962 		*result = cfg->slot;
3963 		break;
3964 	case PCI_IVAR_FUNCTION:
3965 		*result = cfg->func;
3966 		break;
3967 	case PCI_IVAR_CMDREG:
3968 		*result = cfg->cmdreg;
3969 		break;
3970 	case PCI_IVAR_CACHELNSZ:
3971 		*result = cfg->cachelnsz;
3972 		break;
3973 	case PCI_IVAR_MINGNT:
3974 		*result = cfg->mingnt;
3975 		break;
3976 	case PCI_IVAR_MAXLAT:
3977 		*result = cfg->maxlat;
3978 		break;
3979 	case PCI_IVAR_LATTIMER:
3980 		*result = cfg->lattimer;
3981 		break;
3982 	default:
3983 		return (ENOENT);
3984 	}
3985 	return (0);
3986 }
3987 
3988 int
3989 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3990 {
3991 	struct pci_devinfo *dinfo;
3992 
3993 	dinfo = device_get_ivars(child);
3994 
3995 	switch (which) {
3996 	case PCI_IVAR_INTPIN:
3997 		dinfo->cfg.intpin = value;
3998 		return (0);
3999 	case PCI_IVAR_ETHADDR:
4000 	case PCI_IVAR_SUBVENDOR:
4001 	case PCI_IVAR_SUBDEVICE:
4002 	case PCI_IVAR_VENDOR:
4003 	case PCI_IVAR_DEVICE:
4004 	case PCI_IVAR_DEVID:
4005 	case PCI_IVAR_CLASS:
4006 	case PCI_IVAR_SUBCLASS:
4007 	case PCI_IVAR_PROGIF:
4008 	case PCI_IVAR_REVID:
4009 	case PCI_IVAR_IRQ:
4010 	case PCI_IVAR_DOMAIN:
4011 	case PCI_IVAR_BUS:
4012 	case PCI_IVAR_SLOT:
4013 	case PCI_IVAR_FUNCTION:
4014 		return (EINVAL);	/* disallow for now */
4015 
4016 	default:
4017 		return (ENOENT);
4018 	}
4019 }
4020 
4021 #include "opt_ddb.h"
4022 #ifdef DDB
4023 #include <ddb/ddb.h>
4024 #include <sys/cons.h>
4025 
/*
 * List resources based on PCI map registers; intended for use from
 * within ddb.
 */
4029 
4030 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4031 {
4032 	struct pci_devinfo *dinfo;
4033 	struct devlist *devlist_head;
4034 	struct pci_conf *p;
4035 	const char *name;
4036 	int i, error, none_count;
4037 
4038 	none_count = 0;
4039 	/* get the head of the device queue */
4040 	devlist_head = &pci_devq;
4041 
4042 	/*
4043 	 * Go through the list of devices and print out devices
4044 	 */
4045 	for (error = 0, i = 0,
4046 	     dinfo = STAILQ_FIRST(devlist_head);
4047 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4048 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4049 
4050 		/* Populate pd_name and pd_unit */
4051 		name = NULL;
4052 		if (dinfo->cfg.dev)
4053 			name = device_get_name(dinfo->cfg.dev);
4054 
4055 		p = &dinfo->conf;
4056 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4057 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4058 			(name && *name) ? name : "none",
4059 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4060 			none_count++,
4061 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4062 			p->pc_sel.pc_func, (p->pc_class << 16) |
4063 			(p->pc_subclass << 8) | p->pc_progif,
4064 			(p->pc_subdevice << 16) | p->pc_subvendor,
4065 			(p->pc_device << 16) | p->pc_vendor,
4066 			p->pc_revid, p->pc_hdr);
4067 	}
4068 }
4069 #endif /* DDB */
4070 
4071 static struct resource *
4072 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4073     u_long start, u_long end, u_long count, u_int flags)
4074 {
4075 	struct pci_devinfo *dinfo = device_get_ivars(child);
4076 	struct resource_list *rl = &dinfo->resources;
4077 	struct resource_list_entry *rle;
4078 	struct resource *res;
4079 	struct pci_map *pm;
4080 	pci_addr_t map, testval;
4081 	int mapsize;
4082 
4083 	res = NULL;
4084 	pm = pci_find_bar(child, *rid);
4085 	if (pm != NULL) {
4086 		/* This is a BAR that we failed to allocate earlier. */
4087 		mapsize = pm->pm_size;
4088 		map = pm->pm_value;
4089 	} else {
4090 		/*
4091 		 * Weed out the bogons, and figure out how large the
4092 		 * BAR/map is.  BARs that read back 0 here are bogus
4093 		 * and unimplemented.  Note: atapci in legacy mode are
4094 		 * special and handled elsewhere in the code.  If you
4095 		 * have a atapci device in legacy mode and it fails
4096 		 * here, that other code is broken.
4097 		 */
4098 		pci_read_bar(child, *rid, &map, &testval);
4099 
4100 		/*
4101 		 * Determine the size of the BAR and ignore BARs with a size
4102 		 * of 0.  Device ROM BARs use a different mask value.
4103 		 */
4104 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4105 			mapsize = pci_romsize(testval);
4106 		else
4107 			mapsize = pci_mapsize(testval);
4108 		if (mapsize == 0)
4109 			goto out;
4110 		pm = pci_add_bar(child, *rid, map, mapsize);
4111 	}
4112 
4113 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4114 		if (type != SYS_RES_MEMORY) {
4115 			if (bootverbose)
4116 				device_printf(dev,
4117 				    "child %s requested type %d for rid %#x,"
4118 				    " but the BAR says it is an memio\n",
4119 				    device_get_nameunit(child), type, *rid);
4120 			goto out;
4121 		}
4122 	} else {
4123 		if (type != SYS_RES_IOPORT) {
4124 			if (bootverbose)
4125 				device_printf(dev,
4126 				    "child %s requested type %d for rid %#x,"
4127 				    " but the BAR says it is an ioport\n",
4128 				    device_get_nameunit(child), type, *rid);
4129 			goto out;
4130 		}
4131 	}
4132 
4133 	/*
4134 	 * For real BARs, we need to override the size that
4135 	 * the driver requests, because that's what the BAR
4136 	 * actually uses and we would otherwise have a
4137 	 * situation where we might allocate the excess to
4138 	 * another driver, which won't work.
4139 	 */
4140 	count = (pci_addr_t)1 << mapsize;
4141 	if (RF_ALIGNMENT(flags) < mapsize)
4142 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4143 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4144 		flags |= RF_PREFETCHABLE;
4145 
4146 	/*
4147 	 * Allocate enough resource, and then write back the
4148 	 * appropriate BAR for that resource.
4149 	 */
4150 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
4151 	    start, end, count, flags & ~RF_ACTIVE);
4152 	if (res == NULL) {
4153 		device_printf(child,
4154 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4155 		    count, *rid, type, start, end);
4156 		goto out;
4157 	}
4158 	resource_list_add(rl, type, *rid, start, end, count);
4159 	rle = resource_list_find(rl, type, *rid);
4160 	if (rle == NULL)
4161 		panic("pci_reserve_map: unexpectedly can't find resource.");
4162 	rle->res = res;
4163 	rle->start = rman_get_start(res);
4164 	rle->end = rman_get_end(res);
4165 	rle->count = count;
4166 	rle->flags = RLE_RESERVED;
4167 	if (bootverbose)
4168 		device_printf(child,
4169 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4170 		    count, *rid, type, rman_get_start(res));
4171 	map = rman_get_start(res);
4172 	pci_write_bar(child, pm, map);
4173 out:
4174 	return (res);
4175 }
4176 
4177 struct resource *
4178 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4179 		   u_long start, u_long end, u_long count, u_int flags)
4180 {
4181 	struct pci_devinfo *dinfo = device_get_ivars(child);
4182 	struct resource_list *rl = &dinfo->resources;
4183 	struct resource_list_entry *rle;
4184 	struct resource *res;
4185 	pcicfgregs *cfg = &dinfo->cfg;
4186 
4187 	if (device_get_parent(child) != dev)
4188 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4189 		    type, rid, start, end, count, flags));
4190 
4191 	/*
4192 	 * Perform lazy resource allocation
4193 	 */
4194 	switch (type) {
4195 	case SYS_RES_IRQ:
4196 		/*
4197 		 * Can't alloc legacy interrupt once MSI messages have
4198 		 * been allocated.
4199 		 */
4200 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4201 		    cfg->msix.msix_alloc > 0))
4202 			return (NULL);
4203 
4204 		/*
4205 		 * If the child device doesn't have an interrupt
4206 		 * routed and is deserving of an interrupt, try to
4207 		 * assign it one.
4208 		 */
4209 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4210 		    (cfg->intpin != 0))
4211 			pci_assign_interrupt(dev, child, 0);
4212 		break;
4213 	case SYS_RES_IOPORT:
4214 	case SYS_RES_MEMORY:
4215 #ifdef NEW_PCIB
4216 		/*
4217 		 * PCI-PCI bridge I/O window resources are not BARs.
4218 		 * For those allocations just pass the request up the
4219 		 * tree.
4220 		 */
4221 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4222 			switch (*rid) {
4223 			case PCIR_IOBASEL_1:
4224 			case PCIR_MEMBASE_1:
4225 			case PCIR_PMBASEL_1:
4226 				/*
4227 				 * XXX: Should we bother creating a resource
4228 				 * list entry?
4229 				 */
4230 				return (bus_generic_alloc_resource(dev, child,
4231 				    type, rid, start, end, count, flags));
4232 			}
4233 		}
4234 #endif
4235 		/* Reserve resources for this BAR if needed. */
4236 		rle = resource_list_find(rl, type, *rid);
4237 		if (rle == NULL) {
4238 			res = pci_reserve_map(dev, child, type, rid, start, end,
4239 			    count, flags);
4240 			if (res == NULL)
4241 				return (NULL);
4242 		}
4243 	}
4244 	return (resource_list_alloc(rl, dev, child, type, rid,
4245 	    start, end, count, flags));
4246 }
4247 
4248 int
4249 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4250     struct resource *r)
4251 {
4252 	struct pci_devinfo *dinfo;
4253 	int error;
4254 
4255 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4256 	if (error)
4257 		return (error);
4258 
4259 	/* Enable decoding in the command register when activating BARs. */
4260 	if (device_get_parent(child) == dev) {
4261 		/* Device ROMs need their decoding explicitly enabled. */
4262 		dinfo = device_get_ivars(child);
4263 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4264 			pci_write_bar(child, pci_find_bar(child, rid),
4265 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4266 		switch (type) {
4267 		case SYS_RES_IOPORT:
4268 		case SYS_RES_MEMORY:
4269 			error = PCI_ENABLE_IO(dev, child, type);
4270 			break;
4271 		}
4272 	}
4273 	return (error);
4274 }
4275 
4276 int
4277 pci_deactivate_resource(device_t dev, device_t child, int type,
4278     int rid, struct resource *r)
4279 {
4280 	struct pci_devinfo *dinfo;
4281 	int error;
4282 
4283 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4284 	if (error)
4285 		return (error);
4286 
4287 	/* Disable decoding for device ROMs. */
4288 	if (device_get_parent(child) == dev) {
4289 		dinfo = device_get_ivars(child);
4290 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4291 			pci_write_bar(child, pci_find_bar(child, rid),
4292 			    rman_get_start(r));
4293 	}
4294 	return (0);
4295 }
4296 
4297 void
4298 pci_delete_child(device_t dev, device_t child)
4299 {
4300 	struct resource_list_entry *rle;
4301 	struct resource_list *rl;
4302 	struct pci_devinfo *dinfo;
4303 
4304 	dinfo = device_get_ivars(child);
4305 	rl = &dinfo->resources;
4306 
4307 	if (device_is_attached(child))
4308 		device_detach(child);
4309 
4310 	/* Turn off access to resources we're about to free */
4311 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4312 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4313 
4314 	/* Free all allocated resources */
4315 	STAILQ_FOREACH(rle, rl, link) {
4316 		if (rle->res) {
4317 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4318 			    resource_list_busy(rl, rle->type, rle->rid)) {
4319 				pci_printf(&dinfo->cfg,
4320 				    "Resource still owned, oops. "
4321 				    "(type=%d, rid=%d, addr=%lx)\n",
4322 				    rle->type, rle->rid,
4323 				    rman_get_start(rle->res));
4324 				bus_release_resource(child, rle->type, rle->rid,
4325 				    rle->res);
4326 			}
4327 			resource_list_unreserve(rl, dev, child, rle->type,
4328 			    rle->rid);
4329 		}
4330 	}
4331 	resource_list_free(rl);
4332 
4333 	device_delete_child(dev, child);
4334 	pci_freecfg(dinfo);
4335 }
4336 
4337 void
4338 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4339 {
4340 	struct pci_devinfo *dinfo;
4341 	struct resource_list *rl;
4342 	struct resource_list_entry *rle;
4343 
4344 	if (device_get_parent(child) != dev)
4345 		return;
4346 
4347 	dinfo = device_get_ivars(child);
4348 	rl = &dinfo->resources;
4349 	rle = resource_list_find(rl, type, rid);
4350 	if (rle == NULL)
4351 		return;
4352 
4353 	if (rle->res) {
4354 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4355 		    resource_list_busy(rl, type, rid)) {
4356 			device_printf(dev, "delete_resource: "
4357 			    "Resource still owned by child, oops. "
4358 			    "(type=%d, rid=%d, addr=%lx)\n",
4359 			    type, rid, rman_get_start(rle->res));
4360 			return;
4361 		}
4362 		resource_list_unreserve(rl, dev, child, type, rid);
4363 	}
4364 	resource_list_delete(rl, type, rid);
4365 }
4366 
4367 struct resource_list *
4368 pci_get_resource_list (device_t dev, device_t child)
4369 {
4370 	struct pci_devinfo *dinfo = device_get_ivars(child);
4371 
4372 	return (&dinfo->resources);
4373 }
4374 
4375 bus_dma_tag_t
4376 pci_get_dma_tag(device_t bus, device_t dev)
4377 {
4378 	struct pci_softc *sc = device_get_softc(bus);
4379 
4380 	return (sc->sc_dma_tag);
4381 }
4382 
4383 uint32_t
4384 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4385 {
4386 	struct pci_devinfo *dinfo = device_get_ivars(child);
4387 	pcicfgregs *cfg = &dinfo->cfg;
4388 
4389 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4390 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4391 }
4392 
4393 void
4394 pci_write_config_method(device_t dev, device_t child, int reg,
4395     uint32_t val, int width)
4396 {
4397 	struct pci_devinfo *dinfo = device_get_ivars(child);
4398 	pcicfgregs *cfg = &dinfo->cfg;
4399 
4400 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4401 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4402 }
4403 
4404 int
4405 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4406     size_t buflen)
4407 {
4408 
4409 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4410 	    pci_get_function(child));
4411 	return (0);
4412 }
4413 
4414 int
4415 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4416     size_t buflen)
4417 {
4418 	struct pci_devinfo *dinfo;
4419 	pcicfgregs *cfg;
4420 
4421 	dinfo = device_get_ivars(child);
4422 	cfg = &dinfo->cfg;
4423 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4424 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4425 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4426 	    cfg->progif);
4427 	return (0);
4428 }
4429 
4430 int
4431 pci_assign_interrupt_method(device_t dev, device_t child)
4432 {
4433 	struct pci_devinfo *dinfo = device_get_ivars(child);
4434 	pcicfgregs *cfg = &dinfo->cfg;
4435 
4436 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4437 	    cfg->intpin));
4438 }
4439 
4440 static int
4441 pci_modevent(module_t mod, int what, void *arg)
4442 {
4443 	static struct cdev *pci_cdev;
4444 
4445 	switch (what) {
4446 	case MOD_LOAD:
4447 		STAILQ_INIT(&pci_devq);
4448 		pci_generation = 0;
4449 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4450 		    "pci");
4451 		pci_load_vendor_data();
4452 		break;
4453 
4454 	case MOD_UNLOAD:
4455 		destroy_dev(pci_cdev);
4456 		break;
4457 	}
4458 
4459 	return (0);
4460 }
4461 
4462 static void
4463 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
4464 {
4465 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
4466 	struct pcicfg_pcie *cfg;
4467 	int version, pos;
4468 
4469 	cfg = &dinfo->cfg.pcie;
4470 	pos = cfg->pcie_location;
4471 
4472 	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;
4473 
4474 	WREG(PCIR_EXPRESS_DEVICE_CTL, cfg->pcie_device_ctl);
4475 
4476 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4477 	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
4478 	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
4479 		WREG(PCIR_EXPRESS_LINK_CTL, cfg->pcie_link_ctl);
4480 
4481 	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4482 	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
4483 	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
4484 		WREG(PCIR_EXPRESS_SLOT_CTL, cfg->pcie_slot_ctl);
4485 
4486 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4487 	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
4488 		WREG(PCIR_EXPRESS_ROOT_CTL, cfg->pcie_root_ctl);
4489 
4490 	if (version > 1) {
4491 		WREG(PCIR_EXPRESS_DEVICE_CTL2, cfg->pcie_device_ctl2);
4492 		WREG(PCIR_EXPRESS_LINK_CTL2, cfg->pcie_link_ctl2);
4493 		WREG(PCIR_EXPRESS_SLOT_CTL2, cfg->pcie_slot_ctl2);
4494 	}
4495 #undef WREG
4496 }
4497 
4498 static void
4499 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4500 {
4501 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4502 	    dinfo->cfg.pcix.pcix_command,  2);
4503 }
4504 
4505 void
4506 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4507 {
4508 
4509 	/*
4510 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4511 	 * which we know need special treatment.  Type 2 devices are
4512 	 * cardbus bridges which also require special treatment.
4513 	 * Other types are unknown, and we err on the side of safety
4514 	 * by ignoring them.
4515 	 */
4516 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4517 		return;
4518 
4519 	/*
4520 	 * Restore the device to full power mode.  We must do this
4521 	 * before we restore the registers because moving from D3 to
4522 	 * D0 will cause the chip's BARs and some other registers to
4523 	 * be reset to some unknown power on reset values.  Cut down
4524 	 * the noise on boot by doing nothing if we are already in
4525 	 * state D0.
4526 	 */
4527 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
4528 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4529 	pci_restore_bars(dev);
4530 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4531 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4532 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4533 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4534 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4535 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4536 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4537 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4538 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4539 
4540 	/*
4541 	 * Restore extended capabilities for PCI-Express and PCI-X
4542 	 */
4543 	if (dinfo->cfg.pcie.pcie_location != 0)
4544 		pci_cfg_restore_pcie(dev, dinfo);
4545 	if (dinfo->cfg.pcix.pcix_location != 0)
4546 		pci_cfg_restore_pcix(dev, dinfo);
4547 
4548 	/* Restore MSI and MSI-X configurations if they are present. */
4549 	if (dinfo->cfg.msi.msi_location != 0)
4550 		pci_resume_msi(dev);
4551 	if (dinfo->cfg.msix.msix_location != 0)
4552 		pci_resume_msix(dev);
4553 }
4554 
4555 static void
4556 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
4557 {
4558 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
4559 	struct pcicfg_pcie *cfg;
4560 	int version, pos;
4561 
4562 	cfg = &dinfo->cfg.pcie;
4563 	pos = cfg->pcie_location;
4564 
4565 	cfg->pcie_flags = RREG(PCIR_EXPRESS_FLAGS);
4566 
4567 	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;
4568 
4569 	cfg->pcie_device_ctl = RREG(PCIR_EXPRESS_DEVICE_CTL);
4570 
4571 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4572 	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
4573 	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
4574 		cfg->pcie_link_ctl = RREG(PCIR_EXPRESS_LINK_CTL);
4575 
4576 	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4577 	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
4578 	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
4579 		cfg->pcie_slot_ctl = RREG(PCIR_EXPRESS_SLOT_CTL);
4580 
4581 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4582 	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
4583 		cfg->pcie_root_ctl = RREG(PCIR_EXPRESS_ROOT_CTL);
4584 
4585 	if (version > 1) {
4586 		cfg->pcie_device_ctl2 = RREG(PCIR_EXPRESS_DEVICE_CTL2);
4587 		cfg->pcie_link_ctl2 = RREG(PCIR_EXPRESS_LINK_CTL2);
4588 		cfg->pcie_slot_ctl2 = RREG(PCIR_EXPRESS_SLOT_CTL2);
4589 	}
4590 #undef RREG
4591 }
4592 
4593 static void
4594 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4595 {
4596 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4597 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4598 }
4599 
4600 void
4601 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4602 {
4603 	uint32_t cls;
4604 	int ps;
4605 
4606 	/*
4607 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4608 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4609 	 * which also require special treatment.  Other types are unknown, and
4610 	 * we err on the side of safety by ignoring them.  Powering down
4611 	 * bridges should not be undertaken lightly.
4612 	 */
4613 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4614 		return;
4615 
4616 	/*
4617 	 * Some drivers apparently write to these registers w/o updating our
4618 	 * cached copy.  No harm happens if we update the copy, so do so here
4619 	 * so we can restore them.  The COMMAND register is modified by the
4620 	 * bus w/o updating the cache.  This should represent the normally
4621 	 * writable portion of the 'defined' part of type 0 headers.  In
4622 	 * theory we also need to save/restore the PCI capability structures
4623 	 * we know about, but apart from power we don't know any that are
4624 	 * writable.
4625 	 */
4626 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4627 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4628 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4629 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4630 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4631 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4632 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4633 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4634 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4635 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4636 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4637 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4638 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4639 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4640 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4641 
4642 	if (dinfo->cfg.pcie.pcie_location != 0)
4643 		pci_cfg_save_pcie(dev, dinfo);
4644 
4645 	if (dinfo->cfg.pcix.pcix_location != 0)
4646 		pci_cfg_save_pcix(dev, dinfo);
4647 
4648 	/*
4649 	 * don't set the state for display devices, base peripherals and
4650 	 * memory devices since bad things happen when they are powered down.
4651 	 * We should (a) have drivers that can easily detach and (b) use
4652 	 * generic drivers for these devices so that some device actually
4653 	 * attaches.  We need to make sure that when we implement (a) we don't
4654 	 * power the device down on a reattach.
4655 	 */
4656 	cls = pci_get_class(dev);
4657 	if (!setstate)
4658 		return;
4659 	switch (pci_do_power_nodriver)
4660 	{
4661 		case 0:		/* NO powerdown at all */
4662 			return;
4663 		case 1:		/* Conservative about what to power down */
4664 			if (cls == PCIC_STORAGE)
4665 				return;
4666 			/*FALLTHROUGH*/
4667 		case 2:		/* Agressive about what to power down */
4668 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4669 			    cls == PCIC_BASEPERIPH)
4670 				return;
4671 			/*FALLTHROUGH*/
4672 		case 3:		/* Power down everything */
4673 			break;
4674 	}
4675 	/*
4676 	 * PCI spec says we can only go into D3 state from D0 state.
4677 	 * Transition from D[12] into D0 before going to D3 state.
4678 	 */
4679 	ps = pci_get_powerstate(dev);
4680 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4681 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4682 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4683 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4684 }
4685 
4686 /* Wrapper APIs suitable for device driver use. */
4687 void
4688 pci_save_state(device_t dev)
4689 {
4690 	struct pci_devinfo *dinfo;
4691 
4692 	dinfo = device_get_ivars(dev);
4693 	pci_cfg_save(dev, dinfo, 0);
4694 }
4695 
4696 void
4697 pci_restore_state(device_t dev)
4698 {
4699 	struct pci_devinfo *dinfo;
4700 
4701 	dinfo = device_get_ivars(dev);
4702 	pci_cfg_restore(dev, dinfo);
4703 }
4704