xref: /freebsd/sys/dev/pci/pci.c (revision 4fd0d10e0fe684211328bc148edf89a792425b39)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
74 #define	PCI_DMA_BOUNDARY	0x100000000
75 #endif
76 
77 #define	PCIR_IS_BIOS(cfg, reg)						\
78 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
79 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
80 
81 static int		pci_has_quirk(uint32_t devid, int quirk);
82 static pci_addr_t	pci_mapbase(uint64_t mapreg);
83 static const char	*pci_maptype(uint64_t mapreg);
84 static int		pci_mapsize(uint64_t testval);
85 static int		pci_maprange(uint64_t mapreg);
86 static pci_addr_t	pci_rombase(uint64_t mapreg);
87 static int		pci_romsize(uint64_t testval);
88 static void		pci_fixancient(pcicfgregs *cfg);
89 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
90 
91 static int		pci_porten(device_t dev);
92 static int		pci_memen(device_t dev);
93 static void		pci_assign_interrupt(device_t bus, device_t dev,
94 			    int force_route);
95 static int		pci_add_map(device_t bus, device_t dev, int reg,
96 			    struct resource_list *rl, int force, int prefetch);
97 static int		pci_probe(device_t dev);
98 static int		pci_attach(device_t dev);
99 static void		pci_load_vendor_data(void);
100 static int		pci_describe_parse_line(char **ptr, int *vendor,
101 			    int *device, char **desc);
102 static char		*pci_describe_device(device_t dev);
103 static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
104 static int		pci_modevent(module_t mod, int what, void *arg);
105 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
106 			    pcicfgregs *cfg);
107 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
108 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
109 			    int reg, uint32_t *data);
110 #if 0
111 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
112 			    int reg, uint32_t data);
113 #endif
114 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
115 static void		pci_disable_msi(device_t dev);
116 static void		pci_enable_msi(device_t dev, uint64_t address,
117 			    uint16_t data);
118 static void		pci_enable_msix(device_t dev, u_int index,
119 			    uint64_t address, uint32_t data);
120 static void		pci_mask_msix(device_t dev, u_int index);
121 static void		pci_unmask_msix(device_t dev, u_int index);
122 static int		pci_msi_blacklisted(void);
123 static int		pci_msix_blacklisted(void);
124 static void		pci_resume_msi(device_t dev);
125 static void		pci_resume_msix(device_t dev);
126 static int		pci_remap_intr_method(device_t bus, device_t dev,
127 			    u_int irq);
128 
129 static device_method_t pci_methods[] = {
130 	/* Device interface */
131 	DEVMETHOD(device_probe,		pci_probe),
132 	DEVMETHOD(device_attach,	pci_attach),
133 	DEVMETHOD(device_detach,	bus_generic_detach),
134 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
135 	DEVMETHOD(device_suspend,	pci_suspend),
136 	DEVMETHOD(device_resume,	pci_resume),
137 
138 	/* Bus interface */
139 	DEVMETHOD(bus_print_child,	pci_print_child),
140 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
141 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
142 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
143 	DEVMETHOD(bus_driver_added,	pci_driver_added),
144 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
145 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
146 
147 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
148 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
149 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
150 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
151 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
152 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
153 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
154 	DEVMETHOD(bus_release_resource,	pci_release_resource),
155 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
156 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
157 	DEVMETHOD(bus_child_detached,	pci_child_detached),
158 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
159 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
160 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
161 
162 	/* PCI interface */
163 	DEVMETHOD(pci_read_config,	pci_read_config_method),
164 	DEVMETHOD(pci_write_config,	pci_write_config_method),
165 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
166 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
167 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
168 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
169 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
170 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
171 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
172 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
173 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
174 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
175 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
176 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
177 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
178 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
179 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
180 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
181 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
182 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
183 
184 	DEVMETHOD_END
185 };
186 
187 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
188 
189 static devclass_t pci_devclass;
190 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
191 MODULE_VERSION(pci, 1);
192 
193 static char	*pci_vendordata;
194 static size_t	pci_vendordata_size;
195 
/*
 * Table entry describing a device-specific deviation from normal PCI
 * behavior.  Entries are matched by the combined vendor/device ID;
 * 'type' selects the workaround and arg1/arg2 carry type-specific
 * parameters (e.g. a register offset for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
207 
208 static const struct pci_quirk pci_quirks[] = {
209 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
210 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
211 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
212 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
213 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
214 
215 	/*
216 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
217 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
218 	 */
219 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
220 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
221 
222 	/*
223 	 * MSI doesn't work on earlier Intel chipsets including
224 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
225 	 */
226 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
229 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
230 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
231 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
232 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
233 
234 	/*
235 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
236 	 * bridge.
237 	 */
238 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
239 
240 	/*
241 	 * MSI-X allocation doesn't work properly for devices passed through
242 	 * by VMware up to at least ESXi 5.1.
243 	 */
244 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
245 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
246 
247 	/*
248 	 * Some virtualization environments emulate an older chipset
249 	 * but support MSI just fine.  QEMU uses the Intel 82440.
250 	 */
251 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
252 
253 	/*
254 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
255 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
256 	 * It prevents us from attaching hpet(4) when the bit is unset.
257 	 * Note this quirk only affects SB600 revision A13 and earlier.
258 	 * For SB600 A21 and later, firmware must set the bit to hide it.
259 	 * For SB700 and later, it is unused and hardcoded to zero.
260 	 */
261 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
262 
263 	{ 0 }
264 };
265 
266 /* map register information */
267 #define	PCI_MAPMEM	0x01	/* memory map */
268 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
269 #define	PCI_MAPPORT	0x04	/* port map */
270 
271 struct devlist pci_devq;
272 uint32_t pci_generation;
273 uint32_t pci_numdevs = 0;
274 static int pcie_chipset, pcix_chipset;
275 
276 /* sysctl vars */
277 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
278 
279 static int pci_enable_io_modes = 1;
280 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
281 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
282     &pci_enable_io_modes, 1,
283     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
284 enable these bits correctly.  We'd like to do this all the time, but there\n\
285 are some peripherals that this causes problems with.");
286 
287 static int pci_do_realloc_bars = 0;
288 TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
289 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
290     &pci_do_realloc_bars, 0,
291     "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
292 
293 static int pci_do_power_nodriver = 0;
294 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
295 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
296     &pci_do_power_nodriver, 0,
297   "Place a function into D3 state when no driver attaches to it.  0 means\n\
298 disable.  1 means conservatively place devices into D3 state.  2 means\n\
299 agressively place devices into D3 state.  3 means put absolutely everything\n\
300 in D3 state.");
301 
302 int pci_do_power_resume = 1;
303 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
304 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
305     &pci_do_power_resume, 1,
306   "Transition from D3 -> D0 on resume.");
307 
308 int pci_do_power_suspend = 1;
309 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
310 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
311     &pci_do_power_suspend, 1,
312   "Transition from D0 -> D3 on suspend.");
313 
314 static int pci_do_msi = 1;
315 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
316 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
317     "Enable support for MSI interrupts");
318 
319 static int pci_do_msix = 1;
320 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
321 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
322     "Enable support for MSI-X interrupts");
323 
324 static int pci_honor_msi_blacklist = 1;
325 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
326 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
327     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
328 
329 #if defined(__i386__) || defined(__amd64__)
330 static int pci_usb_takeover = 1;
331 #else
332 static int pci_usb_takeover = 0;
333 #endif
334 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
335 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
336     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
337 Disable this if you depend on BIOS emulation of USB devices, that is\n\
338 you use USB devices (like keyboard or mouse) but do not load USB drivers");
339 
340 static int
341 pci_has_quirk(uint32_t devid, int quirk)
342 {
343 	const struct pci_quirk *q;
344 
345 	for (q = &pci_quirks[0]; q->devid; q++) {
346 		if (q->devid == devid && q->type == quirk)
347 			return (1);
348 	}
349 	return (0);
350 }
351 
352 /* Find a device_t by bus/slot/function in domain 0 */
353 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper that searches PCI domain 0 only. */
	return (pci_find_dbsf(0, bus, slot, func));
}
360 
361 /* Find a device_t by domain/bus/slot/function */
362 
363 device_t
364 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
365 {
366 	struct pci_devinfo *dinfo;
367 
368 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
369 		if ((dinfo->cfg.domain == domain) &&
370 		    (dinfo->cfg.bus == bus) &&
371 		    (dinfo->cfg.slot == slot) &&
372 		    (dinfo->cfg.func == func)) {
373 			return (dinfo->cfg.dev);
374 		}
375 	}
376 
377 	return (NULL);
378 }
379 
380 /* Find a device_t by vendor/device ID */
381 
382 device_t
383 pci_find_device(uint16_t vendor, uint16_t device)
384 {
385 	struct pci_devinfo *dinfo;
386 
387 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
388 		if ((dinfo->cfg.vendor == vendor) &&
389 		    (dinfo->cfg.device == device)) {
390 			return (dinfo->cfg.dev);
391 		}
392 	}
393 
394 	return (NULL);
395 }
396 
397 device_t
398 pci_find_class(uint8_t class, uint8_t subclass)
399 {
400 	struct pci_devinfo *dinfo;
401 
402 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
403 		if (dinfo->cfg.baseclass == class &&
404 		    dinfo->cfg.subclass == subclass) {
405 			return (dinfo->cfg.dev);
406 		}
407 	}
408 
409 	return (NULL);
410 }
411 
412 static int
413 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
414 {
415 	va_list ap;
416 	int retval;
417 
418 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
419 	    cfg->func);
420 	va_start(ap, fmt);
421 	retval += vprintf(fmt, ap);
422 	va_end(ap);
423 	return (retval);
424 }
425 
426 /* return base address of memory or port map */
427 
428 static pci_addr_t
429 pci_mapbase(uint64_t mapreg)
430 {
431 
432 	if (PCI_BAR_MEM(mapreg))
433 		return (mapreg & PCIM_BAR_MEM_BASE);
434 	else
435 		return (mapreg & PCIM_BAR_IO_BASE);
436 }
437 
438 /* return map type of memory or port map */
439 
440 static const char *
441 pci_maptype(uint64_t mapreg)
442 {
443 
444 	if (PCI_BAR_IO(mapreg))
445 		return ("I/O Port");
446 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
447 		return ("Prefetchable Memory");
448 	return ("Memory");
449 }
450 
451 /* return log2 of map size decoded for memory or port map */
452 
453 static int
454 pci_mapsize(uint64_t testval)
455 {
456 	int ln2size;
457 
458 	testval = pci_mapbase(testval);
459 	ln2size = 0;
460 	if (testval != 0) {
461 		while ((testval & 1) == 0)
462 		{
463 			ln2size++;
464 			testval >>= 1;
465 		}
466 	}
467 	return (ln2size);
468 }
469 
470 /* return base address of device ROM */
471 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the low bits (enable flag, etc.) of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
478 
479 /* return log2 of map size decided for device ROM */
480 
481 static int
482 pci_romsize(uint64_t testval)
483 {
484 	int ln2size;
485 
486 	testval = pci_rombase(testval);
487 	ln2size = 0;
488 	if (testval != 0) {
489 		while ((testval & 1) == 0)
490 		{
491 			ln2size++;
492 			testval >>= 1;
493 		}
494 	}
495 	return (ln2size);
496 }
497 
498 /* return log2 of address range supported by map register */
499 
500 static int
501 pci_maprange(uint64_t mapreg)
502 {
503 	int ln2range = 0;
504 
505 	if (PCI_BAR_IO(mapreg))
506 		ln2range = 32;
507 	else
508 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
509 		case PCIM_BAR_MEM_32:
510 			ln2range = 32;
511 			break;
512 		case PCIM_BAR_MEM_1MB:
513 			ln2range = 20;
514 			break;
515 		case PCIM_BAR_MEM_64:
516 			ln2range = 64;
517 			break;
518 		}
519 	return (ln2range);
520 }
521 
522 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
523 
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type-0 (normal) headers are candidates for fix-up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
534 
535 /* extract header type specific config data */
536 
/*
 * Read header-type-specific config data into *cfg: the subvendor and
 * subdevice IDs (normal and cardbus headers only; bridge headers get
 * no subvendor read here) and the number of BARs for the type.
 * Unrecognized header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
558 
559 /* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device read means nothing responded here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/*
		 * NOTE(review): with M_WAITOK, malloc(9) does not return
		 * NULL, so the check below is defensive only.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the common config-space header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror selected fields into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
634 
/*
 * Walk the device's capability list and record the location and key
 * fields of each recognized capability (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express)
 * into *cfg.  Returns silently if the header type has no capability
 * pointer or an illegal pointer is encountered.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type-specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability seen. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* Table size field is N-1 encoded. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* The REG and WREG definitions intentionally carry through to the VPD functions below. */
}
795 
796 /*
797  * PCI Vital Product Data
798  */
799 
800 #define	PCI_VPD_TIMEOUT		1000000
801 
802 static int
803 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
804 {
805 	int count = PCI_VPD_TIMEOUT;
806 
807 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
808 
809 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
810 
811 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
812 		if (--count < 0)
813 			return (ENXIO);
814 		DELAY(1);	/* limit looping */
815 	}
816 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
817 
818 	return (0);
819 }
820 
821 #if 0
/*
 * Write one 32-bit word to the device's VPD storage.  Compiled out
 * (enclosing #if 0); kept for reference.  Returns 0 on success or
 * ENXIO if the write does not complete within PCI_VPD_TIMEOUT
 * microseconds.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Setting bit 15 starts the write; hardware clears it when done. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
839 #endif
840 
841 #undef PCI_VPD_TIMEOUT
842 
/* Cursor state for the incremental VPD byte reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched from VPD */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* next VPD offset to fetch (bytes) */
	uint8_t		cksum;		/* running byte sum for checksumming */
};
851 
852 static int
853 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
854 {
855 	uint32_t reg;
856 	uint8_t byte;
857 
858 	if (vrs->bytesinval == 0) {
859 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
860 			return (ENXIO);
861 		vrs->val = le32toh(reg);
862 		vrs->off += 4;
863 		byte = vrs->val & 0xff;
864 		vrs->bytesinval = 3;
865 	} else {
866 		vrs->val = vrs->val >> 8;
867 		byte = vrs->val & 0xff;
868 		vrs->bytesinval--;
869 	}
870 
871 	vrs->cksum += byte;
872 	*data = byte;
873 	return (0);
874 }
875 
876 static void
877 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
878 {
879 	struct vpd_readstate vrs;
880 	int state;
881 	int name;
882 	int remain;
883 	int i;
884 	int alloc, off;		/* alloc/off for RO/W arrays */
885 	int cksumvalid;
886 	int dflen;
887 	uint8_t byte;
888 	uint8_t byte2;
889 
890 	/* init vpd reader */
891 	vrs.bytesinval = 0;
892 	vrs.off = 0;
893 	vrs.pcib = pcib;
894 	vrs.cfg = cfg;
895 	vrs.cksum = 0;
896 
897 	state = 0;
898 	name = remain = i = 0;	/* shut up stupid gcc */
899 	alloc = off = 0;	/* shut up stupid gcc */
900 	dflen = 0;		/* shut up stupid gcc */
901 	cksumvalid = -1;
902 	while (state >= 0) {
903 		if (vpd_nextbyte(&vrs, &byte)) {
904 			state = -2;
905 			break;
906 		}
907 #if 0
908 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
909 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
910 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
911 #endif
912 		switch (state) {
913 		case 0:		/* item name */
914 			if (byte & 0x80) {
915 				if (vpd_nextbyte(&vrs, &byte2)) {
916 					state = -2;
917 					break;
918 				}
919 				remain = byte2;
920 				if (vpd_nextbyte(&vrs, &byte2)) {
921 					state = -2;
922 					break;
923 				}
924 				remain |= byte2 << 8;
925 				if (remain > (0x7f*4 - vrs.off)) {
926 					state = -1;
927 					pci_printf(cfg,
928 					    "invalid VPD data, remain %#x\n",
929 					    remain);
930 				}
931 				name = byte & 0x7f;
932 			} else {
933 				remain = byte & 0x7;
934 				name = (byte >> 3) & 0xf;
935 			}
936 			switch (name) {
937 			case 0x2:	/* String */
938 				cfg->vpd.vpd_ident = malloc(remain + 1,
939 				    M_DEVBUF, M_WAITOK);
940 				i = 0;
941 				state = 1;
942 				break;
943 			case 0xf:	/* End */
944 				state = -1;
945 				break;
946 			case 0x10:	/* VPD-R */
947 				alloc = 8;
948 				off = 0;
949 				cfg->vpd.vpd_ros = malloc(alloc *
950 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
951 				    M_WAITOK | M_ZERO);
952 				state = 2;
953 				break;
954 			case 0x11:	/* VPD-W */
955 				alloc = 8;
956 				off = 0;
957 				cfg->vpd.vpd_w = malloc(alloc *
958 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
959 				    M_WAITOK | M_ZERO);
960 				state = 5;
961 				break;
962 			default:	/* Invalid data, abort */
963 				state = -1;
964 				break;
965 			}
966 			break;
967 
968 		case 1:	/* Identifier String */
969 			cfg->vpd.vpd_ident[i++] = byte;
970 			remain--;
971 			if (remain == 0)  {
972 				cfg->vpd.vpd_ident[i] = '\0';
973 				state = 0;
974 			}
975 			break;
976 
977 		case 2:	/* VPD-R Keyword Header */
978 			if (off == alloc) {
979 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
980 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
981 				    M_DEVBUF, M_WAITOK | M_ZERO);
982 			}
983 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
984 			if (vpd_nextbyte(&vrs, &byte2)) {
985 				state = -2;
986 				break;
987 			}
988 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
989 			if (vpd_nextbyte(&vrs, &byte2)) {
990 				state = -2;
991 				break;
992 			}
993 			dflen = byte2;
994 			if (dflen == 0 &&
995 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
996 			    2) == 0) {
997 				/*
998 				 * if this happens, we can't trust the rest
999 				 * of the VPD.
1000 				 */
1001 				pci_printf(cfg, "bad keyword length: %d\n",
1002 				    dflen);
1003 				cksumvalid = 0;
1004 				state = -1;
1005 				break;
1006 			} else if (dflen == 0) {
1007 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1008 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1009 				    M_DEVBUF, M_WAITOK);
1010 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1011 			} else
1012 				cfg->vpd.vpd_ros[off].value = malloc(
1013 				    (dflen + 1) *
1014 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1015 				    M_DEVBUF, M_WAITOK);
1016 			remain -= 3;
1017 			i = 0;
1018 			/* keep in sync w/ state 3's transistions */
1019 			if (dflen == 0 && remain == 0)
1020 				state = 0;
1021 			else if (dflen == 0)
1022 				state = 2;
1023 			else
1024 				state = 3;
1025 			break;
1026 
1027 		case 3:	/* VPD-R Keyword Value */
1028 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1029 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1030 			    "RV", 2) == 0 && cksumvalid == -1) {
1031 				if (vrs.cksum == 0)
1032 					cksumvalid = 1;
1033 				else {
1034 					if (bootverbose)
1035 						pci_printf(cfg,
1036 					    "bad VPD cksum, remain %hhu\n",
1037 						    vrs.cksum);
1038 					cksumvalid = 0;
1039 					state = -1;
1040 					break;
1041 				}
1042 			}
1043 			dflen--;
1044 			remain--;
1045 			/* keep in sync w/ state 2's transistions */
1046 			if (dflen == 0)
1047 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1048 			if (dflen == 0 && remain == 0) {
1049 				cfg->vpd.vpd_rocnt = off;
1050 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1051 				    off * sizeof(*cfg->vpd.vpd_ros),
1052 				    M_DEVBUF, M_WAITOK | M_ZERO);
1053 				state = 0;
1054 			} else if (dflen == 0)
1055 				state = 2;
1056 			break;
1057 
1058 		case 4:
1059 			remain--;
1060 			if (remain == 0)
1061 				state = 0;
1062 			break;
1063 
1064 		case 5:	/* VPD-W Keyword Header */
1065 			if (off == alloc) {
1066 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1067 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1068 				    M_DEVBUF, M_WAITOK | M_ZERO);
1069 			}
1070 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1071 			if (vpd_nextbyte(&vrs, &byte2)) {
1072 				state = -2;
1073 				break;
1074 			}
1075 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1076 			if (vpd_nextbyte(&vrs, &byte2)) {
1077 				state = -2;
1078 				break;
1079 			}
1080 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1081 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1082 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1083 			    sizeof(*cfg->vpd.vpd_w[off].value),
1084 			    M_DEVBUF, M_WAITOK);
1085 			remain -= 3;
1086 			i = 0;
1087 			/* keep in sync w/ state 6's transistions */
1088 			if (dflen == 0 && remain == 0)
1089 				state = 0;
1090 			else if (dflen == 0)
1091 				state = 5;
1092 			else
1093 				state = 6;
1094 			break;
1095 
1096 		case 6:	/* VPD-W Keyword Value */
1097 			cfg->vpd.vpd_w[off].value[i++] = byte;
1098 			dflen--;
1099 			remain--;
1100 			/* keep in sync w/ state 5's transistions */
1101 			if (dflen == 0)
1102 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1103 			if (dflen == 0 && remain == 0) {
1104 				cfg->vpd.vpd_wcnt = off;
1105 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1106 				    off * sizeof(*cfg->vpd.vpd_w),
1107 				    M_DEVBUF, M_WAITOK | M_ZERO);
1108 				state = 0;
1109 			} else if (dflen == 0)
1110 				state = 5;
1111 			break;
1112 
1113 		default:
1114 			pci_printf(cfg, "invalid state: %d\n", state);
1115 			state = -1;
1116 			break;
1117 		}
1118 	}
1119 
1120 	if (cksumvalid == 0 || state < -1) {
1121 		/* read-only data bad, clean up */
1122 		if (cfg->vpd.vpd_ros != NULL) {
1123 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1124 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1125 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1126 			cfg->vpd.vpd_ros = NULL;
1127 		}
1128 	}
1129 	if (state < -1) {
1130 		/* I/O error, clean up */
1131 		pci_printf(cfg, "failed to read VPD data.\n");
1132 		if (cfg->vpd.vpd_ident != NULL) {
1133 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1134 			cfg->vpd.vpd_ident = NULL;
1135 		}
1136 		if (cfg->vpd.vpd_w != NULL) {
1137 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1138 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1139 			free(cfg->vpd.vpd_w, M_DEVBUF);
1140 			cfg->vpd.vpd_w = NULL;
1141 		}
1142 	}
1143 	cfg->vpd.vpd_cached = 1;
1144 #undef REG
1145 #undef WREG
1146 }
1147 
1148 int
1149 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1150 {
1151 	struct pci_devinfo *dinfo = device_get_ivars(child);
1152 	pcicfgregs *cfg = &dinfo->cfg;
1153 
1154 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1155 		pci_read_vpd(device_get_parent(dev), cfg);
1156 
1157 	*identptr = cfg->vpd.vpd_ident;
1158 
1159 	if (*identptr == NULL)
1160 		return (ENXIO);
1161 
1162 	return (0);
1163 }
1164 
1165 int
1166 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1167 	const char **vptr)
1168 {
1169 	struct pci_devinfo *dinfo = device_get_ivars(child);
1170 	pcicfgregs *cfg = &dinfo->cfg;
1171 	int i;
1172 
1173 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1174 		pci_read_vpd(device_get_parent(dev), cfg);
1175 
1176 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1177 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1178 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1179 			*vptr = cfg->vpd.vpd_ros[i].value;
1180 			return (0);
1181 		}
1182 
1183 	*vptr = NULL;
1184 	return (ENXIO);
1185 }
1186 
1187 /*
1188  * Find the requested HyperTransport capability and return the offset
1189  * in configuration space via the pointer provided.  The function
1190  * returns 0 on success and an error code otherwise.
1191  */
1192 int
1193 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1194 {
1195 	int ptr, error;
1196 	uint16_t val;
1197 
1198 	error = pci_find_cap(child, PCIY_HT, &ptr);
1199 	if (error)
1200 		return (error);
1201 
1202 	/*
1203 	 * Traverse the capabilities list checking each HT capability
1204 	 * to see if it matches the requested HT capability.
1205 	 */
1206 	while (ptr != 0) {
1207 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1208 		if (capability == PCIM_HTCAP_SLAVE ||
1209 		    capability == PCIM_HTCAP_HOST)
1210 			val &= 0xe000;
1211 		else
1212 			val &= PCIM_HTCMD_CAP_MASK;
1213 		if (val == capability) {
1214 			if (capreg != NULL)
1215 				*capreg = ptr;
1216 			return (0);
1217 		}
1218 
1219 		/* Skip to the next HT capability. */
1220 		while (ptr != 0) {
1221 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1222 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1223 			    PCIY_HT)
1224 				break;
1225 		}
1226 	}
1227 	return (ENOENT);
1228 }
1229 
1230 /*
1231  * Find the requested capability and return the offset in
1232  * configuration space via the pointer provided.  The function returns
1233  * 0 on success and an error code otherwise.
1234  */
1235 int
1236 pci_find_cap_method(device_t dev, device_t child, int capability,
1237     int *capreg)
1238 {
1239 	struct pci_devinfo *dinfo = device_get_ivars(child);
1240 	pcicfgregs *cfg = &dinfo->cfg;
1241 	u_int32_t status;
1242 	u_int8_t ptr;
1243 
1244 	/*
1245 	 * Check the CAP_LIST bit of the PCI status register first.
1246 	 */
1247 	status = pci_read_config(child, PCIR_STATUS, 2);
1248 	if (!(status & PCIM_STATUS_CAPPRESENT))
1249 		return (ENXIO);
1250 
1251 	/*
1252 	 * Determine the start pointer of the capabilities list.
1253 	 */
1254 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1255 	case PCIM_HDRTYPE_NORMAL:
1256 	case PCIM_HDRTYPE_BRIDGE:
1257 		ptr = PCIR_CAP_PTR;
1258 		break;
1259 	case PCIM_HDRTYPE_CARDBUS:
1260 		ptr = PCIR_CAP_PTR_2;
1261 		break;
1262 	default:
1263 		/* XXX: panic? */
1264 		return (ENXIO);		/* no extended capabilities support */
1265 	}
1266 	ptr = pci_read_config(child, ptr, 1);
1267 
1268 	/*
1269 	 * Traverse the capabilities list.
1270 	 */
1271 	while (ptr != 0) {
1272 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1273 			if (capreg != NULL)
1274 				*capreg = ptr;
1275 			return (0);
1276 		}
1277 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1278 	}
1279 
1280 	return (ENOENT);
1281 }
1282 
1283 /*
1284  * Find the requested extended capability and return the offset in
1285  * configuration space via the pointer provided.  The function returns
1286  * 0 on success and an error code otherwise.
1287  */
1288 int
1289 pci_find_extcap_method(device_t dev, device_t child, int capability,
1290     int *capreg)
1291 {
1292 	struct pci_devinfo *dinfo = device_get_ivars(child);
1293 	pcicfgregs *cfg = &dinfo->cfg;
1294 	uint32_t ecap;
1295 	uint16_t ptr;
1296 
1297 	/* Only supported for PCI-express devices. */
1298 	if (cfg->pcie.pcie_location == 0)
1299 		return (ENXIO);
1300 
1301 	ptr = PCIR_EXTCAP;
1302 	ecap = pci_read_config(child, ptr, 4);
1303 	if (ecap == 0xffffffff || ecap == 0)
1304 		return (ENOENT);
1305 	for (;;) {
1306 		if (PCI_EXTCAP_ID(ecap) == capability) {
1307 			if (capreg != NULL)
1308 				*capreg = ptr;
1309 			return (0);
1310 		}
1311 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1312 		if (ptr == 0)
1313 			break;
1314 		ecap = pci_read_config(child, ptr, 4);
1315 	}
1316 
1317 	return (ENOENT);
1318 }
1319 
1320 /*
1321  * Support for MSI-X message interrupts.
1322  */
1323 void
1324 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1325 {
1326 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1327 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1328 	uint32_t offset;
1329 
1330 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1331 	offset = msix->msix_table_offset + index * 16;
1332 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1333 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1334 	bus_write_4(msix->msix_table_res, offset + 8, data);
1335 
1336 	/* Enable MSI -> HT mapping. */
1337 	pci_ht_map_msi(dev, address);
1338 }
1339 
1340 void
1341 pci_mask_msix(device_t dev, u_int index)
1342 {
1343 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1344 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1345 	uint32_t offset, val;
1346 
1347 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1348 	offset = msix->msix_table_offset + index * 16 + 12;
1349 	val = bus_read_4(msix->msix_table_res, offset);
1350 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1351 		val |= PCIM_MSIX_VCTRL_MASK;
1352 		bus_write_4(msix->msix_table_res, offset, val);
1353 	}
1354 }
1355 
1356 void
1357 pci_unmask_msix(device_t dev, u_int index)
1358 {
1359 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1360 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1361 	uint32_t offset, val;
1362 
1363 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1364 	offset = msix->msix_table_offset + index * 16 + 12;
1365 	val = bus_read_4(msix->msix_table_res, offset);
1366 	if (val & PCIM_MSIX_VCTRL_MASK) {
1367 		val &= ~PCIM_MSIX_VCTRL_MASK;
1368 		bus_write_4(msix->msix_table_res, offset, val);
1369 	}
1370 }
1371 
1372 int
1373 pci_pending_msix(device_t dev, u_int index)
1374 {
1375 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1376 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1377 	uint32_t offset, bit;
1378 
1379 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1380 	offset = msix->msix_pba_offset + (index / 32) * 4;
1381 	bit = 1 << index % 32;
1382 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1383 }
1384 
1385 /*
1386  * Restore MSI-X registers and table during resume.  If MSI-X is
1387  * enabled then walk the virtual table to restore the actual MSI-X
1388  * table.
1389  */
1390 static void
1391 pci_resume_msix(device_t dev)
1392 {
1393 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1394 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1395 	struct msix_table_entry *mte;
1396 	struct msix_vector *mv;
1397 	int i;
1398 
1399 	if (msix->msix_alloc > 0) {
1400 		/* First, mask all vectors. */
1401 		for (i = 0; i < msix->msix_msgnum; i++)
1402 			pci_mask_msix(dev, i);
1403 
1404 		/* Second, program any messages with at least one handler. */
1405 		for (i = 0; i < msix->msix_table_len; i++) {
1406 			mte = &msix->msix_table[i];
1407 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1408 				continue;
1409 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1410 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1411 			pci_unmask_msix(dev, i);
1412 		}
1413 	}
1414 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1415 	    msix->msix_ctrl, 2);
1416 }
1417 
1418 /*
1419  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1420  * returned in *count.  After this function returns, each message will be
1421  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1422  */
1423 int
1424 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1425 {
1426 	struct pci_devinfo *dinfo = device_get_ivars(child);
1427 	pcicfgregs *cfg = &dinfo->cfg;
1428 	struct resource_list_entry *rle;
1429 	int actual, error, i, irq, max;
1430 
1431 	/* Don't let count == 0 get us into trouble. */
1432 	if (*count == 0)
1433 		return (EINVAL);
1434 
1435 	/* If rid 0 is allocated, then fail. */
1436 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1437 	if (rle != NULL && rle->res != NULL)
1438 		return (ENXIO);
1439 
1440 	/* Already have allocated messages? */
1441 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1442 		return (ENXIO);
1443 
1444 	/* If MSI-X is blacklisted for this system, fail. */
1445 	if (pci_msix_blacklisted())
1446 		return (ENXIO);
1447 
1448 	/* MSI-X capability present? */
1449 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1450 		return (ENODEV);
1451 
1452 	/* Make sure the appropriate BARs are mapped. */
1453 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1454 	    cfg->msix.msix_table_bar);
1455 	if (rle == NULL || rle->res == NULL ||
1456 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1457 		return (ENXIO);
1458 	cfg->msix.msix_table_res = rle->res;
1459 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1460 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1461 		    cfg->msix.msix_pba_bar);
1462 		if (rle == NULL || rle->res == NULL ||
1463 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1464 			return (ENXIO);
1465 	}
1466 	cfg->msix.msix_pba_res = rle->res;
1467 
1468 	if (bootverbose)
1469 		device_printf(child,
1470 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1471 		    *count, cfg->msix.msix_msgnum);
1472 	max = min(*count, cfg->msix.msix_msgnum);
1473 	for (i = 0; i < max; i++) {
1474 		/* Allocate a message. */
1475 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1476 		if (error) {
1477 			if (i == 0)
1478 				return (error);
1479 			break;
1480 		}
1481 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1482 		    irq, 1);
1483 	}
1484 	actual = i;
1485 
1486 	if (bootverbose) {
1487 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1488 		if (actual == 1)
1489 			device_printf(child, "using IRQ %lu for MSI-X\n",
1490 			    rle->start);
1491 		else {
1492 			int run;
1493 
1494 			/*
1495 			 * Be fancy and try to print contiguous runs of
1496 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1497 			 * 'run' is true if we are in a range.
1498 			 */
1499 			device_printf(child, "using IRQs %lu", rle->start);
1500 			irq = rle->start;
1501 			run = 0;
1502 			for (i = 1; i < actual; i++) {
1503 				rle = resource_list_find(&dinfo->resources,
1504 				    SYS_RES_IRQ, i + 1);
1505 
1506 				/* Still in a run? */
1507 				if (rle->start == irq + 1) {
1508 					run = 1;
1509 					irq++;
1510 					continue;
1511 				}
1512 
1513 				/* Finish previous range. */
1514 				if (run) {
1515 					printf("-%d", irq);
1516 					run = 0;
1517 				}
1518 
1519 				/* Start new range. */
1520 				printf(",%lu", rle->start);
1521 				irq = rle->start;
1522 			}
1523 
1524 			/* Unfinished range? */
1525 			if (run)
1526 				printf("-%d", irq);
1527 			printf(" for MSI-X\n");
1528 		}
1529 	}
1530 
1531 	/* Mask all vectors. */
1532 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1533 		pci_mask_msix(child, i);
1534 
1535 	/* Allocate and initialize vector data and virtual table. */
1536 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1537 	    M_DEVBUF, M_WAITOK | M_ZERO);
1538 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1539 	    M_DEVBUF, M_WAITOK | M_ZERO);
1540 	for (i = 0; i < actual; i++) {
1541 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1542 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1543 		cfg->msix.msix_table[i].mte_vector = i + 1;
1544 	}
1545 
1546 	/* Update control register to enable MSI-X. */
1547 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1548 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1549 	    cfg->msix.msix_ctrl, 2);
1550 
1551 	/* Update counts of alloc'd messages. */
1552 	cfg->msix.msix_alloc = actual;
1553 	cfg->msix.msix_table_len = actual;
1554 	*count = actual;
1555 	return (0);
1556 }
1557 
1558 /*
1559  * By default, pci_alloc_msix() will assign the allocated IRQ
1560  * resources consecutively to the first N messages in the MSI-X table.
1561  * However, device drivers may want to use different layouts if they
1562  * either receive fewer messages than they asked for, or they wish to
1563  * populate the MSI-X table sparsely.  This method allows the driver
1564  * to specify what layout it wants.  It must be called after a
1565  * successful pci_alloc_msix() but before any of the associated
1566  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1567  *
1568  * The 'vectors' array contains 'count' message vectors.  The array
1569  * maps directly to the MSI-X table in that index 0 in the array
1570  * specifies the vector for the first message in the MSI-X table, etc.
1571  * The vector value in each array index can either be 0 to indicate
1572  * that no vector should be assigned to a message slot, or it can be a
1573  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1575  * vector (IRQ) to be used for the corresponding message.
1576  *
1577  * On successful return, each message with a non-zero vector will have
1578  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1579  * 1.  Additionally, if any of the IRQs allocated via the previous
1580  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1581  * will be freed back to the system automatically.
1582  *
1583  * For example, suppose a driver has a MSI-X table with 6 messages and
1584  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1585  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1586  * C.  After the call to pci_alloc_msix(), the device will be setup to
1587  * have an MSI-X table of ABC--- (where - means no vector assigned).
1588  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1589  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1590  * be freed back to the system.  This device will also have valid
1591  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1592  *
1593  * In any case, the SYS_RES_IRQ rid X will always map to the message
1594  * at MSI-X table index X - 1 and will only be valid if a vector is
1595  * assigned to that table entry.
1596  */
1597 int
1598 pci_remap_msix_method(device_t dev, device_t child, int count,
1599     const u_int *vectors)
1600 {
1601 	struct pci_devinfo *dinfo = device_get_ivars(child);
1602 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1603 	struct resource_list_entry *rle;
1604 	int i, irq, j, *used;
1605 
1606 	/*
1607 	 * Have to have at least one message in the table but the
1608 	 * table can't be bigger than the actual MSI-X table in the
1609 	 * device.
1610 	 */
1611 	if (count == 0 || count > msix->msix_msgnum)
1612 		return (EINVAL);
1613 
1614 	/* Sanity check the vectors. */
1615 	for (i = 0; i < count; i++)
1616 		if (vectors[i] > msix->msix_alloc)
1617 			return (EINVAL);
1618 
1619 	/*
1620 	 * Make sure there aren't any holes in the vectors to be used.
1621 	 * It's a big pain to support it, and it doesn't really make
1622 	 * sense anyway.  Also, at least one vector must be used.
1623 	 */
1624 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1625 	    M_ZERO);
1626 	for (i = 0; i < count; i++)
1627 		if (vectors[i] != 0)
1628 			used[vectors[i] - 1] = 1;
1629 	for (i = 0; i < msix->msix_alloc - 1; i++)
1630 		if (used[i] == 0 && used[i + 1] == 1) {
1631 			free(used, M_DEVBUF);
1632 			return (EINVAL);
1633 		}
1634 	if (used[0] != 1) {
1635 		free(used, M_DEVBUF);
1636 		return (EINVAL);
1637 	}
1638 
1639 	/* Make sure none of the resources are allocated. */
1640 	for (i = 0; i < msix->msix_table_len; i++) {
1641 		if (msix->msix_table[i].mte_vector == 0)
1642 			continue;
1643 		if (msix->msix_table[i].mte_handlers > 0)
1644 			return (EBUSY);
1645 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1646 		KASSERT(rle != NULL, ("missing resource"));
1647 		if (rle->res != NULL)
1648 			return (EBUSY);
1649 	}
1650 
1651 	/* Free the existing resource list entries. */
1652 	for (i = 0; i < msix->msix_table_len; i++) {
1653 		if (msix->msix_table[i].mte_vector == 0)
1654 			continue;
1655 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1656 	}
1657 
1658 	/*
1659 	 * Build the new virtual table keeping track of which vectors are
1660 	 * used.
1661 	 */
1662 	free(msix->msix_table, M_DEVBUF);
1663 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1664 	    M_DEVBUF, M_WAITOK | M_ZERO);
1665 	for (i = 0; i < count; i++)
1666 		msix->msix_table[i].mte_vector = vectors[i];
1667 	msix->msix_table_len = count;
1668 
1669 	/* Free any unused IRQs and resize the vectors array if necessary. */
1670 	j = msix->msix_alloc - 1;
1671 	if (used[j] == 0) {
1672 		struct msix_vector *vec;
1673 
1674 		while (used[j] == 0) {
1675 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1676 			    msix->msix_vectors[j].mv_irq);
1677 			j--;
1678 		}
1679 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1680 		    M_WAITOK);
1681 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1682 		    (j + 1));
1683 		free(msix->msix_vectors, M_DEVBUF);
1684 		msix->msix_vectors = vec;
1685 		msix->msix_alloc = j + 1;
1686 	}
1687 	free(used, M_DEVBUF);
1688 
1689 	/* Map the IRQs onto the rids. */
1690 	for (i = 0; i < count; i++) {
1691 		if (vectors[i] == 0)
1692 			continue;
1693 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1694 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1695 		    irq, 1);
1696 	}
1697 
1698 	if (bootverbose) {
1699 		device_printf(child, "Remapped MSI-X IRQs as: ");
1700 		for (i = 0; i < count; i++) {
1701 			if (i != 0)
1702 				printf(", ");
1703 			if (vectors[i] == 0)
1704 				printf("---");
1705 			else
1706 				printf("%d",
1707 				    msix->msix_vectors[vectors[i]].mv_irq);
1708 		}
1709 		printf("\n");
1710 	}
1711 
1712 	return (0);
1713 }
1714 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated: any message
	 * with an active handler or an outstanding SYS_RES_IRQ
	 * allocation keeps the entire set busy.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1761 
1762 /*
1763  * Return the max supported MSI-X messages this device supports.
1764  * Basically, assuming the MD code can alloc messages, this function
1765  * should return the maximum value that pci_alloc_msix() can return.
1766  * Thus, it is subject to the tunables, etc.
1767  */
1768 int
1769 pci_msix_count_method(device_t dev, device_t child)
1770 {
1771 	struct pci_devinfo *dinfo = device_get_ivars(child);
1772 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1773 
1774 	if (pci_do_msix && msix->msix_location != 0)
1775 		return (msix->msix_msgnum);
1776 	return (0);
1777 }
1778 
1779 /*
1780  * HyperTransport MSI mapping control
1781  */
1782 void
1783 pci_ht_map_msi(device_t dev, uint64_t addr)
1784 {
1785 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1786 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1787 
1788 	if (!ht->ht_msimap)
1789 		return;
1790 
1791 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1792 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1793 		/* Enable MSI -> HT mapping. */
1794 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1795 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1796 		    ht->ht_msictrl, 2);
1797 	}
1798 
1799 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1800 		/* Disable MSI -> HT mapping. */
1801 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1802 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1803 		    ht->ht_msictrl, 2);
1804 	}
1805 }
1806 
1807 int
1808 pci_get_max_read_req(device_t dev)
1809 {
1810 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1811 	int cap;
1812 	uint16_t val;
1813 
1814 	cap = dinfo->cfg.pcie.pcie_location;
1815 	if (cap == 0)
1816 		return (0);
1817 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1818 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1819 	val >>= 12;
1820 	return (1 << (val + 7));
1821 }
1822 
1823 int
1824 pci_set_max_read_req(device_t dev, int size)
1825 {
1826 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1827 	int cap;
1828 	uint16_t val;
1829 
1830 	cap = dinfo->cfg.pcie.pcie_location;
1831 	if (cap == 0)
1832 		return (0);
1833 	if (size < 128)
1834 		size = 128;
1835 	if (size > 4096)
1836 		size = 4096;
1837 	size = (1 << (fls(size) - 1));
1838 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1839 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1840 	val |= (fls(size) - 8) << 12;
1841 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1842 	return (size);
1843 }
1844 
1845 /*
1846  * Support for MSI message signalled interrupts.
1847  */
1848 void
1849 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1850 {
1851 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1852 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1853 
1854 	/* Write data and address values. */
1855 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1856 	    address & 0xffffffff, 4);
1857 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1858 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1859 		    address >> 32, 4);
1860 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1861 		    data, 2);
1862 	} else
1863 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1864 		    2);
1865 
1866 	/* Enable MSI in the control register. */
1867 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1868 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1869 	    2);
1870 
1871 	/* Enable MSI -> HT mapping. */
1872 	pci_ht_map_msi(dev, address);
1873 }
1874 
1875 void
1876 pci_disable_msi(device_t dev)
1877 {
1878 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1879 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1880 
1881 	/* Disable MSI -> HT mapping. */
1882 	pci_ht_map_msi(dev, 0);
1883 
1884 	/* Disable MSI in the control register. */
1885 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1886 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1887 	    2);
1888 }
1889 
1890 /*
1891  * Restore MSI registers during resume.  If MSI is enabled then
1892  * restore the data and address registers in addition to the control
1893  * register.
1894  */
1895 static void
1896 pci_resume_msi(device_t dev)
1897 {
1898 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1899 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1900 	uint64_t address;
1901 	uint16_t data;
1902 
1903 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1904 		address = msi->msi_addr;
1905 		data = msi->msi_data;
1906 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1907 		    address & 0xffffffff, 4);
1908 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1909 			pci_write_config(dev, msi->msi_location +
1910 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1911 			pci_write_config(dev, msi->msi_location +
1912 			    PCIR_MSI_DATA_64BIT, data, 2);
1913 		} else
1914 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1915 			    data, 2);
1916 	}
1917 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1918 	    2);
1919 }
1920 
1921 static int
1922 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1923 {
1924 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1925 	pcicfgregs *cfg = &dinfo->cfg;
1926 	struct resource_list_entry *rle;
1927 	struct msix_table_entry *mte;
1928 	struct msix_vector *mv;
1929 	uint64_t addr;
1930 	uint32_t data;
1931 	int error, i, j;
1932 
1933 	/*
1934 	 * Handle MSI first.  We try to find this IRQ among our list
1935 	 * of MSI IRQs.  If we find it, we request updated address and
1936 	 * data registers and apply the results.
1937 	 */
1938 	if (cfg->msi.msi_alloc > 0) {
1939 
1940 		/* If we don't have any active handlers, nothing to do. */
1941 		if (cfg->msi.msi_handlers == 0)
1942 			return (0);
1943 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1944 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1945 			    i + 1);
1946 			if (rle->start == irq) {
1947 				error = PCIB_MAP_MSI(device_get_parent(bus),
1948 				    dev, irq, &addr, &data);
1949 				if (error)
1950 					return (error);
1951 				pci_disable_msi(dev);
1952 				dinfo->cfg.msi.msi_addr = addr;
1953 				dinfo->cfg.msi.msi_data = data;
1954 				pci_enable_msi(dev, addr, data);
1955 				return (0);
1956 			}
1957 		}
1958 		return (ENOENT);
1959 	}
1960 
1961 	/*
1962 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1963 	 * we request the updated mapping info.  If that works, we go
1964 	 * through all the slots that use this IRQ and update them.
1965 	 */
1966 	if (cfg->msix.msix_alloc > 0) {
1967 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1968 			mv = &cfg->msix.msix_vectors[i];
1969 			if (mv->mv_irq == irq) {
1970 				error = PCIB_MAP_MSI(device_get_parent(bus),
1971 				    dev, irq, &addr, &data);
1972 				if (error)
1973 					return (error);
1974 				mv->mv_address = addr;
1975 				mv->mv_data = data;
1976 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1977 					mte = &cfg->msix.msix_table[j];
1978 					if (mte->mte_vector != i + 1)
1979 						continue;
1980 					if (mte->mte_handlers == 0)
1981 						continue;
1982 					pci_mask_msix(dev, j);
1983 					pci_enable_msix(dev, j, addr, data);
1984 					pci_unmask_msix(dev, j);
1985 				}
1986 			}
1987 		}
1988 		return (ENOENT);
1989 	}
1990 
1991 	return (ENOENT);
1992 }
1993 
1994 /*
1995  * Returns true if the specified device is blacklisted because MSI
1996  * doesn't work.
1997  */
1998 int
1999 pci_msi_device_blacklisted(device_t dev)
2000 {
2001 
2002 	if (!pci_honor_msi_blacklist)
2003 		return (0);
2004 
2005 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2006 }
2007 
2008 /*
2009  * Determine if MSI is blacklisted globally on this system.  Currently,
2010  * we just check for blacklisted chipsets as represented by the
2011  * host-PCI bridge at device 0:0:0.  In the future, it may become
2012  * necessary to check other system attributes, such as the kenv values
2013  * that give the motherboard manufacturer and model number.
2014  */
2015 static int
2016 pci_msi_blacklisted(void)
2017 {
2018 	device_t dev;
2019 
2020 	if (!pci_honor_msi_blacklist)
2021 		return (0);
2022 
2023 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2024 	if (!(pcie_chipset || pcix_chipset)) {
2025 		if (vm_guest != VM_GUEST_NO) {
2026 			/*
2027 			 * Whitelist older chipsets in virtual
2028 			 * machines known to support MSI.
2029 			 */
2030 			dev = pci_find_bsf(0, 0, 0);
2031 			if (dev != NULL)
2032 				return (!pci_has_quirk(pci_get_devid(dev),
2033 					PCI_QUIRK_ENABLE_MSI_VM));
2034 		}
2035 		return (1);
2036 	}
2037 
2038 	dev = pci_find_bsf(0, 0, 0);
2039 	if (dev != NULL)
2040 		return (pci_msi_device_blacklisted(dev));
2041 	return (0);
2042 }
2043 
2044 /*
2045  * Returns true if the specified device is blacklisted because MSI-X
2046  * doesn't work.  Note that this assumes that if MSI doesn't work,
2047  * MSI-X doesn't either.
2048  */
2049 int
2050 pci_msix_device_blacklisted(device_t dev)
2051 {
2052 
2053 	if (!pci_honor_msi_blacklist)
2054 		return (0);
2055 
2056 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2057 		return (1);
2058 
2059 	return (pci_msi_device_blacklisted(dev));
2060 }
2061 
2062 /*
2063  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2064  * is blacklisted, assume that MSI-X is as well.  Check for additional
2065  * chipsets where MSI works but MSI-X does not.
2066  */
2067 static int
2068 pci_msix_blacklisted(void)
2069 {
2070 	device_t dev;
2071 
2072 	if (!pci_honor_msi_blacklist)
2073 		return (0);
2074 
2075 	dev = pci_find_bsf(0, 0, 0);
2076 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2077 	    PCI_QUIRK_DISABLE_MSIX))
2078 		return (1);
2079 
2080 	return (pci_msi_blacklisted());
2081 }
2082 
2083 /*
2084  * Attempt to allocate *count MSI messages.  The actual number allocated is
2085  * returned in *count.  After this function returns, each message will be
2086  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2087  */
2088 int
2089 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2090 {
2091 	struct pci_devinfo *dinfo = device_get_ivars(child);
2092 	pcicfgregs *cfg = &dinfo->cfg;
2093 	struct resource_list_entry *rle;
2094 	int actual, error, i, irqs[32];
2095 	uint16_t ctrl;
2096 
2097 	/* Don't let count == 0 get us into trouble. */
2098 	if (*count == 0)
2099 		return (EINVAL);
2100 
2101 	/* If rid 0 is allocated, then fail. */
2102 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2103 	if (rle != NULL && rle->res != NULL)
2104 		return (ENXIO);
2105 
2106 	/* Already have allocated messages? */
2107 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2108 		return (ENXIO);
2109 
2110 	/* If MSI is blacklisted for this system, fail. */
2111 	if (pci_msi_blacklisted())
2112 		return (ENXIO);
2113 
2114 	/* MSI capability present? */
2115 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2116 		return (ENODEV);
2117 
2118 	if (bootverbose)
2119 		device_printf(child,
2120 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2121 		    *count, cfg->msi.msi_msgnum);
2122 
2123 	/* Don't ask for more than the device supports. */
2124 	actual = min(*count, cfg->msi.msi_msgnum);
2125 
2126 	/* Don't ask for more than 32 messages. */
2127 	actual = min(actual, 32);
2128 
2129 	/* MSI requires power of 2 number of messages. */
2130 	if (!powerof2(actual))
2131 		return (EINVAL);
2132 
2133 	for (;;) {
2134 		/* Try to allocate N messages. */
2135 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2136 		    actual, irqs);
2137 		if (error == 0)
2138 			break;
2139 		if (actual == 1)
2140 			return (error);
2141 
2142 		/* Try N / 2. */
2143 		actual >>= 1;
2144 	}
2145 
2146 	/*
2147 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2148 	 * resources in the irqs[] array, so add new resources
2149 	 * starting at rid 1.
2150 	 */
2151 	for (i = 0; i < actual; i++)
2152 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2153 		    irqs[i], irqs[i], 1);
2154 
2155 	if (bootverbose) {
2156 		if (actual == 1)
2157 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2158 		else {
2159 			int run;
2160 
2161 			/*
2162 			 * Be fancy and try to print contiguous runs
2163 			 * of IRQ values as ranges.  'run' is true if
2164 			 * we are in a range.
2165 			 */
2166 			device_printf(child, "using IRQs %d", irqs[0]);
2167 			run = 0;
2168 			for (i = 1; i < actual; i++) {
2169 
2170 				/* Still in a run? */
2171 				if (irqs[i] == irqs[i - 1] + 1) {
2172 					run = 1;
2173 					continue;
2174 				}
2175 
2176 				/* Finish previous range. */
2177 				if (run) {
2178 					printf("-%d", irqs[i - 1]);
2179 					run = 0;
2180 				}
2181 
2182 				/* Start new range. */
2183 				printf(",%d", irqs[i]);
2184 			}
2185 
2186 			/* Unfinished range? */
2187 			if (run)
2188 				printf("-%d", irqs[actual - 1]);
2189 			printf(" for MSI\n");
2190 		}
2191 	}
2192 
2193 	/* Update control register with actual count. */
2194 	ctrl = cfg->msi.msi_ctrl;
2195 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2196 	ctrl |= (ffs(actual) - 1) << 4;
2197 	cfg->msi.msi_ctrl = ctrl;
2198 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2199 
2200 	/* Update counts of alloc'd messages. */
2201 	cfg->msi.msi_alloc = actual;
2202 	cfg->msi.msi_handlers = 0;
2203 	*count = actual;
2204 	return (0);
2205 }
2206 
/*
 * Release the MSI messages associated with this device.  Delegates to
 * pci_release_msix() first in case MSI-X is in use.  Fails with EBUSY
 * while any of the IRQ resources is still allocated or has an active
 * handler, and with ENODEV when no MSI messages are allocated.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the parent. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2255 
2256 /*
2257  * Return the max supported MSI messages this device supports.
2258  * Basically, assuming the MD code can alloc messages, this function
2259  * should return the maximum value that pci_alloc_msi() can return.
2260  * Thus, it is subject to the tunables, etc.
2261  */
2262 int
2263 pci_msi_count_method(device_t dev, device_t child)
2264 {
2265 	struct pci_devinfo *dinfo = device_get_ivars(child);
2266 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2267 
2268 	if (pci_do_msi && msi->msi_location != 0)
2269 		return (msi->msi_msgnum);
2270 	return (0);
2271 }
2272 
2273 /* free pcicfgregs structure and all depending data structures */
2274 
2275 int
2276 pci_freecfg(struct pci_devinfo *dinfo)
2277 {
2278 	struct devlist *devlist_head;
2279 	struct pci_map *pm, *next;
2280 	int i;
2281 
2282 	devlist_head = &pci_devq;
2283 
2284 	if (dinfo->cfg.vpd.vpd_reg) {
2285 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2286 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2287 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2288 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2289 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2290 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2291 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2292 	}
2293 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2294 		free(pm, M_DEVBUF);
2295 	}
2296 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2297 	free(dinfo, M_DEVBUF);
2298 
2299 	/* increment the generation count */
2300 	pci_generation++;
2301 
2302 	/* we're losing one device */
2303 	pci_numdevs--;
2304 	return (0);
2305 }
2306 
2307 /*
2308  * PCI power manangement
2309  */
2310 int
2311 pci_set_powerstate_method(device_t dev, device_t child, int state)
2312 {
2313 	struct pci_devinfo *dinfo = device_get_ivars(child);
2314 	pcicfgregs *cfg = &dinfo->cfg;
2315 	uint16_t status;
2316 	int result, oldstate, highest, delay;
2317 
2318 	if (cfg->pp.pp_cap == 0)
2319 		return (EOPNOTSUPP);
2320 
2321 	/*
2322 	 * Optimize a no state change request away.  While it would be OK to
2323 	 * write to the hardware in theory, some devices have shown odd
2324 	 * behavior when going from D3 -> D3.
2325 	 */
2326 	oldstate = pci_get_powerstate(child);
2327 	if (oldstate == state)
2328 		return (0);
2329 
2330 	/*
2331 	 * The PCI power management specification states that after a state
2332 	 * transition between PCI power states, system software must
2333 	 * guarantee a minimal delay before the function accesses the device.
2334 	 * Compute the worst case delay that we need to guarantee before we
2335 	 * access the device.  Many devices will be responsive much more
2336 	 * quickly than this delay, but there are some that don't respond
2337 	 * instantly to state changes.  Transitions to/from D3 state require
2338 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2339 	 * is done below with DELAY rather than a sleeper function because
2340 	 * this function can be called from contexts where we cannot sleep.
2341 	 */
2342 	highest = (oldstate > state) ? oldstate : state;
2343 	if (highest == PCI_POWERSTATE_D3)
2344 	    delay = 10000;
2345 	else if (highest == PCI_POWERSTATE_D2)
2346 	    delay = 200;
2347 	else
2348 	    delay = 0;
2349 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2350 	    & ~PCIM_PSTAT_DMASK;
2351 	result = 0;
2352 	switch (state) {
2353 	case PCI_POWERSTATE_D0:
2354 		status |= PCIM_PSTAT_D0;
2355 		break;
2356 	case PCI_POWERSTATE_D1:
2357 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2358 			return (EOPNOTSUPP);
2359 		status |= PCIM_PSTAT_D1;
2360 		break;
2361 	case PCI_POWERSTATE_D2:
2362 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2363 			return (EOPNOTSUPP);
2364 		status |= PCIM_PSTAT_D2;
2365 		break;
2366 	case PCI_POWERSTATE_D3:
2367 		status |= PCIM_PSTAT_D3;
2368 		break;
2369 	default:
2370 		return (EINVAL);
2371 	}
2372 
2373 	if (bootverbose)
2374 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2375 		    state);
2376 
2377 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2378 	if (delay)
2379 		DELAY(delay);
2380 	return (0);
2381 }
2382 
2383 int
2384 pci_get_powerstate_method(device_t dev, device_t child)
2385 {
2386 	struct pci_devinfo *dinfo = device_get_ivars(child);
2387 	pcicfgregs *cfg = &dinfo->cfg;
2388 	uint16_t status;
2389 	int result;
2390 
2391 	if (cfg->pp.pp_cap != 0) {
2392 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2393 		switch (status & PCIM_PSTAT_DMASK) {
2394 		case PCIM_PSTAT_D0:
2395 			result = PCI_POWERSTATE_D0;
2396 			break;
2397 		case PCIM_PSTAT_D1:
2398 			result = PCI_POWERSTATE_D1;
2399 			break;
2400 		case PCIM_PSTAT_D2:
2401 			result = PCI_POWERSTATE_D2;
2402 			break;
2403 		case PCIM_PSTAT_D3:
2404 			result = PCI_POWERSTATE_D3;
2405 			break;
2406 		default:
2407 			result = PCI_POWERSTATE_UNKNOWN;
2408 			break;
2409 		}
2410 	} else {
2411 		/* No support, device is always at D0 */
2412 		result = PCI_POWERSTATE_D0;
2413 	}
2414 	return (result);
2415 }
2416 
2417 /*
2418  * Some convenience functions for PCI device drivers.
2419  */
2420 
2421 static __inline void
2422 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2423 {
2424 	uint16_t	command;
2425 
2426 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2427 	command |= bit;
2428 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2429 }
2430 
2431 static __inline void
2432 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2433 {
2434 	uint16_t	command;
2435 
2436 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2437 	command &= ~bit;
2438 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2439 }
2440 
2441 int
2442 pci_enable_busmaster_method(device_t dev, device_t child)
2443 {
2444 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2445 	return (0);
2446 }
2447 
2448 int
2449 pci_disable_busmaster_method(device_t dev, device_t child)
2450 {
2451 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2452 	return (0);
2453 }
2454 
2455 int
2456 pci_enable_io_method(device_t dev, device_t child, int space)
2457 {
2458 	uint16_t bit;
2459 
2460 	switch(space) {
2461 	case SYS_RES_IOPORT:
2462 		bit = PCIM_CMD_PORTEN;
2463 		break;
2464 	case SYS_RES_MEMORY:
2465 		bit = PCIM_CMD_MEMEN;
2466 		break;
2467 	default:
2468 		return (EINVAL);
2469 	}
2470 	pci_set_command_bit(dev, child, bit);
2471 	return (0);
2472 }
2473 
2474 int
2475 pci_disable_io_method(device_t dev, device_t child, int space)
2476 {
2477 	uint16_t bit;
2478 
2479 	switch(space) {
2480 	case SYS_RES_IOPORT:
2481 		bit = PCIM_CMD_PORTEN;
2482 		break;
2483 	case SYS_RES_MEMORY:
2484 		bit = PCIM_CMD_MEMEN;
2485 		break;
2486 	default:
2487 		return (EINVAL);
2488 	}
2489 	pci_clear_command_bit(dev, child, bit);
2490 	return (0);
2491 }
2492 
2493 /*
2494  * New style pci driver.  Parent device is either a pci-host-bridge or a
2495  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2496  */
2497 
2498 void
2499 pci_print_verbose(struct pci_devinfo *dinfo)
2500 {
2501 
2502 	if (bootverbose) {
2503 		pcicfgregs *cfg = &dinfo->cfg;
2504 
2505 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2506 		    cfg->vendor, cfg->device, cfg->revid);
2507 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2508 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2509 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2510 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2511 		    cfg->mfdev);
2512 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2513 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2514 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2515 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2516 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2517 		if (cfg->intpin > 0)
2518 			printf("\tintpin=%c, irq=%d\n",
2519 			    cfg->intpin +'a' -1, cfg->intline);
2520 		if (cfg->pp.pp_cap) {
2521 			uint16_t status;
2522 
2523 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2524 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2525 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2526 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2527 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2528 			    status & PCIM_PSTAT_DMASK);
2529 		}
2530 		if (cfg->msi.msi_location) {
2531 			int ctrl;
2532 
2533 			ctrl = cfg->msi.msi_ctrl;
2534 			printf("\tMSI supports %d message%s%s%s\n",
2535 			    cfg->msi.msi_msgnum,
2536 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2537 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2538 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2539 		}
2540 		if (cfg->msix.msix_location) {
2541 			printf("\tMSI-X supports %d message%s ",
2542 			    cfg->msix.msix_msgnum,
2543 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2544 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2545 				printf("in map 0x%x\n",
2546 				    cfg->msix.msix_table_bar);
2547 			else
2548 				printf("in maps 0x%x and 0x%x\n",
2549 				    cfg->msix.msix_table_bar,
2550 				    cfg->msix.msix_pba_bar);
2551 		}
2552 	}
2553 }
2554 
2555 static int
2556 pci_porten(device_t dev)
2557 {
2558 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2559 }
2560 
2561 static int
2562 pci_memen(device_t dev)
2563 {
2564 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2565 }
2566 
/*
 * Read a BAR's programmed value and its size-probe value.  On return,
 * *mapp holds the current BAR contents (both dwords for a 64-bit BAR)
 * and *testvalp the value read back after writing all 1's, from which
 * the caller derives the BAR size.  Decoding is disabled around the
 * probe and the original value is restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2630 
/*
 * Program a BAR with a new base address and refresh the cached value
 * (pm->pm_value) from the hardware.  The high dword is written as well
 * for 64-bit BARs; the device ROM BAR is always treated as 32-bit.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so the cached value reflects what the device accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2651 
2652 struct pci_map *
2653 pci_find_bar(device_t dev, int reg)
2654 {
2655 	struct pci_devinfo *dinfo;
2656 	struct pci_map *pm;
2657 
2658 	dinfo = device_get_ivars(dev);
2659 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2660 		if (pm->pm_reg == reg)
2661 			return (pm);
2662 	}
2663 	return (NULL);
2664 }
2665 
2666 int
2667 pci_bar_enabled(device_t dev, struct pci_map *pm)
2668 {
2669 	struct pci_devinfo *dinfo;
2670 	uint16_t cmd;
2671 
2672 	dinfo = device_get_ivars(dev);
2673 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2674 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2675 		return (0);
2676 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2677 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2678 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2679 	else
2680 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2681 }
2682 
/*
 * Allocate a new record of a BAR (register offset, value, and log2
 * size) and insert it into the device's list of maps, which is kept
 * sorted by ascending config register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry the new record should be inserted after. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev is NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2707 
2708 static void
2709 pci_restore_bars(device_t dev)
2710 {
2711 	struct pci_devinfo *dinfo;
2712 	struct pci_map *pm;
2713 	int ln2range;
2714 
2715 	dinfo = device_get_ivars(dev);
2716 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2717 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2718 			ln2range = 32;
2719 		else
2720 			ln2range = pci_maprange(pm->pm_value);
2721 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2722 		if (ln2range == 64)
2723 			pci_write_config(dev, pm->pm_reg + 4,
2724 			    pm->pm_value >> 32, 4);
2725 	}
2726 }
2727 
2728 /*
2729  * Add a resource based on a pci map register. Return 1 if the map
2730  * register is a 32bit map register or 2 if it is a 64bit register.
2731  */
2732 static int
2733 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2734     int force, int prefetch)
2735 {
2736 	struct pci_map *pm;
2737 	pci_addr_t base, map, testval;
2738 	pci_addr_t start, end, count;
2739 	int barlen, basezero, maprange, mapsize, type;
2740 	uint16_t cmd;
2741 	struct resource *res;
2742 
2743 	/*
2744 	 * The BAR may already exist if the device is a CardBus card
2745 	 * whose CIS is stored in this BAR.
2746 	 */
2747 	pm = pci_find_bar(dev, reg);
2748 	if (pm != NULL) {
2749 		maprange = pci_maprange(pm->pm_value);
2750 		barlen = maprange == 64 ? 2 : 1;
2751 		return (barlen);
2752 	}
2753 
2754 	pci_read_bar(dev, reg, &map, &testval);
2755 	if (PCI_BAR_MEM(map)) {
2756 		type = SYS_RES_MEMORY;
2757 		if (map & PCIM_BAR_MEM_PREFETCH)
2758 			prefetch = 1;
2759 	} else
2760 		type = SYS_RES_IOPORT;
2761 	mapsize = pci_mapsize(testval);
2762 	base = pci_mapbase(map);
2763 #ifdef __PCI_BAR_ZERO_VALID
2764 	basezero = 0;
2765 #else
2766 	basezero = base == 0;
2767 #endif
2768 	maprange = pci_maprange(map);
2769 	barlen = maprange == 64 ? 2 : 1;
2770 
2771 	/*
2772 	 * For I/O registers, if bottom bit is set, and the next bit up
2773 	 * isn't clear, we know we have a BAR that doesn't conform to the
2774 	 * spec, so ignore it.  Also, sanity check the size of the data
2775 	 * areas to the type of memory involved.  Memory must be at least
2776 	 * 16 bytes in size, while I/O ranges must be at least 4.
2777 	 */
2778 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2779 		return (barlen);
2780 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2781 	    (type == SYS_RES_IOPORT && mapsize < 2))
2782 		return (barlen);
2783 
2784 	/* Save a record of this BAR. */
2785 	pm = pci_add_bar(dev, reg, map, mapsize);
2786 	if (bootverbose) {
2787 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2788 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2789 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2790 			printf(", port disabled\n");
2791 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2792 			printf(", memory disabled\n");
2793 		else
2794 			printf(", enabled\n");
2795 	}
2796 
2797 	/*
2798 	 * If base is 0, then we have problems if this architecture does
2799 	 * not allow that.  It is best to ignore such entries for the
2800 	 * moment.  These will be allocated later if the driver specifically
2801 	 * requests them.  However, some removable busses look better when
2802 	 * all resources are allocated, so allow '0' to be overriden.
2803 	 *
2804 	 * Similarly treat maps whose values is the same as the test value
2805 	 * read back.  These maps have had all f's written to them by the
2806 	 * BIOS in an attempt to disable the resources.
2807 	 */
2808 	if (!force && (basezero || map == testval))
2809 		return (barlen);
2810 	if ((u_long)base != base) {
2811 		device_printf(bus,
2812 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2813 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2814 		    pci_get_function(dev), reg);
2815 		return (barlen);
2816 	}
2817 
2818 	/*
2819 	 * This code theoretically does the right thing, but has
2820 	 * undesirable side effects in some cases where peripherals
2821 	 * respond oddly to having these bits enabled.  Let the user
2822 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2823 	 * default).
2824 	 */
2825 	if (pci_enable_io_modes) {
2826 		/* Turn on resources that have been left off by a lazy BIOS */
2827 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2828 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2829 			cmd |= PCIM_CMD_PORTEN;
2830 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2831 		}
2832 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2833 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2834 			cmd |= PCIM_CMD_MEMEN;
2835 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2836 		}
2837 	} else {
2838 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2839 			return (barlen);
2840 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2841 			return (barlen);
2842 	}
2843 
2844 	count = (pci_addr_t)1 << mapsize;
2845 	if (basezero || base == pci_mapbase(testval)) {
2846 		start = 0;	/* Let the parent decide. */
2847 		end = ~0ul;
2848 	} else {
2849 		start = base;
2850 		end = base + count - 1;
2851 	}
2852 	resource_list_add(rl, type, reg, start, end, count);
2853 
2854 	/*
2855 	 * Try to allocate the resource for this BAR from our parent
2856 	 * so that this resource range is already reserved.  The
2857 	 * driver for this device will later inherit this resource in
2858 	 * pci_alloc_resource().
2859 	 */
2860 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2861 	    prefetch ? RF_PREFETCHABLE : 0);
2862 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2863 		/*
2864 		 * If the allocation fails, try to allocate a resource for
2865 		 * this BAR using any available range.  The firmware felt
2866 		 * it was important enough to assign a resource, so don't
2867 		 * disable decoding if we can help it.
2868 		 */
2869 		resource_list_delete(rl, type, reg);
2870 		resource_list_add(rl, type, reg, 0, ~0ul, count);
2871 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2872 		    count, prefetch ? RF_PREFETCHABLE : 0);
2873 	}
2874 	if (res == NULL) {
2875 		/*
2876 		 * If the allocation fails, delete the resource list entry
2877 		 * and disable decoding for this device.
2878 		 *
2879 		 * If the driver requests this resource in the future,
2880 		 * pci_reserve_map() will try to allocate a fresh
2881 		 * resource range.
2882 		 */
2883 		resource_list_delete(rl, type, reg);
2884 		pci_disable_io(dev, type);
2885 		if (bootverbose)
2886 			device_printf(bus,
2887 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2888 			    pci_get_domain(dev), pci_get_bus(dev),
2889 			    pci_get_slot(dev), pci_get_function(dev), reg);
2890 	} else {
2891 		start = rman_get_start(res);
2892 		pci_write_bar(dev, pm, start);
2893 	}
2894 	return (barlen);
2895 }
2896 
2897 /*
2898  * For ATA devices we need to decide early what addressing mode to use.
2899  * Legacy demands that the primary and secondary ATA ports sits on the
2900  * same addresses that old ISA hardware did. This dictates that we use
2901  * those addresses and ignore the BAR's if we cannot set PCI native
2902  * addressing mode.
2903  */
2904 static void
2905 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2906     uint32_t prefetchmask)
2907 {
2908 	struct resource *r;
2909 	int rid, type, progif;
2910 #if 0
2911 	/* if this device supports PCI native addressing use it */
2912 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2913 	if ((progif & 0x8a) == 0x8a) {
2914 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2915 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2916 			printf("Trying ATA native PCI addressing mode\n");
2917 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2918 		}
2919 	}
2920 #endif
2921 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2922 	type = SYS_RES_IOPORT;
2923 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2924 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2925 		    prefetchmask & (1 << 0));
2926 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2927 		    prefetchmask & (1 << 1));
2928 	} else {
2929 		rid = PCIR_BAR(0);
2930 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2931 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2932 		    0x1f7, 8, 0);
2933 		rid = PCIR_BAR(1);
2934 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2935 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2936 		    0x3f6, 1, 0);
2937 	}
2938 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2939 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2940 		    prefetchmask & (1 << 2));
2941 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2942 		    prefetchmask & (1 << 3));
2943 	} else {
2944 		rid = PCIR_BAR(2);
2945 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2946 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2947 		    0x177, 8, 0);
2948 		rid = PCIR_BAR(3);
2949 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2950 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2951 		    0x376, 1, 0);
2952 	}
2953 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2954 	    prefetchmask & (1 << 4));
2955 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2956 	    prefetchmask & (1 << 5));
2957 }
2958 
/*
 * Assign an INTx IRQ to the device and record it as SYS_RES_IRQ rid 0
 * in its resource list.  The IRQ may come from a
 * "hw.pci<domain>.<bus>.<slot>.INT<pin>.irq" tunable, from the intline
 * config register, or from PCI_ASSIGN_INTERRUPT() routing; with
 * force_route set, routing is preferred over the intline value.  If no
 * valid IRQ can be found, nothing is added.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Discard tunable values outside the valid IRQ range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3006 
/*
 * Perform early OHCI takeover from SMM: if the BIOS/SMM still owns the
 * controller, request an ownership change and wait for it to complete
 * so legacy USB emulation cannot interfere with the OS driver.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers via BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* InterruptRouting set: SMM owns the controller. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never answered; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3043 
3044 /* Perform early UHCI takeover from SMM. */
3045 static void
3046 uhci_early_takeover(device_t self)
3047 {
3048 	struct resource *res;
3049 	int rid;
3050 
3051 	/*
3052 	 * Set the PIRQD enable bit and switch off all the others. We don't
3053 	 * want legacy support to interfere with us XXX Does this also mean
3054 	 * that the BIOS won't touch the keyboard anymore if it is connected
3055 	 * to the ports of the root hub?
3056 	 */
3057 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3058 
3059 	/* Disable interrupts */
3060 	rid = PCI_UHCI_BASE_REG;
3061 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3062 	if (res != NULL) {
3063 		bus_write_2(res, UHCI_INTR, 0);
3064 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3065 	}
3066 }
3067 
/*
 * Perform early EHCI takeover from SMM: walk the extended-capability
 * list in PCI config space looking for the legacy-support capability,
 * and if the BIOS semaphore is held, claim the OS semaphore and wait
 * for the BIOS to release ownership.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* eecp is a config-space offset; 0 terminates the list. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own this controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS ownership semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3123 
/*
 * Perform early XHCI takeover from SMM: walk the MMIO extended
 * capability list for the USB legacy-support capability and, if the
 * BIOS semaphore is held, claim the OS semaphore and wait for the
 * BIOS to release ownership.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones so the first XHCI_XECP_NEXT(eec) loop test passes. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * The "next" field is in dwords, hence the << 2 scaling.
	 * NOTE(review): eecp is uint8_t, so capability offsets beyond
	 * 255 bytes would be truncated -- confirm for the controllers
	 * this must support.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS ownership semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3185 
/*
 * Populate the resource list for a newly-probed device from its BARs.
 * ATA controllers in legacy/compatibility mode get fixed legacy I/O
 * ranges instead; quirk-table entries can suppress or add individual
 * BARs.  Also assigns the INTx interrupt and, for USB host
 * controllers, performs early takeover from SMM/BIOS.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* pci_add_map returns how many BAR slots it used. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from the BIOS/SMM before attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3259 
3260 void
3261 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3262 {
3263 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3264 	device_t pcib = device_get_parent(dev);
3265 	struct pci_devinfo *dinfo;
3266 	int maxslots;
3267 	int s, f, pcifunchigh;
3268 	uint8_t hdrtype;
3269 
3270 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3271 	    ("dinfo_size too small"));
3272 	maxslots = PCIB_MAXSLOTS(pcib);
3273 	for (s = 0; s <= maxslots; s++) {
3274 		pcifunchigh = 0;
3275 		f = 0;
3276 		DELAY(1);
3277 		hdrtype = REG(PCIR_HDRTYPE, 1);
3278 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3279 			continue;
3280 		if (hdrtype & PCIM_MFDEV)
3281 			pcifunchigh = PCI_FUNCMAX;
3282 		for (f = 0; f <= pcifunchigh; f++) {
3283 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3284 			    dinfo_size);
3285 			if (dinfo != NULL) {
3286 				pci_add_child(dev, dinfo);
3287 			}
3288 		}
3289 	}
3290 #undef REG
3291 }
3292 
3293 void
3294 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3295 {
3296 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3297 	device_set_ivars(dinfo->cfg.dev, dinfo);
3298 	resource_list_init(&dinfo->resources);
3299 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3300 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3301 	pci_print_verbose(dinfo);
3302 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3303 }
3304 
/*
 * Probe method for the generic PCI bus driver.  Returns
 * BUS_PROBE_GENERIC so more specific bus subclasses can win the bid.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3314 
/*
 * Attach work shared by pci_attach() and subclassed bus drivers:
 * report our domain/physical bus and set up the bus DMA tag.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Only create a boundary-limited DMA tag at the top of a PCI
	 * hierarchy (when our grandparent is not itself a "pci" bus);
	 * nested busses inherit the parent's tag below.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* Without PCI_DMA_BOUNDARY this assignment is unconditional. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3349 
3350 static int
3351 pci_attach(device_t dev)
3352 {
3353 	int busno, domain, error;
3354 
3355 	error = pci_attach_common(dev);
3356 	if (error)
3357 		return (error);
3358 
3359 	/*
3360 	 * Since there can be multiple independantly numbered PCI
3361 	 * busses on systems with multiple PCI domains, we can't use
3362 	 * the unit number to decide which bus we are probing. We ask
3363 	 * the parent pcib what our domain and bus numbers are.
3364 	 */
3365 	domain = pcib_get_domain(dev);
3366 	busno = pcib_get_bus(dev);
3367 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3368 	return (bus_generic_attach(dev));
3369 }
3370 
3371 static void
3372 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3373     int state)
3374 {
3375 	device_t child, pcib;
3376 	struct pci_devinfo *dinfo;
3377 	int dstate, i;
3378 
3379 	/*
3380 	 * Set the device to the given state.  If the firmware suggests
3381 	 * a different power state, use it instead.  If power management
3382 	 * is not present, the firmware is responsible for managing
3383 	 * device power.  Skip children who aren't attached since they
3384 	 * are handled separately.
3385 	 */
3386 	pcib = device_get_parent(dev);
3387 	for (i = 0; i < numdevs; i++) {
3388 		child = devlist[i];
3389 		dinfo = device_get_ivars(child);
3390 		dstate = state;
3391 		if (device_is_attached(child) &&
3392 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3393 			pci_set_powerstate(child, dstate);
3394 	}
3395 }
3396 
3397 int
3398 pci_suspend(device_t dev)
3399 {
3400 	device_t child, *devlist;
3401 	struct pci_devinfo *dinfo;
3402 	int error, i, numdevs;
3403 
3404 	/*
3405 	 * Save the PCI configuration space for each child and set the
3406 	 * device in the appropriate power state for this sleep state.
3407 	 */
3408 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3409 		return (error);
3410 	for (i = 0; i < numdevs; i++) {
3411 		child = devlist[i];
3412 		dinfo = device_get_ivars(child);
3413 		pci_cfg_save(child, dinfo, 0);
3414 	}
3415 
3416 	/* Suspend devices before potentially powering them down. */
3417 	error = bus_generic_suspend(dev);
3418 	if (error) {
3419 		free(devlist, M_TEMP);
3420 		return (error);
3421 	}
3422 	if (pci_do_power_suspend)
3423 		pci_set_power_children(dev, devlist, numdevs,
3424 		    PCI_POWERSTATE_D3);
3425 	free(devlist, M_TEMP);
3426 	return (0);
3427 }
3428 
/*
 * Resume the PCI bus: power children back to D0, restore their saved
 * config space, then resume them -- display/memory/bridge/base
 * peripheral classes first, everything else in a second pass.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Driverless children get their state re-saved instead. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3484 
/*
 * Locate the preloaded "pci_vendor_data" module (the PCI vendor/device
 * description database) and publish its address and size in the
 * pci_vendordata globals for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this writes one byte past the
			 * reported size; assumes the preload area always
			 * has at least one spare byte -- confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3504 
3505 void
3506 pci_driver_added(device_t dev, driver_t *driver)
3507 {
3508 	int numdevs;
3509 	device_t *devlist;
3510 	device_t child;
3511 	struct pci_devinfo *dinfo;
3512 	int i;
3513 
3514 	if (bootverbose)
3515 		device_printf(dev, "driver added\n");
3516 	DEVICE_IDENTIFY(driver, dev);
3517 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3518 		return;
3519 	for (i = 0; i < numdevs; i++) {
3520 		child = devlist[i];
3521 		if (device_get_state(child) != DS_NOTPRESENT)
3522 			continue;
3523 		dinfo = device_get_ivars(child);
3524 		pci_print_verbose(dinfo);
3525 		if (bootverbose)
3526 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3527 		pci_cfg_restore(child, dinfo);
3528 		if (device_probe_and_attach(child) != 0)
3529 			pci_child_detached(dev, child);
3530 	}
3531 	free(devlist, M_TEMP);
3532 }
3533 
/*
 * Set up an interrupt handler for a child device.  For legacy INTx
 * (rid 0) this just re-enables INTx decoding.  For MSI/MSI-X rids it
 * also asks the parent bridge to map the message on first use and
 * programs the address/data pair, keeping per-vector handler counts so
 * each message is enabled exactly once; INTx is then disabled.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vectors on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is added. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* Fall through here on success with error == 0. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3625 
/*
 * Tear down an interrupt handler for a direct child.  For legacy INTx
 * (rid 0), mask INTx decoding.  For MSI/MSI-X, decrement the handler
 * count and mask the message (or disable MSI entirely) when the count
 * reaches zero, then hand off to the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * presumably a matching entry always exists for an active
		 * MSI/MSI-X rid -- confirm.
		 */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3684 
3685 int
3686 pci_print_child(device_t dev, device_t child)
3687 {
3688 	struct pci_devinfo *dinfo;
3689 	struct resource_list *rl;
3690 	int retval = 0;
3691 
3692 	dinfo = device_get_ivars(child);
3693 	rl = &dinfo->resources;
3694 
3695 	retval += bus_print_child_header(dev, child);
3696 
3697 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3698 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3699 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3700 	if (device_get_flags(dev))
3701 		retval += printf(" flags %#x", device_get_flags(dev));
3702 
3703 	retval += printf(" at device %d.%d", pci_get_slot(child),
3704 	    pci_get_function(child));
3705 
3706 	retval += bus_print_child_footer(dev, child);
3707 
3708 	return (retval);
3709 }
3710 
/*
 * Table mapping PCI class/subclass codes to human-readable names for
 * pci_probe_nomatch().  A subclass of -1 supplies the generic
 * description for the whole class; the list ends at a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3803 
3804 void
3805 pci_probe_nomatch(device_t dev, device_t child)
3806 {
3807 	int i;
3808 	const char *cp, *scp;
3809 	char *device;
3810 
3811 	/*
3812 	 * Look for a listing for this device in a loaded device database.
3813 	 */
3814 	if ((device = pci_describe_device(child)) != NULL) {
3815 		device_printf(dev, "<%s>", device);
3816 		free(device, M_DEVBUF);
3817 	} else {
3818 		/*
3819 		 * Scan the class/subclass descriptions for a general
3820 		 * description.
3821 		 */
3822 		cp = "unknown";
3823 		scp = NULL;
3824 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3825 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3826 				if (pci_nomatch_tab[i].subclass == -1) {
3827 					cp = pci_nomatch_tab[i].desc;
3828 				} else if (pci_nomatch_tab[i].subclass ==
3829 				    pci_get_subclass(child)) {
3830 					scp = pci_nomatch_tab[i].desc;
3831 				}
3832 			}
3833 		}
3834 		device_printf(dev, "<%s%s%s>",
3835 		    cp ? cp : "",
3836 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3837 		    scp ? scp : "");
3838 	}
3839 	printf(" at device %d.%d (no driver attached)\n",
3840 	    pci_get_slot(child), pci_get_function(child));
3841 	pci_cfg_save(child, device_get_ivars(child), 1);
3842 }
3843 
/*
 * Clean up after a child detaches: release any resources its driver
 * leaked (warning about each kind), release leaked MSI/MSI-X vectors,
 * and save the device's config space for a later re-attach.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");

	pci_cfg_save(child, dinfo, 1);
}
3871 
3872 /*
3873  * Parse the PCI device database, if loaded, and return a pointer to a
3874  * description of the device.
3875  *
3876  * The database is flat text formatted as follows:
3877  *
3878  * Any line not in a valid format is ignored.
3879  * Lines are terminated with newline '\n' characters.
3880  *
3881  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3882  * the vendor name.
3883  *
3884  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3885  * - devices cannot be listed without a corresponding VENDOR line.
3886  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3887  * another TAB, then the device name.
3888  */
3889 
3890 /*
3891  * Assuming (ptr) points to the beginning of a line in the database,
3892  * return the vendor or device and description of the next entry.
3893  * The value of (vendor) or (device) inappropriate for the entry type
3894  * is set to -1.  Returns nonzero at the end of the database.
3895  *
3896  * Note that this is slightly unrobust in the face of corrupt data;
3897  * we attempt to safeguard against this by spamming the end of the
3898  * database with a newline when we initialise.
3899  */
3900 static int
3901 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3902 {
3903 	char	*cp = *ptr;
3904 	int	left;
3905 
3906 	*device = -1;
3907 	*vendor = -1;
3908 	**desc = '\0';
3909 	for (;;) {
3910 		left = pci_vendordata_size - (cp - pci_vendordata);
3911 		if (left <= 0) {
3912 			*ptr = cp;
3913 			return(1);
3914 		}
3915 
3916 		/* vendor entry? */
3917 		if (*cp != '\t' &&
3918 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3919 			break;
3920 		/* device entry? */
3921 		if (*cp == '\t' &&
3922 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3923 			break;
3924 
3925 		/* skip to next line */
3926 		while (*cp != '\n' && left > 0) {
3927 			cp++;
3928 			left--;
3929 		}
3930 		if (*cp == '\n') {
3931 			cp++;
3932 			left--;
3933 		}
3934 	}
3935 	/* skip to next line */
3936 	while (*cp != '\n' && left > 0) {
3937 		cp++;
3938 		left--;
3939 	}
3940 	if (*cp == '\n' && left > 0)
3941 		cp++;
3942 	*ptr = cp;
3943 	return(0);
3944 }
3945 
3946 static char *
3947 pci_describe_device(device_t dev)
3948 {
3949 	int	vendor, device;
3950 	char	*desc, *vp, *dp, *line;
3951 
3952 	desc = vp = dp = NULL;
3953 
3954 	/*
3955 	 * If we have no vendor data, we can't do anything.
3956 	 */
3957 	if (pci_vendordata == NULL)
3958 		goto out;
3959 
3960 	/*
3961 	 * Scan the vendor data looking for this device
3962 	 */
3963 	line = pci_vendordata;
3964 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3965 		goto out;
3966 	for (;;) {
3967 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3968 			goto out;
3969 		if (vendor == pci_get_vendor(dev))
3970 			break;
3971 	}
3972 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3973 		goto out;
3974 	for (;;) {
3975 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3976 			*dp = 0;
3977 			break;
3978 		}
3979 		if (vendor != -1) {
3980 			*dp = 0;
3981 			break;
3982 		}
3983 		if (device == pci_get_device(dev))
3984 			break;
3985 	}
3986 	if (dp[0] == '\0')
3987 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3988 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3989 	    NULL)
3990 		sprintf(desc, "%s, %s", vp, dp);
3991 out:
3992 	if (vp != NULL)
3993 		free(vp, M_DEVBUF);
3994 	if (dp != NULL)
3995 		free(dp, M_DEVBUF);
3996 	return(desc);
3997 }
3998 
/*
 * Bus method: read an instance variable of a PCI child device.  All
 * values come from the cached copy of the config header in the child's
 * pci_devinfo; nothing is re-read from the hardware here.  Returns 0
 * on success, ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4081 
/*
 * Bus method: write an instance variable of a PCI child device.  Only
 * the interrupt pin may currently be changed; the identity registers
 * are refused with EINVAL and unknown ivars with ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		/* Update only the cached copy, not the hardware. */
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4114 
4115 #include "opt_ddb.h"
4116 #ifdef DDB
4117 #include <ddb/ddb.h>
4118 #include <sys/cons.h>
4119 
4120 /*
4121  * List resources based on pci map registers, used for within ddb
4122  */
4123 
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to give unattached devices distinct unit numbers. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices with no attached driver print as "noneN". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4163 #endif /* DDB */
4164 
/*
 * Lazily reserve the resource backing a BAR that was not allocated at
 * attach time: size the BAR (or reuse the cached size for a previously
 * failed one), validate that the requested resource type matches the
 * BAR type, allocate a suitably sized and aligned range from the
 * parent, record it in the child's resource list as RLE_RESERVED, and
 * program the BAR with the assigned address.  Returns the resource or
 * NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject a request whose type disagrees with the BAR's own type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs are naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were actually given. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4270 
/*
 * Bus method: allocate a resource for a child device.  For direct PCI
 * children this performs lazy BAR reservation and interrupt routing;
 * requests for grandchildren are simply passed up the tree.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* The reserved entry (if any) is handed out from the list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4344 
/*
 * Bus method: release a resource held by a child device.  Requests for
 * grandchildren and (with NEW_PCIB) bridge window resources are passed
 * up the tree; everything else is released via the child's resource
 * list so reserved BAR entries are handled correctly.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4379 
/*
 * Bus method: activate a resource.  After the generic activation,
 * enable decoding for direct children: ROM BARs get their explicit
 * enable bit set and I/O or memory decoding is turned on in the
 * command register via PCI_ENABLE_IO().
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4407 
/*
 * Bus method: deactivate a resource.  After the generic deactivation,
 * disable decoding of a ROM BAR by rewriting it without the enable bit.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4428 
/*
 * Detach and destroy a PCI child device: detach the driver, turn off
 * memory/I-O decoding, force-release any resources still active or
 * busy (complaining, since the driver should have freed them),
 * unreserve the rest, and finally free the resource list, devclass
 * entry, and config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4468 
/*
 * Bus method: delete an entry from a direct child's resource list.
 * Refuses (with a console complaint) if the resource is still active
 * or busy; silently ignores grandchildren and unknown entries.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4498 
4499 struct resource_list *
4500 pci_get_resource_list (device_t dev, device_t child)
4501 {
4502 	struct pci_devinfo *dinfo = device_get_ivars(child);
4503 
4504 	return (&dinfo->resources);
4505 }
4506 
4507 bus_dma_tag_t
4508 pci_get_dma_tag(device_t bus, device_t dev)
4509 {
4510 	struct pci_softc *sc = device_get_softc(bus);
4511 
4512 	return (sc->sc_dma_tag);
4513 }
4514 
4515 uint32_t
4516 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4517 {
4518 	struct pci_devinfo *dinfo = device_get_ivars(child);
4519 	pcicfgregs *cfg = &dinfo->cfg;
4520 
4521 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4522 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4523 }
4524 
4525 void
4526 pci_write_config_method(device_t dev, device_t child, int reg,
4527     uint32_t val, int width)
4528 {
4529 	struct pci_devinfo *dinfo = device_get_ivars(child);
4530 	pcicfgregs *cfg = &dinfo->cfg;
4531 
4532 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4533 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4534 }
4535 
4536 int
4537 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4538     size_t buflen)
4539 {
4540 
4541 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4542 	    pci_get_function(child));
4543 	return (0);
4544 }
4545 
4546 int
4547 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4548     size_t buflen)
4549 {
4550 	struct pci_devinfo *dinfo;
4551 	pcicfgregs *cfg;
4552 
4553 	dinfo = device_get_ivars(child);
4554 	cfg = &dinfo->cfg;
4555 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4556 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4557 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4558 	    cfg->progif);
4559 	return (0);
4560 }
4561 
4562 int
4563 pci_assign_interrupt_method(device_t dev, device_t child)
4564 {
4565 	struct pci_devinfo *dinfo = device_get_ivars(child);
4566 	pcicfgregs *cfg = &dinfo->cfg;
4567 
4568 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4569 	    cfg->intpin));
4570 }
4571 
/*
 * Module event handler: on load, initialize the global device list,
 * create the /dev/pci control device, and pull in the vendor database;
 * on unload, destroy the control device.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
4593 
/*
 * Restore the writable PCI Express capability registers from the
 * values saved by pci_cfg_save_pcie().  For capability version 1 only
 * the registers applicable to the port type are restored; version 2
 * devices implement all registers, so everything is written back.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot registers exist only on ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4629 
4630 static void
4631 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4632 {
4633 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4634 	    dinfo->cfg.pcix.pcix_command,  2);
4635 }
4636 
/*
 * Restore a type 0 device's configuration state (BARs, command and
 * interrupt registers, timers, plus PCIe/PCI-X/MSI/MSI-X capability
 * state) from the copy cached by pci_cfg_save(), powering the device
 * back up to D0 first.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4686 
/*
 * Save the writable PCI Express capability registers so they can be
 * written back by pci_cfg_restore_pcie().  Which registers exist
 * depends on the capability version and port type; the gating here
 * mirrors the restore side exactly.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot registers exist only on ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4724 
4725 static void
4726 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4727 {
4728 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4729 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4730 }
4731 
/*
 * Snapshot a type 0 device's configuration state into its pci_devinfo
 * so pci_cfg_restore() can reinstate it later, and — when 'setstate'
 * is nonzero and the hw.pci.do_power_nodriver policy allows it for
 * this device class — place the device in the D3 power state.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4817 
4818 /* Wrapper APIs suitable for device driver use. */
4819 void
4820 pci_save_state(device_t dev)
4821 {
4822 	struct pci_devinfo *dinfo;
4823 
4824 	dinfo = device_get_ivars(dev);
4825 	pci_cfg_save(dev, dinfo, 0);
4826 }
4827 
4828 void
4829 pci_restore_state(device_t dev)
4830 {
4831 	struct pci_devinfo *dinfo;
4832 
4833 	dinfo = device_get_ivars(dev);
4834 	pci_cfg_restore(dev, dinfo);
4835 }
4836