xref: /freebsd/sys/dev/pci/pci.c (revision ef0cb5db0af0d5d5b75b74f8e534fe601b7176d7)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #include <dev/usb/controller/xhcireg.h>
67 #include <dev/usb/controller/ehcireg.h>
68 #include <dev/usb/controller/ohcireg.h>
69 #include <dev/usb/controller/uhcireg.h>
70 
71 #include "pcib_if.h"
72 #include "pci_if.h"
73 
/*
 * True iff config register 'reg' is the expansion ROM BAR for the
 * header type recorded in 'cfg' (type 0 and type 1 headers keep it at
 * different offsets; cardbus bridges have none).  Both parameters are
 * parenthesized so the macro is safe for arbitrary expressions.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
77 
/*
 * Forward declarations for helpers private to this file.
 */
static int		pci_has_quirk(uint32_t devid, int quirk);
/* BAR / ROM register decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

/* Resource and interrupt management. */
static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
/* newbus device interface entry points. */
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
#ifdef PCI_RES_BUS
static int		pci_detach(device_t dev);
#endif
/* Vendor database loading and device description. */
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
/* Config-space header and capability parsing. */
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI / MSI-X support. */
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static uint16_t		pci_get_rid_method(device_t dev, device_t child);

static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
    int f, uint16_t vid, uint16_t did, size_t size);
126 
/*
 * Kobj method table for the PCI bus driver: device, bus and PCI
 * interface methods implemented here, with bus_generic_* fallbacks
 * where the generic behavior suffices.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
200 
/*
 * Declare the "pci" driver class and attach it below any pcib (PCI
 * bridge) device; pci_modevent runs at module load/unload.
 */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Contents/size of the vendor description database loaded at modevent. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
209 
/*
 * A device quirk entry.  'devid' packs the device ID in the high
 * 16 bits and the vendor ID in the low 16 bits; 'type' is one of the
 * PCI_QUIRK_* codes below and arg1/arg2 are quirk-specific (e.g. the
 * register offset for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;
	int	arg2;
};
222 
/*
 * Table of known-broken or special-cased devices, searched by
 * pci_has_quirk().  Terminated by the all-zero sentinel entry.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* Sentinel: devid 0 terminates the table. */
	{ 0 }
};
300 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions; 'generation' bumps on change. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set while scanning when any PCIe / PCI-X capability is seen. */
static int pcie_chipset, pcix_chipset;
310 
311 /* sysctl vars */
312 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
313 
314 static int pci_enable_io_modes = 1;
315 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
316     &pci_enable_io_modes, 1,
317     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
318 enable these bits correctly.  We'd like to do this all the time, but there\n\
319 are some peripherals that this causes problems with.");
320 
321 static int pci_do_realloc_bars = 0;
322 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
323     &pci_do_realloc_bars, 0,
324     "Attempt to allocate a new range for any BARs whose original "
325     "firmware-assigned ranges fail to allocate during the initial device scan.");
326 
327 static int pci_do_power_nodriver = 0;
328 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
329     &pci_do_power_nodriver, 0,
330   "Place a function into D3 state when no driver attaches to it.  0 means\n\
331 disable.  1 means conservatively place devices into D3 state.  2 means\n\
332 agressively place devices into D3 state.  3 means put absolutely everything\n\
333 in D3 state.");
334 
335 int pci_do_power_resume = 1;
336 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
337     &pci_do_power_resume, 1,
338   "Transition from D3 -> D0 on resume.");
339 
340 int pci_do_power_suspend = 1;
341 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
342     &pci_do_power_suspend, 1,
343   "Transition from D0 -> D3 on suspend.");
344 
345 static int pci_do_msi = 1;
346 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
347     "Enable support for MSI interrupts");
348 
349 static int pci_do_msix = 1;
350 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
351     "Enable support for MSI-X interrupts");
352 
353 static int pci_honor_msi_blacklist = 1;
354 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
355     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
356 
357 #if defined(__i386__) || defined(__amd64__)
358 static int pci_usb_takeover = 1;
359 #else
360 static int pci_usb_takeover = 0;
361 #endif
362 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
363     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
364 Disable this if you depend on BIOS emulation of USB devices, that is\n\
365 you use USB devices (like keyboard or mouse) but do not load USB drivers");
366 
367 static int pci_clear_bars;
368 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
369     "Ignore firmware-assigned resources for BARs.");
370 
371 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
372 static int pci_clear_buses;
373 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
374     "Ignore firmware-assigned bus numbers.");
375 #endif
376 
377 static int pci_enable_ari = 1;
378 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
379     0, "Enable support for PCIe Alternative RID Interpretation");
380 
381 static int
382 pci_has_quirk(uint32_t devid, int quirk)
383 {
384 	const struct pci_quirk *q;
385 
386 	for (q = &pci_quirks[0]; q->devid; q++) {
387 		if (q->devid == devid && q->type == quirk)
388 			return (1);
389 	}
390 	return (0);
391 }
392 
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience
 * wrapper around pci_find_dbsf().
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
401 
402 /* Find a device_t by domain/bus/slot/function */
403 
404 device_t
405 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
406 {
407 	struct pci_devinfo *dinfo;
408 
409 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
410 		if ((dinfo->cfg.domain == domain) &&
411 		    (dinfo->cfg.bus == bus) &&
412 		    (dinfo->cfg.slot == slot) &&
413 		    (dinfo->cfg.func == func)) {
414 			return (dinfo->cfg.dev);
415 		}
416 	}
417 
418 	return (NULL);
419 }
420 
421 /* Find a device_t by vendor/device ID */
422 
423 device_t
424 pci_find_device(uint16_t vendor, uint16_t device)
425 {
426 	struct pci_devinfo *dinfo;
427 
428 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
429 		if ((dinfo->cfg.vendor == vendor) &&
430 		    (dinfo->cfg.device == device)) {
431 			return (dinfo->cfg.dev);
432 		}
433 	}
434 
435 	return (NULL);
436 }
437 
438 device_t
439 pci_find_class(uint8_t class, uint8_t subclass)
440 {
441 	struct pci_devinfo *dinfo;
442 
443 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
444 		if (dinfo->cfg.baseclass == class &&
445 		    dinfo->cfg.subclass == subclass) {
446 			return (dinfo->cfg.dev);
447 		}
448 	}
449 
450 	return (NULL);
451 }
452 
453 static int
454 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
455 {
456 	va_list ap;
457 	int retval;
458 
459 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
460 	    cfg->func);
461 	va_start(ap, fmt);
462 	retval += vprintf(fmt, ap);
463 	va_end(ap);
464 	return (retval);
465 }
466 
467 /* return base address of memory or port map */
468 
469 static pci_addr_t
470 pci_mapbase(uint64_t mapreg)
471 {
472 
473 	if (PCI_BAR_MEM(mapreg))
474 		return (mapreg & PCIM_BAR_MEM_BASE);
475 	else
476 		return (mapreg & PCIM_BAR_IO_BASE);
477 }
478 
479 /* return map type of memory or port map */
480 
481 static const char *
482 pci_maptype(uint64_t mapreg)
483 {
484 
485 	if (PCI_BAR_IO(mapreg))
486 		return ("I/O Port");
487 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
488 		return ("Prefetchable Memory");
489 	return ("Memory");
490 }
491 
492 /* return log2 of map size decoded for memory or port map */
493 
494 int
495 pci_mapsize(uint64_t testval)
496 {
497 	int ln2size;
498 
499 	testval = pci_mapbase(testval);
500 	ln2size = 0;
501 	if (testval != 0) {
502 		while ((testval & 1) == 0)
503 		{
504 			ln2size++;
505 			testval >>= 1;
506 		}
507 	}
508 	return (ln2size);
509 }
510 
511 /* return base address of device ROM */
512 
513 static pci_addr_t
514 pci_rombase(uint64_t mapreg)
515 {
516 
517 	return (mapreg & PCIM_BIOS_ADDR_MASK);
518 }
519 
520 /* return log2 of map size decided for device ROM */
521 
522 static int
523 pci_romsize(uint64_t testval)
524 {
525 	int ln2size;
526 
527 	testval = pci_rombase(testval);
528 	ln2size = 0;
529 	if (testval != 0) {
530 		while ((testval & 1) == 0)
531 		{
532 			ln2size++;
533 			testval >>= 1;
534 		}
535 	}
536 	return (ln2size);
537 }
538 
539 /* return log2 of address range supported by map register */
540 
541 static int
542 pci_maprange(uint64_t mapreg)
543 {
544 	int ln2range = 0;
545 
546 	if (PCI_BAR_IO(mapreg))
547 		ln2range = 32;
548 	else
549 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
550 		case PCIM_BAR_MEM_32:
551 			ln2range = 32;
552 			break;
553 		case PCIM_BAR_MEM_1MB:
554 			ln2range = 20;
555 			break;
556 		case PCIM_BAR_MEM_64:
557 			ln2range = 64;
558 			break;
559 		}
560 	return (ln2range);
561 }
562 
563 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
564 
565 static void
566 pci_fixancient(pcicfgregs *cfg)
567 {
568 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
569 		return;
570 
571 	/* PCI to PCI bridges use header type 1 */
572 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
573 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
574 }
575 
/*
 * Extract header-type specific config data: subsystem IDs, bridge bus
 * numbers and latencies, and the number of BARs for this header type.
 * Leaves 'cfg' untouched for unknown header types.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Cardbus bridges carry subsystem IDs as well. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
611 
612 /* read configuration header into pcicfgregs structure */
613 struct pci_devinfo *
614 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
615 {
616 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
617 	uint16_t vid, did;
618 
619 	vid = REG(PCIR_VENDOR, 2);
620 	did = REG(PCIR_DEVICE, 2);
621 	if (vid != 0xffff)
622 		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
623 
624 	return (NULL);
625 }
626 
/*
 * Allocate a pci_devinfo of 'size' bytes (callers may embed it in a
 * larger record), snapshot the standard config header into it, parse
 * capabilities, link it onto the global device list and mirror the
 * identity fields into the pciio conf structure.  Uses the REG()
 * macro defined above pci_read_device().
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
    uint16_t did, size_t size)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

	cfg = &devlist_entry->cfg;

	/* Snapshot the common (type-independent) header registers. */
	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Split the multi-function bit out of the header type. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->devinfo_size	= size;
	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* Parse the capability list only if the device advertises one. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror identity into the pciio conf structure for userland. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG
693 
/*
 * Walk the device's capability list and record the location (and key
 * registers) of each capability we care about: power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X and PCI Express.
 * The REG/WREG macros intentionally remain defined for the VPD
 * helpers that follow (see the comment at the end of this function).
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Record only the first PM capability seen. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each live at a BAR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
854 
855 /*
856  * PCI Vital Product Data
857  */
858 
859 #define	PCI_VPD_TIMEOUT		1000000
860 
861 static int
862 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
863 {
864 	int count = PCI_VPD_TIMEOUT;
865 
866 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
867 
868 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
869 
870 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
871 		if (--count < 0)
872 			return (ENXIO);
873 		DELAY(1);	/* limit looping */
874 	}
875 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
876 
877 	return (0);
878 }
879 
#if 0
/*
 * Write one aligned 32-bit word of VPD data (inverse of
 * pci_read_vpd_reg(): completion is signalled by bit 15 clearing).
 * Currently unused and compiled out; kept for reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
899 
#undef PCI_VPD_TIMEOUT

/*
 * Cursor state used while parsing a device's VPD byte stream.  VPD is
 * fetched 32 bits at a time; 'val' buffers the last word read and
 * 'bytesinval' counts how many of its bytes are still unconsumed.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config cycles */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* buffered data word */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of bytes returned */
};
910 
911 static int
912 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
913 {
914 	uint32_t reg;
915 	uint8_t byte;
916 
917 	if (vrs->bytesinval == 0) {
918 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
919 			return (ENXIO);
920 		vrs->val = le32toh(reg);
921 		vrs->off += 4;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval = 3;
924 	} else {
925 		vrs->val = vrs->val >> 8;
926 		byte = vrs->val & 0xff;
927 		vrs->bytesinval--;
928 	}
929 
930 	vrs->cksum += byte;
931 	*data = byte;
932 	return (0);
933 }
934 
/*
 * Parse a device's Vital Product Data into cfg->vpd.  The VPD stream
 * is consumed one byte at a time through vpd_nextbyte() and decoded
 * with a small state machine:
 *
 *   state 0: expecting a resource item tag (small or large form)
 *   state 1: copying the Identifier String into vpd_ident
 *   state 2/3: VPD-R keyword header / keyword value (vpd_ros array)
 *   state 4: skipping an unhandled item's payload
 *   state 5/6: VPD-W keyword header / keyword value (vpd_w array)
 *   state -1: normal termination; state -2: I/O error
 *
 * On checksum failure the read-only data is discarded; on I/O error
 * all parsed data is discarded.  Either way vpd_cached is set so the
 * (possibly failed) read is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD space is at most 0x7f 4-byte words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array as needed (doubling). */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" carries the checksum byte; the running sum
			 * over everything through that byte must be 0.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip payload of an unhandled item. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the read-write array as needed (doubling). */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this writable field starts in VPD. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we don't retry forever. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1206 
1207 int
1208 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1209 {
1210 	struct pci_devinfo *dinfo = device_get_ivars(child);
1211 	pcicfgregs *cfg = &dinfo->cfg;
1212 
1213 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1214 		pci_read_vpd(device_get_parent(dev), cfg);
1215 
1216 	*identptr = cfg->vpd.vpd_ident;
1217 
1218 	if (*identptr == NULL)
1219 		return (ENXIO);
1220 
1221 	return (0);
1222 }
1223 
1224 int
1225 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1226 	const char **vptr)
1227 {
1228 	struct pci_devinfo *dinfo = device_get_ivars(child);
1229 	pcicfgregs *cfg = &dinfo->cfg;
1230 	int i;
1231 
1232 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1233 		pci_read_vpd(device_get_parent(dev), cfg);
1234 
1235 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1236 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1237 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1238 			*vptr = cfg->vpd.vpd_ros[i].value;
1239 			return (0);
1240 		}
1241 
1242 	*vptr = NULL;
1243 	return (ENXIO);
1244 }
1245 
1246 struct pcicfg_vpd *
1247 pci_fetch_vpd_list(device_t dev)
1248 {
1249 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1250 	pcicfgregs *cfg = &dinfo->cfg;
1251 
1252 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1253 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1254 	return (&cfg->vpd);
1255 }
1256 
1257 /*
1258  * Find the requested HyperTransport capability and return the offset
1259  * in configuration space via the pointer provided.  The function
1260  * returns 0 on success and an error code otherwise.
1261  */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HyperTransport capability, if any. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Slave and host capabilities are distinguished by only
		 * the top three bits of the command register; all other
		 * HT capability types use the full capability mask.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			/*
			 * NOTE(review): when the next pointer is 0 this
			 * still reads the ID byte at config offset 0
			 * (vendor ID low byte) before the loop condition
			 * terminates; presumably harmless — confirm.
			 */
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1299 
1300 /*
1301  * Find the requested capability and return the offset in
1302  * configuration space via the pointer provided.  The function returns
1303  * 0 on success and an error code otherwise.
1304  */
1305 int
1306 pci_find_cap_method(device_t dev, device_t child, int capability,
1307     int *capreg)
1308 {
1309 	struct pci_devinfo *dinfo = device_get_ivars(child);
1310 	pcicfgregs *cfg = &dinfo->cfg;
1311 	u_int32_t status;
1312 	u_int8_t ptr;
1313 
1314 	/*
1315 	 * Check the CAP_LIST bit of the PCI status register first.
1316 	 */
1317 	status = pci_read_config(child, PCIR_STATUS, 2);
1318 	if (!(status & PCIM_STATUS_CAPPRESENT))
1319 		return (ENXIO);
1320 
1321 	/*
1322 	 * Determine the start pointer of the capabilities list.
1323 	 */
1324 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1325 	case PCIM_HDRTYPE_NORMAL:
1326 	case PCIM_HDRTYPE_BRIDGE:
1327 		ptr = PCIR_CAP_PTR;
1328 		break;
1329 	case PCIM_HDRTYPE_CARDBUS:
1330 		ptr = PCIR_CAP_PTR_2;
1331 		break;
1332 	default:
1333 		/* XXX: panic? */
1334 		return (ENXIO);		/* no extended capabilities support */
1335 	}
1336 	ptr = pci_read_config(child, ptr, 1);
1337 
1338 	/*
1339 	 * Traverse the capabilities list.
1340 	 */
1341 	while (ptr != 0) {
1342 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1343 			if (capreg != NULL)
1344 				*capreg = ptr;
1345 			return (0);
1346 		}
1347 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1348 	}
1349 
1350 	return (ENOENT);
1351 }
1352 
1353 /*
1354  * Find the requested extended capability and return the offset in
1355  * configuration space via the pointer provided.  The function returns
1356  * 0 on success and an error code otherwise.
1357  */
1358 int
1359 pci_find_extcap_method(device_t dev, device_t child, int capability,
1360     int *capreg)
1361 {
1362 	struct pci_devinfo *dinfo = device_get_ivars(child);
1363 	pcicfgregs *cfg = &dinfo->cfg;
1364 	uint32_t ecap;
1365 	uint16_t ptr;
1366 
1367 	/* Only supported for PCI-express devices. */
1368 	if (cfg->pcie.pcie_location == 0)
1369 		return (ENXIO);
1370 
1371 	ptr = PCIR_EXTCAP;
1372 	ecap = pci_read_config(child, ptr, 4);
1373 	if (ecap == 0xffffffff || ecap == 0)
1374 		return (ENOENT);
1375 	for (;;) {
1376 		if (PCI_EXTCAP_ID(ecap) == capability) {
1377 			if (capreg != NULL)
1378 				*capreg = ptr;
1379 			return (0);
1380 		}
1381 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1382 		if (ptr == 0)
1383 			break;
1384 		ecap = pci_read_config(child, ptr, 4);
1385 	}
1386 
1387 	return (ENOENT);
1388 }
1389 
1390 /*
1391  * Support for MSI-X message interrupts.
1392  */
1393 void
1394 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1395     uint64_t address, uint32_t data)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(child);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	uint32_t offset;
1400 
1401 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1402 	offset = msix->msix_table_offset + index * 16;
1403 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1404 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1405 	bus_write_4(msix->msix_table_res, offset + 8, data);
1406 
1407 	/* Enable MSI -> HT mapping. */
1408 	pci_ht_map_msi(child, address);
1409 }
1410 
1411 void
1412 pci_mask_msix(device_t dev, u_int index)
1413 {
1414 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1415 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1416 	uint32_t offset, val;
1417 
1418 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1419 	offset = msix->msix_table_offset + index * 16 + 12;
1420 	val = bus_read_4(msix->msix_table_res, offset);
1421 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1422 		val |= PCIM_MSIX_VCTRL_MASK;
1423 		bus_write_4(msix->msix_table_res, offset, val);
1424 	}
1425 }
1426 
1427 void
1428 pci_unmask_msix(device_t dev, u_int index)
1429 {
1430 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1431 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1432 	uint32_t offset, val;
1433 
1434 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1435 	offset = msix->msix_table_offset + index * 16 + 12;
1436 	val = bus_read_4(msix->msix_table_res, offset);
1437 	if (val & PCIM_MSIX_VCTRL_MASK) {
1438 		val &= ~PCIM_MSIX_VCTRL_MASK;
1439 		bus_write_4(msix->msix_table_res, offset, val);
1440 	}
1441 }
1442 
1443 int
1444 pci_pending_msix(device_t dev, u_int index)
1445 {
1446 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1447 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1448 	uint32_t offset, bit;
1449 
1450 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1451 	offset = msix->msix_pba_offset + (index / 32) * 4;
1452 	bit = 1 << index % 32;
1453 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1454 }
1455 
1456 /*
1457  * Restore MSI-X registers and table during resume.  If MSI-X is
1458  * enabled then walk the virtual table to restore the actual MSI-X
1459  * table.
1460  */
1461 static void
1462 pci_resume_msix(device_t dev)
1463 {
1464 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1465 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1466 	struct msix_table_entry *mte;
1467 	struct msix_vector *mv;
1468 	int i;
1469 
1470 	if (msix->msix_alloc > 0) {
1471 		/* First, mask all vectors. */
1472 		for (i = 0; i < msix->msix_msgnum; i++)
1473 			pci_mask_msix(dev, i);
1474 
1475 		/* Second, program any messages with at least one handler. */
1476 		for (i = 0; i < msix->msix_table_len; i++) {
1477 			mte = &msix->msix_table[i];
1478 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1479 				continue;
1480 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1481 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1482 			pci_unmask_msix(dev, i);
1483 		}
1484 	}
1485 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1486 	    msix->msix_ctrl, 2);
1487 }
1488 
1489 /*
1490  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1491  * returned in *count.  After this function returns, each message will be
1492  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1493  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	/* The PBA may live in a different BAR than the table. */
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate IRQs from the parent, stopping at the first failure. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		/* MSI-X rids are 1-based: message i maps to rid i + 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	/* Initially map message i to vector i + 1 (vectors are 1-based). */
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1628 
1629 /*
1630  * By default, pci_alloc_msix() will assign the allocated IRQ
1631  * resources consecutively to the first N messages in the MSI-X table.
1632  * However, device drivers may want to use different layouts if they
1633  * either receive fewer messages than they asked for, or they wish to
1634  * populate the MSI-X table sparsely.  This method allows the driver
1635  * to specify what layout it wants.  It must be called after a
1636  * successful pci_alloc_msix() but before any of the associated
1637  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1638  *
1639  * The 'vectors' array contains 'count' message vectors.  The array
1640  * maps directly to the MSI-X table in that index 0 in the array
1641  * specifies the vector for the first message in the MSI-X table, etc.
1642  * The vector value in each array index can either be 0 to indicate
1643  * that no vector should be assigned to a message slot, or it can be a
1644  * number from 1 to N (where N is the count returned from a
1645  * succcessful call to pci_alloc_msix()) to indicate which message
1646  * vector (IRQ) to be used for the corresponding message.
1647  *
1648  * On successful return, each message with a non-zero vector will have
1649  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1650  * 1.  Additionally, if any of the IRQs allocated via the previous
1651  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1652  * will be freed back to the system automatically.
1653  *
1654  * For example, suppose a driver has a MSI-X table with 6 messages and
1655  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1656  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1657  * C.  After the call to pci_alloc_msix(), the device will be setup to
1658  * have an MSI-X table of ABC--- (where - means no vector assigned).
1659  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1660  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1661  * be freed back to the system.  This device will also have valid
1662  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1663  *
1664  * In any case, the SYS_RES_IRQ rid X will always map to the message
1665  * at MSI-X table index X - 1 and will only be valid if a vector is
1666  * assigned to that table entry.
1667  */
1668 int
1669 pci_remap_msix_method(device_t dev, device_t child, int count,
1670     const u_int *vectors)
1671 {
1672 	struct pci_devinfo *dinfo = device_get_ivars(child);
1673 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1674 	struct resource_list_entry *rle;
1675 	int i, irq, j, *used;
1676 
1677 	/*
1678 	 * Have to have at least one message in the table but the
1679 	 * table can't be bigger than the actual MSI-X table in the
1680 	 * device.
1681 	 */
1682 	if (count == 0 || count > msix->msix_msgnum)
1683 		return (EINVAL);
1684 
1685 	/* Sanity check the vectors. */
1686 	for (i = 0; i < count; i++)
1687 		if (vectors[i] > msix->msix_alloc)
1688 			return (EINVAL);
1689 
1690 	/*
1691 	 * Make sure there aren't any holes in the vectors to be used.
1692 	 * It's a big pain to support it, and it doesn't really make
1693 	 * sense anyway.  Also, at least one vector must be used.
1694 	 */
1695 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1696 	    M_ZERO);
1697 	for (i = 0; i < count; i++)
1698 		if (vectors[i] != 0)
1699 			used[vectors[i] - 1] = 1;
1700 	for (i = 0; i < msix->msix_alloc - 1; i++)
1701 		if (used[i] == 0 && used[i + 1] == 1) {
1702 			free(used, M_DEVBUF);
1703 			return (EINVAL);
1704 		}
1705 	if (used[0] != 1) {
1706 		free(used, M_DEVBUF);
1707 		return (EINVAL);
1708 	}
1709 
1710 	/* Make sure none of the resources are allocated. */
1711 	for (i = 0; i < msix->msix_table_len; i++) {
1712 		if (msix->msix_table[i].mte_vector == 0)
1713 			continue;
1714 		if (msix->msix_table[i].mte_handlers > 0) {
1715 			free(used, M_DEVBUF);
1716 			return (EBUSY);
1717 		}
1718 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1719 		KASSERT(rle != NULL, ("missing resource"));
1720 		if (rle->res != NULL) {
1721 			free(used, M_DEVBUF);
1722 			return (EBUSY);
1723 		}
1724 	}
1725 
1726 	/* Free the existing resource list entries. */
1727 	for (i = 0; i < msix->msix_table_len; i++) {
1728 		if (msix->msix_table[i].mte_vector == 0)
1729 			continue;
1730 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1731 	}
1732 
1733 	/*
1734 	 * Build the new virtual table keeping track of which vectors are
1735 	 * used.
1736 	 */
1737 	free(msix->msix_table, M_DEVBUF);
1738 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1739 	    M_DEVBUF, M_WAITOK | M_ZERO);
1740 	for (i = 0; i < count; i++)
1741 		msix->msix_table[i].mte_vector = vectors[i];
1742 	msix->msix_table_len = count;
1743 
1744 	/* Free any unused IRQs and resize the vectors array if necessary. */
1745 	j = msix->msix_alloc - 1;
1746 	if (used[j] == 0) {
1747 		struct msix_vector *vec;
1748 
1749 		while (used[j] == 0) {
1750 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1751 			    msix->msix_vectors[j].mv_irq);
1752 			j--;
1753 		}
1754 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1755 		    M_WAITOK);
1756 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1757 		    (j + 1));
1758 		free(msix->msix_vectors, M_DEVBUF);
1759 		msix->msix_vectors = vec;
1760 		msix->msix_alloc = j + 1;
1761 	}
1762 	free(used, M_DEVBUF);
1763 
1764 	/* Map the IRQs onto the rids. */
1765 	for (i = 0; i < count; i++) {
1766 		if (vectors[i] == 0)
1767 			continue;
1768 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1769 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1770 		    irq, 1);
1771 	}
1772 
1773 	if (bootverbose) {
1774 		device_printf(child, "Remapped MSI-X IRQs as: ");
1775 		for (i = 0; i < count; i++) {
1776 			if (i != 0)
1777 				printf(", ");
1778 			if (vectors[i] == 0)
1779 				printf("---");
1780 			else
1781 				printf("%d",
1782 				    msix->msix_vectors[vectors[i]].mv_irq);
1783 		}
1784 		printf("\n");
1785 	}
1786 
1787 	return (0);
1788 }
1789 
1790 static int
1791 pci_release_msix(device_t dev, device_t child)
1792 {
1793 	struct pci_devinfo *dinfo = device_get_ivars(child);
1794 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1795 	struct resource_list_entry *rle;
1796 	int i;
1797 
1798 	/* Do we have any messages to release? */
1799 	if (msix->msix_alloc == 0)
1800 		return (ENODEV);
1801 
1802 	/* Make sure none of the resources are allocated. */
1803 	for (i = 0; i < msix->msix_table_len; i++) {
1804 		if (msix->msix_table[i].mte_vector == 0)
1805 			continue;
1806 		if (msix->msix_table[i].mte_handlers > 0)
1807 			return (EBUSY);
1808 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1809 		KASSERT(rle != NULL, ("missing resource"));
1810 		if (rle->res != NULL)
1811 			return (EBUSY);
1812 	}
1813 
1814 	/* Update control register to disable MSI-X. */
1815 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1816 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1817 	    msix->msix_ctrl, 2);
1818 
1819 	/* Free the resource list entries. */
1820 	for (i = 0; i < msix->msix_table_len; i++) {
1821 		if (msix->msix_table[i].mte_vector == 0)
1822 			continue;
1823 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1824 	}
1825 	free(msix->msix_table, M_DEVBUF);
1826 	msix->msix_table_len = 0;
1827 
1828 	/* Release the IRQs. */
1829 	for (i = 0; i < msix->msix_alloc; i++)
1830 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1831 		    msix->msix_vectors[i].mv_irq);
1832 	free(msix->msix_vectors, M_DEVBUF);
1833 	msix->msix_alloc = 0;
1834 	return (0);
1835 }
1836 
1837 /*
1838  * Return the max supported MSI-X messages this device supports.
1839  * Basically, assuming the MD code can alloc messages, this function
1840  * should return the maximum value that pci_alloc_msix() can return.
1841  * Thus, it is subject to the tunables, etc.
1842  */
1843 int
1844 pci_msix_count_method(device_t dev, device_t child)
1845 {
1846 	struct pci_devinfo *dinfo = device_get_ivars(child);
1847 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1848 
1849 	if (pci_do_msix && msix->msix_location != 0)
1850 		return (msix->msix_msgnum);
1851 	return (0);
1852 }
1853 
1854 /*
1855  * HyperTransport MSI mapping control
1856  */
1857 void
1858 pci_ht_map_msi(device_t dev, uint64_t addr)
1859 {
1860 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1861 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1862 
1863 	if (!ht->ht_msimap)
1864 		return;
1865 
1866 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1867 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1868 		/* Enable MSI -> HT mapping. */
1869 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1870 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1871 		    ht->ht_msictrl, 2);
1872 	}
1873 
1874 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1875 		/* Disable MSI -> HT mapping. */
1876 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1877 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1878 		    ht->ht_msictrl, 2);
1879 	}
1880 }
1881 
1882 int
1883 pci_get_max_read_req(device_t dev)
1884 {
1885 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1886 	int cap;
1887 	uint16_t val;
1888 
1889 	cap = dinfo->cfg.pcie.pcie_location;
1890 	if (cap == 0)
1891 		return (0);
1892 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1893 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1894 	val >>= 12;
1895 	return (1 << (val + 7));
1896 }
1897 
1898 int
1899 pci_set_max_read_req(device_t dev, int size)
1900 {
1901 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1902 	int cap;
1903 	uint16_t val;
1904 
1905 	cap = dinfo->cfg.pcie.pcie_location;
1906 	if (cap == 0)
1907 		return (0);
1908 	if (size < 128)
1909 		size = 128;
1910 	if (size > 4096)
1911 		size = 4096;
1912 	size = (1 << (fls(size) - 1));
1913 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1914 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1915 	val |= (fls(size) - 8) << 12;
1916 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1917 	return (size);
1918 }
1919 
1920 /*
1921  * Support for MSI message signalled interrupts.
1922  */
1923 void
1924 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1925     uint16_t data)
1926 {
1927 	struct pci_devinfo *dinfo = device_get_ivars(child);
1928 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1929 
1930 	/* Write data and address values. */
1931 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
1932 	    address & 0xffffffff, 4);
1933 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1934 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1935 		    address >> 32, 4);
1936 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
1937 		    data, 2);
1938 	} else
1939 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
1940 		    2);
1941 
1942 	/* Enable MSI in the control register. */
1943 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1944 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1945 	    msi->msi_ctrl, 2);
1946 
1947 	/* Enable MSI -> HT mapping. */
1948 	pci_ht_map_msi(child, address);
1949 }
1950 
/*
 * Disable MSI delivery for 'child': tear down the HyperTransport MSI
 * mapping first, then clear the enable bit in the MSI control
 * register.
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
1965 
1966 /*
1967  * Restore MSI registers during resume.  If MSI is enabled then
1968  * restore the data and address registers in addition to the control
1969  * register.
1970  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the cached address/data pair into the device. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored even when MSI is disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1996 
/*
 * Remap the MSI or MSI-X message for 'irq': request a fresh
 * address/data pair from the parent bridge via PCIB_MAP_MSI() and
 * reprogram the device with it.  Returns 0 on success (MSI path),
 * ENOENT if 'irq' is not one of this device's MSI/MSI-X vectors, or
 * the error from PCIB_MAP_MSI().
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Re-enable with the new address/data pair. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Mask each active slot while rewriting it. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): the MSI-X path falls through to ENOENT even
		 * after a vector was successfully remapped above — confirm
		 * that callers tolerate this return value.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
2069 
2070 /*
2071  * Returns true if the specified device is blacklisted because MSI
2072  * doesn't work.
2073  */
2074 int
2075 pci_msi_device_blacklisted(device_t dev)
2076 {
2077 
2078 	if (!pci_honor_msi_blacklist)
2079 		return (0);
2080 
2081 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2082 }
2083 
2084 /*
2085  * Determine if MSI is blacklisted globally on this system.  Currently,
2086  * we just check for blacklisted chipsets as represented by the
2087  * host-PCI bridge at device 0:0:0.  In the future, it may become
2088  * necessary to check other system attributes, such as the kenv values
2089  * that give the motherboard manufacturer and model number.
2090  */
2091 static int
2092 pci_msi_blacklisted(void)
2093 {
2094 	device_t dev;
2095 
2096 	if (!pci_honor_msi_blacklist)
2097 		return (0);
2098 
2099 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2100 	if (!(pcie_chipset || pcix_chipset)) {
2101 		if (vm_guest != VM_GUEST_NO) {
2102 			/*
2103 			 * Whitelist older chipsets in virtual
2104 			 * machines known to support MSI.
2105 			 */
2106 			dev = pci_find_bsf(0, 0, 0);
2107 			if (dev != NULL)
2108 				return (!pci_has_quirk(pci_get_devid(dev),
2109 					PCI_QUIRK_ENABLE_MSI_VM));
2110 		}
2111 		return (1);
2112 	}
2113 
2114 	dev = pci_find_bsf(0, 0, 0);
2115 	if (dev != NULL)
2116 		return (pci_msi_device_blacklisted(dev));
2117 	return (0);
2118 }
2119 
2120 /*
2121  * Returns true if the specified device is blacklisted because MSI-X
2122  * doesn't work.  Note that this assumes that if MSI doesn't work,
2123  * MSI-X doesn't either.
2124  */
2125 int
2126 pci_msix_device_blacklisted(device_t dev)
2127 {
2128 
2129 	if (!pci_honor_msi_blacklist)
2130 		return (0);
2131 
2132 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2133 		return (1);
2134 
2135 	return (pci_msi_device_blacklisted(dev));
2136 }
2137 
2138 /*
2139  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2140  * is blacklisted, assume that MSI-X is as well.  Check for additional
2141  * chipsets where MSI works but MSI-X does not.
2142  */
2143 static int
2144 pci_msix_blacklisted(void)
2145 {
2146 	device_t dev;
2147 
2148 	if (!pci_honor_msi_blacklist)
2149 		return (0);
2150 
2151 	dev = pci_find_bsf(0, 0, 0);
2152 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2153 	    PCI_QUIRK_DISABLE_MSIX))
2154 		return (1);
2155 
2156 	return (pci_msi_blacklisted());
2157 }
2158 
2159 /*
2160  * Attempt to allocate *count MSI messages.  The actual number allocated is
2161  * returned in *count.  After this function returns, each message will be
2162  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2163  */
/*
 * Returns 0 on success with *count updated to the number of messages
 * actually allocated; EINVAL for a zero or non-power-of-2 request,
 * ENXIO if messages are already allocated (or MSI is blacklisted),
 * ENODEV if the device lacks an MSI capability or MSI is disabled by
 * tunable, or the PCIB_ALLOC_MSI() error.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];	/* MSI allows at most 32 messages */
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request until the parent can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* The MME field (bits 4-6) holds log2 of the enabled message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2282 
2283 /* Release the MSI messages associated with this device. */
/*
 * Returns 0 on success, ENODEV if no MSI (or MSI-X) messages are
 * allocated, or EBUSY if any message still has a handler or an
 * allocated resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers so they can be handed back. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2331 
2332 /*
2333  * Return the max supported MSI messages this device supports.
2334  * Basically, assuming the MD code can alloc messages, this function
2335  * should return the maximum value that pci_alloc_msi() can return.
2336  * Thus, it is subject to the tunables, etc.
2337  */
2338 int
2339 pci_msi_count_method(device_t dev, device_t child)
2340 {
2341 	struct pci_devinfo *dinfo = device_get_ivars(child);
2342 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2343 
2344 	if (pci_do_msi && msi->msi_location != 0)
2345 		return (msi->msi_msgnum);
2346 	return (0);
2347 }
2348 
2349 /* free pcicfgregs structure and all depending data structures */
2350 
/*
 * Tear down a pci_devinfo: free the cached VPD data and BAR records,
 * unlink it from the global device list, and release it.  Always
 * returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	if (dinfo->cfg.vpd.vpd_reg) {
		/* Free the cached Vital Product Data strings. */
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* _SAFE traversal: each entry is freed as we walk the list. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink from the global device queue before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2382 
/*
 * PCI power management
 */
2386 int
2387 pci_set_powerstate_method(device_t dev, device_t child, int state)
2388 {
2389 	struct pci_devinfo *dinfo = device_get_ivars(child);
2390 	pcicfgregs *cfg = &dinfo->cfg;
2391 	uint16_t status;
2392 	int result, oldstate, highest, delay;
2393 
2394 	if (cfg->pp.pp_cap == 0)
2395 		return (EOPNOTSUPP);
2396 
2397 	/*
2398 	 * Optimize a no state change request away.  While it would be OK to
2399 	 * write to the hardware in theory, some devices have shown odd
2400 	 * behavior when going from D3 -> D3.
2401 	 */
2402 	oldstate = pci_get_powerstate(child);
2403 	if (oldstate == state)
2404 		return (0);
2405 
2406 	/*
2407 	 * The PCI power management specification states that after a state
2408 	 * transition between PCI power states, system software must
2409 	 * guarantee a minimal delay before the function accesses the device.
2410 	 * Compute the worst case delay that we need to guarantee before we
2411 	 * access the device.  Many devices will be responsive much more
2412 	 * quickly than this delay, but there are some that don't respond
2413 	 * instantly to state changes.  Transitions to/from D3 state require
2414 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2415 	 * is done below with DELAY rather than a sleeper function because
2416 	 * this function can be called from contexts where we cannot sleep.
2417 	 */
2418 	highest = (oldstate > state) ? oldstate : state;
2419 	if (highest == PCI_POWERSTATE_D3)
2420 	    delay = 10000;
2421 	else if (highest == PCI_POWERSTATE_D2)
2422 	    delay = 200;
2423 	else
2424 	    delay = 0;
2425 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2426 	    & ~PCIM_PSTAT_DMASK;
2427 	result = 0;
2428 	switch (state) {
2429 	case PCI_POWERSTATE_D0:
2430 		status |= PCIM_PSTAT_D0;
2431 		break;
2432 	case PCI_POWERSTATE_D1:
2433 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2434 			return (EOPNOTSUPP);
2435 		status |= PCIM_PSTAT_D1;
2436 		break;
2437 	case PCI_POWERSTATE_D2:
2438 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2439 			return (EOPNOTSUPP);
2440 		status |= PCIM_PSTAT_D2;
2441 		break;
2442 	case PCI_POWERSTATE_D3:
2443 		status |= PCIM_PSTAT_D3;
2444 		break;
2445 	default:
2446 		return (EINVAL);
2447 	}
2448 
2449 	if (bootverbose)
2450 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2451 		    state);
2452 
2453 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2454 	if (delay)
2455 		DELAY(delay);
2456 	return (0);
2457 }
2458 
2459 int
2460 pci_get_powerstate_method(device_t dev, device_t child)
2461 {
2462 	struct pci_devinfo *dinfo = device_get_ivars(child);
2463 	pcicfgregs *cfg = &dinfo->cfg;
2464 	uint16_t status;
2465 	int result;
2466 
2467 	if (cfg->pp.pp_cap != 0) {
2468 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2469 		switch (status & PCIM_PSTAT_DMASK) {
2470 		case PCIM_PSTAT_D0:
2471 			result = PCI_POWERSTATE_D0;
2472 			break;
2473 		case PCIM_PSTAT_D1:
2474 			result = PCI_POWERSTATE_D1;
2475 			break;
2476 		case PCIM_PSTAT_D2:
2477 			result = PCI_POWERSTATE_D2;
2478 			break;
2479 		case PCIM_PSTAT_D3:
2480 			result = PCI_POWERSTATE_D3;
2481 			break;
2482 		default:
2483 			result = PCI_POWERSTATE_UNKNOWN;
2484 			break;
2485 		}
2486 	} else {
2487 		/* No support, device is always at D0 */
2488 		result = PCI_POWERSTATE_D0;
2489 	}
2490 	return (result);
2491 }
2492 
2493 /*
2494  * Some convenience functions for PCI device drivers.
2495  */
2496 
2497 static __inline void
2498 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2499 {
2500 	uint16_t	command;
2501 
2502 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2503 	command |= bit;
2504 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2505 }
2506 
2507 static __inline void
2508 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2509 {
2510 	uint16_t	command;
2511 
2512 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2513 	command &= ~bit;
2514 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2515 }
2516 
/* Turn on the bus-master enable bit in the child's command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2523 
/* Clear the bus-master enable bit in the child's command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2530 
2531 int
2532 pci_enable_io_method(device_t dev, device_t child, int space)
2533 {
2534 	uint16_t bit;
2535 
2536 	switch(space) {
2537 	case SYS_RES_IOPORT:
2538 		bit = PCIM_CMD_PORTEN;
2539 		break;
2540 	case SYS_RES_MEMORY:
2541 		bit = PCIM_CMD_MEMEN;
2542 		break;
2543 	default:
2544 		return (EINVAL);
2545 	}
2546 	pci_set_command_bit(dev, child, bit);
2547 	return (0);
2548 }
2549 
2550 int
2551 pci_disable_io_method(device_t dev, device_t child, int space)
2552 {
2553 	uint16_t bit;
2554 
2555 	switch(space) {
2556 	case SYS_RES_IOPORT:
2557 		bit = PCIM_CMD_PORTEN;
2558 		break;
2559 	case SYS_RES_MEMORY:
2560 		bit = PCIM_CMD_MEMEN;
2561 		break;
2562 	default:
2563 		return (EINVAL);
2564 	}
2565 	pci_clear_command_bit(dev, child, bit);
2566 	return (0);
2567 }
2568 
2569 /*
2570  * New style pci driver.  Parent device is either a pci-host-bridge or a
2571  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2572  */
2573 
/*
 * When booting verbose, dump the interesting details of 'dinfo' to
 * the console: vendor/device IDs, bus location, class codes,
 * command/status/latency registers, interrupt routing, and the
 * power-management, MSI and MSI-X capabilities if present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2630 
2631 static int
2632 pci_porten(device_t dev)
2633 {
2634 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2635 }
2636 
2637 static int
2638 pci_memen(device_t dev)
2639 {
2640 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2641 }
2642 
/*
 * Size the BAR at config register 'reg'.  On return, *mapp holds the
 * BAR's original value, *testvalp the value read back after writing
 * all 1's (which encodes the BAR size), and *bar64 (if not NULL)
 * whether the BAR is a 64-bit BAR.  The BAR and the command register
 * are restored to their original values before returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2711 
2712 static void
2713 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2714 {
2715 	struct pci_devinfo *dinfo;
2716 	int ln2range;
2717 
2718 	/* The device ROM BAR is always a 32-bit memory BAR. */
2719 	dinfo = device_get_ivars(dev);
2720 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2721 		ln2range = 32;
2722 	else
2723 		ln2range = pci_maprange(pm->pm_value);
2724 	pci_write_config(dev, pm->pm_reg, base, 4);
2725 	if (ln2range == 64)
2726 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2727 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2728 	if (ln2range == 64)
2729 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2730 		    pm->pm_reg + 4, 4) << 32;
2731 }
2732 
2733 struct pci_map *
2734 pci_find_bar(device_t dev, int reg)
2735 {
2736 	struct pci_devinfo *dinfo;
2737 	struct pci_map *pm;
2738 
2739 	dinfo = device_get_ivars(dev);
2740 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2741 		if (pm->pm_reg == reg)
2742 			return (pm);
2743 	}
2744 	return (NULL);
2745 }
2746 
2747 int
2748 pci_bar_enabled(device_t dev, struct pci_map *pm)
2749 {
2750 	struct pci_devinfo *dinfo;
2751 	uint16_t cmd;
2752 
2753 	dinfo = device_get_ivars(dev);
2754 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2755 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2756 		return (0);
2757 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2758 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2759 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2760 	else
2761 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2762 }
2763 
/*
 * Record a new BAR for 'dev': config register 'reg', current raw
 * value 'value' and log2 size 'size'.  The per-device list is kept
 * sorted by register offset; adding the same register twice is a
 * bug (asserted).  Returns the new record.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the last entry whose successor has a larger register. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2788 
2789 static void
2790 pci_restore_bars(device_t dev)
2791 {
2792 	struct pci_devinfo *dinfo;
2793 	struct pci_map *pm;
2794 	int ln2range;
2795 
2796 	dinfo = device_get_ivars(dev);
2797 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2798 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2799 			ln2range = 32;
2800 		else
2801 			ln2range = pci_maprange(pm->pm_value);
2802 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2803 		if (ln2range == 64)
2804 			pci_write_config(dev, pm->pm_reg + 4,
2805 			    pm->pm_value >> 32, 4);
2806 	}
2807 }
2808 
2809 /*
2810  * Add a resource based on a pci map register. Return 1 if the map
2811  * register is a 32bit map register or 2 if it is a 64bit register.
2812  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval, NULL);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* NOTE(review): this message lacks a trailing newline. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2980 
2981 /*
2982  * For ATA devices we need to decide early what addressing mode to use.
2983  * Legacy demands that the primary and secondary ATA ports sits on the
2984  * same addresses that old ISA hardware did. This dictates that we use
2985  * those addresses and ignore the BAR's if we cannot set PCI native
2986  * addressing mode.
2987  */
2988 static void
2989 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2990     uint32_t prefetchmask)
2991 {
2992 	struct resource *r;
2993 	int rid, type, progif;
2994 #if 0
2995 	/* if this device supports PCI native addressing use it */
2996 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2997 	if ((progif & 0x8a) == 0x8a) {
2998 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2999 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3000 			printf("Trying ATA native PCI addressing mode\n");
3001 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3002 		}
3003 	}
3004 #endif
3005 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3006 	type = SYS_RES_IOPORT;
3007 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3008 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3009 		    prefetchmask & (1 << 0));
3010 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3011 		    prefetchmask & (1 << 1));
3012 	} else {
3013 		rid = PCIR_BAR(0);
3014 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3015 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3016 		    0x1f7, 8, 0);
3017 		rid = PCIR_BAR(1);
3018 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3019 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3020 		    0x3f6, 1, 0);
3021 	}
3022 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3023 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3024 		    prefetchmask & (1 << 2));
3025 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3026 		    prefetchmask & (1 << 3));
3027 	} else {
3028 		rid = PCIR_BAR(2);
3029 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3030 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3031 		    0x177, 8, 0);
3032 		rid = PCIR_BAR(3);
3033 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3034 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3035 		    0x376, 1, 0);
3036 	}
3037 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3038 	    prefetchmask & (1 << 4));
3039 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3040 	    prefetchmask & (1 << 5));
3041 }
3042 
3043 static void
3044 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3045 {
3046 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3047 	pcicfgregs *cfg = &dinfo->cfg;
3048 	char tunable_name[64];
3049 	int irq;
3050 
3051 	/* Has to have an intpin to have an interrupt. */
3052 	if (cfg->intpin == 0)
3053 		return;
3054 
3055 	/* Let the user override the IRQ with a tunable. */
3056 	irq = PCI_INVALID_IRQ;
3057 	snprintf(tunable_name, sizeof(tunable_name),
3058 	    "hw.pci%d.%d.%d.INT%c.irq",
3059 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3060 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3061 		irq = PCI_INVALID_IRQ;
3062 
3063 	/*
3064 	 * If we didn't get an IRQ via the tunable, then we either use the
3065 	 * IRQ value in the intline register or we ask the bus to route an
3066 	 * interrupt for us.  If force_route is true, then we only use the
3067 	 * value in the intline register if the bus was unable to assign an
3068 	 * IRQ.
3069 	 */
3070 	if (!PCI_INTERRUPT_VALID(irq)) {
3071 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3072 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3073 		if (!PCI_INTERRUPT_VALID(irq))
3074 			irq = cfg->intline;
3075 	}
3076 
3077 	/* If after all that we don't have an IRQ, just bail. */
3078 	if (!PCI_INTERRUPT_VALID(irq))
3079 		return;
3080 
3081 	/* Update the config register if it changed. */
3082 	if (irq != cfg->intline) {
3083 		cfg->intline = irq;
3084 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3085 	}
3086 
3087 	/* Add this IRQ as rid 0 interrupt resource. */
3088 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3089 }
3090 
3091 /* Perform early OHCI takeover from SMM. */
3092 static void
3093 ohci_early_takeover(device_t self)
3094 {
3095 	struct resource *res;
3096 	uint32_t ctl;
3097 	int rid;
3098 	int i;
3099 
3100 	rid = PCIR_BAR(0);
3101 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3102 	if (res == NULL)
3103 		return;
3104 
3105 	ctl = bus_read_4(res, OHCI_CONTROL);
3106 	if (ctl & OHCI_IR) {
3107 		if (bootverbose)
3108 			printf("ohci early: "
3109 			    "SMM active, request owner change\n");
3110 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3111 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3112 			DELAY(1000);
3113 			ctl = bus_read_4(res, OHCI_CONTROL);
3114 		}
3115 		if (ctl & OHCI_IR) {
3116 			if (bootverbose)
3117 				printf("ohci early: "
3118 				    "SMM does not respond, resetting\n");
3119 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3120 		}
3121 		/* Disable interrupts */
3122 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3123 	}
3124 
3125 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3126 }
3127 
3128 /* Perform early UHCI takeover from SMM. */
3129 static void
3130 uhci_early_takeover(device_t self)
3131 {
3132 	struct resource *res;
3133 	int rid;
3134 
3135 	/*
3136 	 * Set the PIRQD enable bit and switch off all the others. We don't
3137 	 * want legacy support to interfere with us XXX Does this also mean
3138 	 * that the BIOS won't touch the keyboard anymore if it is connected
3139 	 * to the ports of the root hub?
3140 	 */
3141 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3142 
3143 	/* Disable interrupts */
3144 	rid = PCI_UHCI_BASE_REG;
3145 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3146 	if (res != NULL) {
3147 		bus_write_2(res, UHCI_INTR, 0);
3148 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3149 	}
3150 }
3151 
3152 /* Perform early EHCI takeover from SMM. */
3153 static void
3154 ehci_early_takeover(device_t self)
3155 {
3156 	struct resource *res;
3157 	uint32_t cparams;
3158 	uint32_t eec;
3159 	uint8_t eecp;
3160 	uint8_t bios_sem;
3161 	uint8_t offs;
3162 	int rid;
3163 	int i;
3164 
3165 	rid = PCIR_BAR(0);
3166 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3167 	if (res == NULL)
3168 		return;
3169 
3170 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3171 
3172 	/* Synchronise with the BIOS if it owns the controller. */
3173 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3174 	    eecp = EHCI_EECP_NEXT(eec)) {
3175 		eec = pci_read_config(self, eecp, 4);
3176 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3177 			continue;
3178 		}
3179 		bios_sem = pci_read_config(self, eecp +
3180 		    EHCI_LEGSUP_BIOS_SEM, 1);
3181 		if (bios_sem == 0) {
3182 			continue;
3183 		}
3184 		if (bootverbose)
3185 			printf("ehci early: "
3186 			    "SMM active, request owner change\n");
3187 
3188 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3189 
3190 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3191 			DELAY(1000);
3192 			bios_sem = pci_read_config(self, eecp +
3193 			    EHCI_LEGSUP_BIOS_SEM, 1);
3194 		}
3195 
3196 		if (bios_sem != 0) {
3197 			if (bootverbose)
3198 				printf("ehci early: "
3199 				    "SMM does not respond\n");
3200 		}
3201 		/* Disable interrupts */
3202 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3203 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3204 	}
3205 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3206 }
3207 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/*
	 * Prime with all-ones so XHCI_XECP_NEXT(eec) is non-zero on the
	 * first loop test; afterwards eec holds the previously read
	 * capability and its "next" field drives both the advance and
	 * the termination check.
	 */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		/* Only the USB legacy-support capability is of interest. */
		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS ownership semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3269 
3270 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range decoded by a bridge or CardBus
 * device from our parent, adding it to the device's resource list.  On
 * any failure (or when pci_clear_buses is set) the secbus/subbus
 * registers are cleared so the range can be renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge and CardBus headers carry secbus/subbus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/*
		 * Derive the bus range from the chipset's private
		 * register at 0x41 when it holds a valid bus number.
		 */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Match on SMBIOS planar maker/product before applying. */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/*
	 * Reached by fallthrough when the range is invalid or the
	 * reservation failed, as well as via the goto above.
	 */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3375 
/*
 * Allocate the secondary bus number range (rid 0) for a bridge child,
 * lazily reserving it from our parent and programming the child's
 * secbus/subbus registers the first time it is requested.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge and CardBus headers have bus number registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Only rid 0 is valid for the secondary bus range. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/*
		 * Reserve without RF_ACTIVE; activation happens in the
		 * resource_list_alloc() call below.
		 */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3426 #endif
3427 
/*
 * Populate a device's resource list: BARs (with ATA legacy and quirk
 * special cases), the INTx interrupt, early USB controller takeover
 * from SMM, and (optionally) the bridge secondary bus range.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/*
		 * The index advances by pci_add_map()'s return value,
		 * which accounts for BARs that occupy more than one
		 * 32-bit register.
		 */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from SMM before drivers attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3509 
3510 static struct pci_devinfo *
3511 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3512     int slot, int func, size_t dinfo_size)
3513 {
3514 	struct pci_devinfo *dinfo;
3515 
3516 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3517 	if (dinfo != NULL)
3518 		pci_add_child(dev, dinfo);
3519 
3520 	return (dinfo);
3521 }
3522 
/*
 * Enumerate every slot/function on a bus and add the devices found as
 * children of 'dev'.  Slot 0, function 0 is probed first so ARI can be
 * enabled (if available) before the rest of the bus is scanned, since
 * ARI changes which slot/function combinations are legal.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* Note: the update clause resets first_func to 0 after slot 0. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Slots with an invalid header type are skipped entirely. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions beyond 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3569 
3570 #ifdef PCI_IOV
3571 device_t
3572 pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
3573     uint16_t vid, uint16_t did)
3574 {
3575 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3576 	device_t pcib;
3577 	int busno, slot, func;
3578 
3579 	pf_dinfo = device_get_ivars(pf);
3580 
3581 	/*
3582 	 * Do a sanity check that we have been passed the correct size.  If this
3583 	 * test fails then likely the pci subclass hasn't implemented the
3584 	 * pci_create_iov_child method like it's supposed it.
3585 	 */
3586 	if (size != pf_dinfo->cfg.devinfo_size) {
3587 		device_printf(pf,
3588 		    "PCI subclass does not properly implement PCI_IOV\n");
3589 		return (NULL);
3590 	}
3591 
3592 	pcib = device_get_parent(bus);
3593 
3594 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3595 
3596 	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
3597 	    vid, did, size);
3598 
3599 	vf_dinfo->cfg.flags |= PCICFG_VF;
3600 	pci_add_child(bus, vf_dinfo);
3601 
3602 	return (vf_dinfo->cfg.dev);
3603 }
3604 
3605 device_t
3606 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
3607     uint16_t vid, uint16_t did)
3608 {
3609 
3610 	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
3611 	    did));
3612 }
3613 #endif
3614 
/*
 * Attach a discovered PCI function to the device tree: create the
 * newbus child, snapshot its config space, and add its resources.
 * Note the save-then-restore ordering: pci_cfg_save() captures the
 * current state before pci_cfg_restore() reprograms the device.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3627 
/*
 * Default PCI_CHILD_ADDED bus-method implementation: a no-op hook that
 * subclasses may override to act on newly added children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3633 
/*
 * Generic PCI bus probe: always matches, but at a low priority so
 * more specific bus subclasses win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3643 
/*
 * Common attach work shared by pci(4) and its subclasses: reserve our
 * own bus number (when PCI_RES_BUS is available) and set up the DMA
 * tag, optionally bounded by PCI_DMA_BOUNDARY.  Returns 0 or ENXIO.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve this bus's own number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create a bounded DMA tag at the top-level PCI bus (when
	 * the grandparent is not itself a PCI bus); nested buses
	 * inherit the parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/*
	 * Note: the statement after the #endif is the body of this if
	 * when PCI_DMA_BOUNDARY is defined, and unconditional otherwise.
	 */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3690 
/*
 * Attach method for the generic PCI bus driver: perform the common
 * setup, then enumerate and attach all children.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3711 
#ifdef PCI_RES_BUS
/*
 * Detach method: detach all children, then release the bus number
 * reserved in pci_attach_common().
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int rv;

	rv = bus_generic_detach(dev);
	if (rv != 0)
		return (rv);
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
#endif
3726 
3727 static void
3728 pci_set_power_child(device_t dev, device_t child, int state)
3729 {
3730 	struct pci_devinfo *dinfo;
3731 	device_t pcib;
3732 	int dstate;
3733 
3734 	/*
3735 	 * Set the device to the given state.  If the firmware suggests
3736 	 * a different power state, use it instead.  If power management
3737 	 * is not present, the firmware is responsible for managing
3738 	 * device power.  Skip children who aren't attached since they
3739 	 * are handled separately.
3740 	 */
3741 	pcib = device_get_parent(dev);
3742 	dinfo = device_get_ivars(child);
3743 	dstate = state;
3744 	if (device_is_attached(child) &&
3745 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
3746 		pci_set_powerstate(child, dstate);
3747 }
3748 
3749 int
3750 pci_suspend_child(device_t dev, device_t child)
3751 {
3752 	struct pci_devinfo *dinfo;
3753 	int error;
3754 
3755 	dinfo = device_get_ivars(child);
3756 
3757 	/*
3758 	 * Save the PCI configuration space for the child and set the
3759 	 * device in the appropriate power state for this sleep state.
3760 	 */
3761 	pci_cfg_save(child, dinfo, 0);
3762 
3763 	/* Suspend devices before potentially powering them down. */
3764 	error = bus_generic_suspend_child(dev, child);
3765 
3766 	if (error)
3767 		return (error);
3768 
3769 	if (pci_do_power_suspend)
3770 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
3771 
3772 	return (0);
3773 }
3774 
3775 int
3776 pci_resume_child(device_t dev, device_t child)
3777 {
3778 	struct pci_devinfo *dinfo;
3779 
3780 	if (pci_do_power_resume)
3781 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
3782 
3783 	dinfo = device_get_ivars(child);
3784 	pci_cfg_restore(child, dinfo);
3785 	if (!device_is_attached(child))
3786 		pci_cfg_save(child, dinfo, 1);
3787 
3788 	bus_generic_resume_child(dev, child);
3789 
3790 	return (0);
3791 }
3792 
3793 int
3794 pci_resume(device_t dev)
3795 {
3796 	device_t child, *devlist;
3797 	int error, i, numdevs;
3798 
3799 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3800 		return (error);
3801 
3802 	/*
3803 	 * Resume critical devices first, then everything else later.
3804 	 */
3805 	for (i = 0; i < numdevs; i++) {
3806 		child = devlist[i];
3807 		switch (pci_get_class(child)) {
3808 		case PCIC_DISPLAY:
3809 		case PCIC_MEMORY:
3810 		case PCIC_BRIDGE:
3811 		case PCIC_BASEPERIPH:
3812 			BUS_RESUME_CHILD(dev, child);
3813 			break;
3814 		}
3815 	}
3816 	for (i = 0; i < numdevs; i++) {
3817 		child = devlist[i];
3818 		switch (pci_get_class(child)) {
3819 		case PCIC_DISPLAY:
3820 		case PCIC_MEMORY:
3821 		case PCIC_BRIDGE:
3822 		case PCIC_BASEPERIPH:
3823 			break;
3824 		default:
3825 			BUS_RESUME_CHILD(dev, child);
3826 		}
3827 	}
3828 	free(devlist, M_TEMP);
3829 	return (0);
3830 }
3831 
3832 static void
3833 pci_load_vendor_data(void)
3834 {
3835 	caddr_t data;
3836 	void *ptr;
3837 	size_t sz;
3838 
3839 	data = preload_search_by_type("pci_vendor_data");
3840 	if (data != NULL) {
3841 		ptr = preload_fetch_addr(data);
3842 		sz = preload_fetch_size(data);
3843 		if (ptr != NULL && sz != 0) {
3844 			pci_vendordata = ptr;
3845 			pci_vendordata_size = sz;
3846 			/* terminate the database */
3847 			pci_vendordata[pci_vendordata_size] = '\n';
3848 		}
3849 	}
3850 }
3851 
3852 void
3853 pci_driver_added(device_t dev, driver_t *driver)
3854 {
3855 	int numdevs;
3856 	device_t *devlist;
3857 	device_t child;
3858 	struct pci_devinfo *dinfo;
3859 	int i;
3860 
3861 	if (bootverbose)
3862 		device_printf(dev, "driver added\n");
3863 	DEVICE_IDENTIFY(driver, dev);
3864 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3865 		return;
3866 	for (i = 0; i < numdevs; i++) {
3867 		child = devlist[i];
3868 		if (device_get_state(child) != DS_NOTPRESENT)
3869 			continue;
3870 		dinfo = device_get_ivars(child);
3871 		pci_print_verbose(dinfo);
3872 		if (bootverbose)
3873 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3874 		pci_cfg_restore(child, dinfo);
3875 		if (device_probe_and_attach(child) != 0)
3876 			pci_child_detached(dev, child);
3877 	}
3878 	free(devlist, M_TEMP);
3879 }
3880 
/*
 * Bus method to set up an interrupt handler for a child.  Rid 0 is the
 * legacy INTx interrupt; higher rids are MSI/MSI-X vectors, which are
 * lazily mapped through the parent bridge and enabled on first use.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is installed. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	/*
	 * 'bad' is reached by normal fallthrough on success (with
	 * error == 0) as well as via the gotos above on mapping failure.
	 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3980 
/*
 * Bus method to tear down an interrupt handler for a child.  Rid 0 is
 * legacy INTx (which is masked); higher rids decrement the MSI/MSI-X
 * handler counts and disable/mask the message when they reach zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is not NULL-checked; this assumes the
		 * rid was added by pci_setup_intr() — confirm callers
		 * cannot reach here with an unknown rid.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last MSI handler gone: disable MSI entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler for this entry gone: mask it. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
4039 
/*
 * Print the standard attachment line for a PCI child device: the bus
 * header, any reserved port/memory/IRQ resources, optional flags, and
 * the device/function address.  Returns the number of characters
 * printed, as the bus_print_child contract requires.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/*
	 * NOTE(review): this prints the flags of the bus device (dev),
	 * not of the child -- confirm that is intentional.
	 */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_domain(dev, child);
	retval += bus_print_child_footer(dev, child);

	return (retval);
}
4066 
/*
 * Class/subclass description table consulted by pci_probe_nomatch()
 * when no driver attaches and the vendor database has no entry for the
 * device.  A subclass of -1 is the catch-all entry for that class.
 * "report" selects when the message is printed: 0 = only under
 * bootverbose, 1 = always.  The table is terminated by a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4161 
/*
 * Report a PCI device for which no driver probe matched.  Prefer a
 * device description from the loaded vendor database; otherwise fall
 * back to a generic class/subclass description from pci_nomatch_tab.
 * Finally, save the device's config state so it can be restored later
 * (e.g. after a driver loads or the device is powered down).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;	/* unknown devices are always reported */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					/* Catch-all entry for the class. */
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4208 
/*
 * Bus callback invoked after a child driver detaches.  Reclaims any
 * resources the driver leaked (with a diagnostic for each kind) and
 * saves the device's config state.  Release order matters; see the
 * comment below.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4240 
4241 /*
4242  * Parse the PCI device database, if loaded, and return a pointer to a
4243  * description of the device.
4244  *
4245  * The database is flat text formatted as follows:
4246  *
4247  * Any line not in a valid format is ignored.
4248  * Lines are terminated with newline '\n' characters.
4249  *
4250  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4251  * the vendor name.
4252  *
4253  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4254  * - devices cannot be listed without a corresponding VENDOR line.
4255  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4256  * another TAB, then the device name.
4257  */
4258 
4259 /*
4260  * Assuming (ptr) points to the beginning of a line in the database,
4261  * return the vendor or device and description of the next entry.
4262  * The value of (vendor) or (device) inappropriate for the entry type
4263  * is set to -1.  Returns nonzero at the end of the database.
4264  *
 * Note that this is somewhat fragile in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialize it.
4268  */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Default to "no value" for both entry types; see block comment. */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the database from the current cursor. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			/* End of database reached. */
			*ptr = cp;
			return(1);
		}

		/* vendor entry? (no leading TAB) */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? (leading TAB) */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line so the next call starts on a fresh entry */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
4314 
/*
 * Build a "vendor, device" description string for a device by scanning
 * the loaded vendor database.  Returns a malloc'd string the caller
 * must free with free(..., M_DEVBUF), or NULL if no vendor data is
 * loaded, the vendor is not listed, or allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the %80[^\n] limit in the parser. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			/* End of database: no device entry found. */
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Ran into the next vendor block; stop looking. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No name for this device id: fall back to a hex string. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers ", " plus the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4367 
/*
 * Bus method: read a PCI instance variable for a child device.  Most
 * values come straight from the cached config-space registers in
 * dinfo->cfg.  Returns 0 on success, EINVAL for values that are
 * unavailable (ethernet address, or MINGNT/MAXLAT on non-type-0
 * headers), and ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* Only valid for type-0 (normal) config headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* Only valid for type-0 (normal) config headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4458 
/*
 * Bus method: write a PCI instance variable.  Only the interrupt pin
 * may be modified; the identity/address ivars are rejected with EINVAL
 * and unknown ivars with ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4491 
4492 #include "opt_ddb.h"
4493 #ifdef DDB
4494 #include <ddb/ddb.h>
4495 #include <sys/cons.h>
4496 
4497 /*
4498  * List resources based on pci map registers, used for within ddb
4499  */
4500 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one line per device with its driver name/unit (or "none"),
 * bus address, class, subsystem/chip ids, revision and header type.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unattached devices print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4540 #endif /* DDB */
4541 
/*
 * Lazily reserve a resource for a BAR that was not allocated during
 * the initial bus scan.  Sizes the BAR (or reuses the size recorded in
 * a previously-failed attempt), verifies the requested resource type
 * matches the BAR type, reserves an address range of the BAR's true
 * size, and programs the BAR with the chosen address.  Returns the
 * reserved (inactive) resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int num, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4639 
/*
 * Common back end for PCI resource allocation.  Performs lazy
 * allocation: routes legacy interrupts on first request, passes bridge
 * window registers up the tree, reserves unallocated BARs via
 * pci_reserve_map(), and finally satisfies the request from the
 * child's resource list.  "num" is the number of contiguous BAR-sized
 * blocks to reserve (used by SR-IOV; 1 for a normal allocation).
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_long num, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4714 
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed straight up the tree; SR-IOV virtual
 * functions get special handling for memory BARs (and may not have
 * I/O BARs at all).  Everything else goes through
 * pci_alloc_multi_resource() with num = 1.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
4746 
/*
 * Bus method: release a resource owned by a child.  Mirrors the
 * special cases in pci_alloc_resource(): grandchildren are passed up
 * the tree, SR-IOV VF memory BARs use the VF-specific release path,
 * and bridge window registers bypass the resource list entirely.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4797 
/*
 * Bus method: activate a resource.  After the generic activation, for
 * direct children enable the matching decode (I/O or memory) in the
 * command register; device ROM BARs additionally need their enable bit
 * set in the BAR itself.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4825 
/*
 * Bus method: deactivate a resource.  After the generic deactivation,
 * clear the enable bit of a device ROM BAR (by rewriting the BAR with
 * just the address) so its decoding is turned off.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4846 
/*
 * Remove a PCI child device entirely: detach its driver if attached,
 * disable memory/I-O decoding, release every resource it still holds
 * (complaining about any still-active ones), and free the devclass
 * entry and cached config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				/* Force-release so unreserve below works. */
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4886 
/*
 * Bus method: delete a resource list entry for a direct child.
 * Refuses (with a diagnostic) if the resource is still active or busy;
 * otherwise unreserves it and removes the list entry.  Silently
 * ignores grandchildren and nonexistent entries.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4916 
4917 struct resource_list *
4918 pci_get_resource_list (device_t dev, device_t child)
4919 {
4920 	struct pci_devinfo *dinfo = device_get_ivars(child);
4921 
4922 	return (&dinfo->resources);
4923 }
4924 
4925 bus_dma_tag_t
4926 pci_get_dma_tag(device_t bus, device_t dev)
4927 {
4928 	struct pci_softc *sc = device_get_softc(bus);
4929 
4930 	return (sc->sc_dma_tag);
4931 }
4932 
/*
 * Bus method: read "width" bytes of config space at "reg" for a child
 * device by forwarding to the parent bridge.  SR-IOV VFs do not
 * implement the vendor/device id registers in hardware, so those reads
 * are emulated from the cached config here.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
4973 
4974 void
4975 pci_write_config_method(device_t dev, device_t child, int reg,
4976     uint32_t val, int width)
4977 {
4978 	struct pci_devinfo *dinfo = device_get_ivars(child);
4979 	pcicfgregs *cfg = &dinfo->cfg;
4980 
4981 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4982 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4983 }
4984 
4985 int
4986 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4987     size_t buflen)
4988 {
4989 
4990 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
4991 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
4992 	return (0);
4993 }
4994 
4995 int
4996 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4997     size_t buflen)
4998 {
4999 	struct pci_devinfo *dinfo;
5000 	pcicfgregs *cfg;
5001 
5002 	dinfo = device_get_ivars(child);
5003 	cfg = &dinfo->cfg;
5004 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5005 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5006 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5007 	    cfg->progif);
5008 	return (0);
5009 }
5010 
5011 int
5012 pci_assign_interrupt_method(device_t dev, device_t child)
5013 {
5014 	struct pci_devinfo *dinfo = device_get_ivars(child);
5015 	pcicfgregs *cfg = &dinfo->cfg;
5016 
5017 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5018 	    cfg->intpin));
5019 }
5020 
/*
 * dev_lookup eventhandler: translate a pciconf-style selector string
 * into the matching device_t via pci_find_dbsf().
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler already resolved the name; leave it alone. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift values down, domain defaults to 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/*
	 * Range-check each component.  Functions beyond PCI_FUNCMAX are
	 * only accepted for slot 0 (the ARI case).
	 */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5069 
/*
 * Module load/unload handler: set up the global device list, the
 * /dev/pci control device, and the dev_lookup name-resolution hook.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		/* Create the /dev/pci control device node. */
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		/* Resolve "pciD:B:S:F" names in dev_lookup events. */
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5096 
/*
 * Re-write the saved PCI Express capability control registers.  Which
 * registers are written depends on the capability version and the port
 * type, mirroring the set captured by pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is written for every port type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: all v2+ devices, or v1 ports/endpoints with a link. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: v2+, root ports, or downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: v2+, root ports, or root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist in v2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5132 
5133 static void
5134 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5135 {
5136 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5137 	    dinfo->cfg.pcix.pcix_command,  2);
5138 }
5139 
/*
 * Restore a device's saved configuration registers, typically after a
 * suspend/resume cycle or a device reset.  The counterpart of
 * pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Registers common to all header types. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Registers whose location differs per header type (0/1/2). */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5207 
/*
 * Capture the writable PCI Express capability control registers so
 * pci_cfg_restore_pcie() can re-write them later.  The set of
 * registers read depends on the capability version and port type.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* Refresh the flags first; they encode the version and slot bit. */
	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: all v2+ devices, or v1 ports/endpoints with a link. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: v2+, root ports, or downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: v2+, root ports, or root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist in v2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5245 
5246 static void
5247 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5248 {
5249 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5250 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5251 }
5252 
5253 void
5254 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5255 {
5256 	uint32_t cls;
5257 	int ps;
5258 
5259 	/*
5260 	 * Some drivers apparently write to these registers w/o updating our
5261 	 * cached copy.  No harm happens if we update the copy, so do so here
5262 	 * so we can restore them.  The COMMAND register is modified by the
5263 	 * bus w/o updating the cache.  This should represent the normally
5264 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5265 	 */
5266 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5267 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5268 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5269 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5270 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5271 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5272 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5273 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5274 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5275 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5276 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5277 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5278 	case PCIM_HDRTYPE_NORMAL:
5279 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5280 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5281 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5282 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5283 		break;
5284 	case PCIM_HDRTYPE_BRIDGE:
5285 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5286 		    PCIR_SECLAT_1, 1);
5287 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5288 		    PCIR_SUBBUS_1, 1);
5289 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5290 		    PCIR_SECBUS_1, 1);
5291 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5292 		    PCIR_PRIBUS_1, 1);
5293 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5294 		    PCIR_BRIDGECTL_1, 2);
5295 		break;
5296 	case PCIM_HDRTYPE_CARDBUS:
5297 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5298 		    PCIR_SECLAT_2, 1);
5299 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5300 		    PCIR_SUBBUS_2, 1);
5301 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5302 		    PCIR_SECBUS_2, 1);
5303 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5304 		    PCIR_PRIBUS_2, 1);
5305 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5306 		    PCIR_BRIDGECTL_2, 2);
5307 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5308 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5309 		break;
5310 	}
5311 
5312 	if (dinfo->cfg.pcie.pcie_location != 0)
5313 		pci_cfg_save_pcie(dev, dinfo);
5314 
5315 	if (dinfo->cfg.pcix.pcix_location != 0)
5316 		pci_cfg_save_pcix(dev, dinfo);
5317 
5318 	/*
5319 	 * don't set the state for display devices, base peripherals and
5320 	 * memory devices since bad things happen when they are powered down.
5321 	 * We should (a) have drivers that can easily detach and (b) use
5322 	 * generic drivers for these devices so that some device actually
5323 	 * attaches.  We need to make sure that when we implement (a) we don't
5324 	 * power the device down on a reattach.
5325 	 */
5326 	cls = pci_get_class(dev);
5327 	if (!setstate)
5328 		return;
5329 	switch (pci_do_power_nodriver)
5330 	{
5331 		case 0:		/* NO powerdown at all */
5332 			return;
5333 		case 1:		/* Conservative about what to power down */
5334 			if (cls == PCIC_STORAGE)
5335 				return;
5336 			/*FALLTHROUGH*/
5337 		case 2:		/* Agressive about what to power down */
5338 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5339 			    cls == PCIC_BASEPERIPH)
5340 				return;
5341 			/*FALLTHROUGH*/
5342 		case 3:		/* Power down everything */
5343 			break;
5344 	}
5345 	/*
5346 	 * PCI spec says we can only go into D3 state from D0 state.
5347 	 * Transition from D[12] into D0 before going to D3 state.
5348 	 */
5349 	ps = pci_get_powerstate(dev);
5350 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5351 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5352 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5353 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5354 }
5355 
5356 /* Wrapper APIs suitable for device driver use. */
5357 void
5358 pci_save_state(device_t dev)
5359 {
5360 	struct pci_devinfo *dinfo;
5361 
5362 	dinfo = device_get_ivars(dev);
5363 	pci_cfg_save(dev, dinfo, 0);
5364 }
5365 
5366 void
5367 pci_restore_state(device_t dev)
5368 {
5369 	struct pci_devinfo *dinfo;
5370 
5371 	dinfo = device_get_ivars(dev);
5372 	pci_cfg_restore(dev, dinfo);
5373 }
5374 
5375 static uint16_t
5376 pci_get_rid_method(device_t dev, device_t child)
5377 {
5378 
5379 	return (PCIB_GET_RID(device_get_parent(dev), child));
5380 }
5381