xref: /freebsd/sys/dev/pci/pci.c (revision 26a222dc0c048fc071b548eadad7b80405a1b126)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #include <dev/usb/controller/xhcireg.h>
67 #include <dev/usb/controller/ehcireg.h>
68 #include <dev/usb/controller/ohcireg.h>
69 #include <dev/usb/controller/uhcireg.h>
70 
71 #include "pcib_if.h"
72 #include "pci_if.h"
73 
/*
 * Evaluates to true when 'reg' equals PCIR_BIOS and 'cfg' describes a
 * normal (type 0) header, or when 'reg' equals PCIR_BIOS_1 and 'cfg'
 * describes a bridge header.  Both arguments are parenthesized so that
 * expression arguments (e.g. "a | b") are compared as a whole.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
77 
/*
 * Forward declarations of the file-local (static) helpers and methods
 * defined later in this file.
 */

/* Quirk lookup, map/ROM register decoding and diagnostics. */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
/* pci_detach is only used in the method table when PCI_RES_BUS is defined. */
#ifdef PCI_RES_BUS
static int		pci_detach(device_t dev);
#endif
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
/* The VPD write helper is compiled out, matching its definition below. */
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static uint16_t		pci_get_rid_method(device_t dev, device_t child);

/* Builds a pci_devinfo once pci_read_device() has seen a valid vendor ID. */
static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
    int f, uint16_t vid, uint16_t did, size_t size);
126 
/*
 * Method dispatch table for the pci bus driver.  It maps device, bus and
 * PCI interface methods to their implementations and is handed to
 * DEFINE_CLASS_0() to build pci_driver.  The table is terminated by
 * DEVMETHOD_END.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	/* A pci-specific detach is used only when PCI_RES_BUS is defined. */
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
	/* SR-IOV methods are only present in PCI_IOV kernels. */
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
200 
/*
 * Define the "pci" driver class from pci_methods, with a softc of
 * sizeof(struct pci_softc).
 */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/*
 * Register the pci driver with "pcib" named as its parent;
 * pci_modevent is supplied as the module event handler.
 */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * Vendor description data and its size.
 * NOTE(review): presumably filled in by pci_load_vendor_data() -- confirm.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
209 
/* Quirk kinds; stored in the 'type' member of struct pci_quirk. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */

/* One entry of the per-device quirk table. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values */
	int	arg1;	/* Quirk-specific argument */
	int	arg2;	/* Second quirk-specific argument */
};
222 
/*
 * Table of known device quirks, searched by pci_has_quirk().  Each entry
 * is { devid, quirk type, arg1, arg2 }.  The table ends with an all-zero
 * entry; a devid of 0 is the terminator the search loop looks for.
 * NOTE(review): devid appears to be (device ID << 16) | vendor ID, e.g.
 * the Intel entries end in 0x8086 -- confirm against the callers.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* Terminator: devid == 0 ends the pci_has_quirk() scan. */
	{ 0 }
};
300 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/*
 * Global list of pci_devinfo entries.  pci_fill_devinfo() appends each new
 * entry to pci_devq and increments both pci_numdevs and pci_generation.
 */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_cap(): pcie_chipset when any device has a
 * PCI-express capability, pcix_chipset when a bridge has a PCI-X
 * capability.
 */
static int pcie_chipset, pcix_chipset;
310 
311 /* sysctl vars */
312 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
313 
314 static int pci_enable_io_modes = 1;
315 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
316     &pci_enable_io_modes, 1,
317     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
318 enable these bits correctly.  We'd like to do this all the time, but there\n\
319 are some peripherals that this causes problems with.");
320 
321 static int pci_do_realloc_bars = 0;
322 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
323     &pci_do_realloc_bars, 0,
324     "Attempt to allocate a new range for any BARs whose original "
325     "firmware-assigned ranges fail to allocate during the initial device scan.");
326 
327 static int pci_do_power_nodriver = 0;
328 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
329     &pci_do_power_nodriver, 0,
330   "Place a function into D3 state when no driver attaches to it.  0 means\n\
331 disable.  1 means conservatively place devices into D3 state.  2 means\n\
332 agressively place devices into D3 state.  3 means put absolutely everything\n\
333 in D3 state.");
334 
/*
 * hw.pci.do_power_resume, enabled (1) by default.
 * NOTE(review): not static, so presumably referenced from other PCI
 * source files -- confirm.
 */
int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.do_power_suspend, enabled (1) by default; also not static. */
int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

/* hw.pci.enable_msi, enabled (1) by default. */
static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix, enabled (1) by default. */
static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* hw.pci.honor_msi_blacklist, enabled (1) by default; declared RDTUN. */
static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
356 
357 #if defined(__i386__) || defined(__amd64__)
358 static int pci_usb_takeover = 1;
359 #else
360 static int pci_usb_takeover = 0;
361 #endif
362 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
363     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
364 Disable this if you depend on BIOS emulation of USB devices, that is\n\
365 you use USB devices (like keyboard or mouse) but do not load USB drivers");
366 
367 static int pci_clear_bars;
368 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
369     "Ignore firmware-assigned resources for BARs.");
370 
371 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
372 static int pci_clear_buses;
373 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
374     "Ignore firmware-assigned bus numbers.");
375 #endif
376 
377 static int pci_enable_ari = 1;
378 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
379     0, "Enable support for PCIe Alternative RID Interpretation");
380 
381 static int
382 pci_has_quirk(uint32_t devid, int quirk)
383 {
384 	const struct pci_quirk *q;
385 
386 	for (q = &pci_quirks[0]; q->devid; q++) {
387 		if (q->devid == devid && q->type == quirk)
388 			return (1);
389 	}
390 	return (0);
391 }
392 
/*
 * Find a device_t by bus/slot/function in domain 0.
 *
 * Thin wrapper around pci_find_dbsf() with the domain fixed to 0; returns
 * whatever that lookup returns (NULL when no device matches).
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
401 
402 /* Find a device_t by domain/bus/slot/function */
403 
404 device_t
405 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
406 {
407 	struct pci_devinfo *dinfo;
408 
409 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
410 		if ((dinfo->cfg.domain == domain) &&
411 		    (dinfo->cfg.bus == bus) &&
412 		    (dinfo->cfg.slot == slot) &&
413 		    (dinfo->cfg.func == func)) {
414 			return (dinfo->cfg.dev);
415 		}
416 	}
417 
418 	return (NULL);
419 }
420 
421 /* Find a device_t by vendor/device ID */
422 
423 device_t
424 pci_find_device(uint16_t vendor, uint16_t device)
425 {
426 	struct pci_devinfo *dinfo;
427 
428 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
429 		if ((dinfo->cfg.vendor == vendor) &&
430 		    (dinfo->cfg.device == device)) {
431 			return (dinfo->cfg.dev);
432 		}
433 	}
434 
435 	return (NULL);
436 }
437 
438 device_t
439 pci_find_class(uint8_t class, uint8_t subclass)
440 {
441 	struct pci_devinfo *dinfo;
442 
443 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
444 		if (dinfo->cfg.baseclass == class &&
445 		    dinfo->cfg.subclass == subclass) {
446 			return (dinfo->cfg.dev);
447 		}
448 	}
449 
450 	return (NULL);
451 }
452 
453 static int
454 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
455 {
456 	va_list ap;
457 	int retval;
458 
459 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
460 	    cfg->func);
461 	va_start(ap, fmt);
462 	retval += vprintf(fmt, ap);
463 	va_end(ap);
464 	return (retval);
465 }
466 
467 /* return base address of memory or port map */
468 
469 static pci_addr_t
470 pci_mapbase(uint64_t mapreg)
471 {
472 
473 	if (PCI_BAR_MEM(mapreg))
474 		return (mapreg & PCIM_BAR_MEM_BASE);
475 	else
476 		return (mapreg & PCIM_BAR_IO_BASE);
477 }
478 
479 /* return map type of memory or port map */
480 
481 static const char *
482 pci_maptype(uint64_t mapreg)
483 {
484 
485 	if (PCI_BAR_IO(mapreg))
486 		return ("I/O Port");
487 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
488 		return ("Prefetchable Memory");
489 	return ("Memory");
490 }
491 
/*
 * Return log2 of the map size decoded for a memory or port map.
 *
 * 'testval' is reduced to its base-address bits with pci_mapbase(); the
 * result is the position of the lowest set bit of that base, or 0 when
 * no base bit is set.
 */

int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count the zero bits below the lowest set bit. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
510 
511 /* return base address of device ROM */
512 
513 static pci_addr_t
514 pci_rombase(uint64_t mapreg)
515 {
516 
517 	return (mapreg & PCIM_BIOS_ADDR_MASK);
518 }
519 
/*
 * Return log2 of the map size decoded for the device ROM.
 *
 * 'testval' is reduced to its address bits with pci_rombase(); the result
 * is the position of the lowest set bit of that address, or 0 when no
 * address bit is set.
 */

static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);

	/* Count the zero bits below the lowest set bit. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
538 
539 /* return log2 of address range supported by map register */
540 
541 static int
542 pci_maprange(uint64_t mapreg)
543 {
544 	int ln2range = 0;
545 
546 	if (PCI_BAR_IO(mapreg))
547 		ln2range = 32;
548 	else
549 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
550 		case PCIM_BAR_MEM_32:
551 			ln2range = 32;
552 			break;
553 		case PCIM_BAR_MEM_1MB:
554 			ln2range = 20;
555 			break;
556 		case PCIM_BAR_MEM_64:
557 			ln2range = 64;
558 			break;
559 		}
560 	return (ln2range);
561 }
562 
563 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
564 
565 static void
566 pci_fixancient(pcicfgregs *cfg)
567 {
568 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
569 		return;
570 
571 	/* PCI to PCI bridges use header type 1 */
572 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
573 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
574 }
575 
576 /* extract header type specific config data */
577 
578 static void
579 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
580 {
581 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
582 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
583 	case PCIM_HDRTYPE_NORMAL:
584 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
585 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
586 		cfg->nummaps	    = PCI_MAXMAPS_0;
587 		break;
588 	case PCIM_HDRTYPE_BRIDGE:
589 		cfg->nummaps	    = PCI_MAXMAPS_1;
590 		break;
591 	case PCIM_HDRTYPE_CARDBUS:
592 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
593 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
594 		cfg->nummaps	    = PCI_MAXMAPS_2;
595 		break;
596 	}
597 #undef REG
598 }
599 
600 /* read configuration header into pcicfgregs structure */
601 struct pci_devinfo *
602 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
603 {
604 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
605 	uint16_t vid, did;
606 
607 	vid = REG(PCIR_VENDOR, 2);
608 	did = REG(PCIR_DEVICE, 2);
609 	if (vid != 0xffff)
610 		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
611 
612 	return (NULL);
613 }
614 
/*
 * Allocate and populate a pci_devinfo for the function at d:b:s:f.
 *
 * 'size' bytes are allocated zeroed with M_WAITOK.  The common header
 * fields are read from config space through the REG() macro defined in
 * pci_read_device() above (it expands using this function's pcib, b, s
 * and f parameters).  Header-type specific data and, when the status
 * register advertises them, the capability list are then read.  The new
 * entry is appended to the global pci_devq list, its pci_conf view is
 * filled in, and pci_numdevs and pci_generation are incremented.
 *
 * Returns the new entry.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
    uint16_t did, size_t size)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	cfg->mingnt		= REG(PCIR_MINGNT, 1);
	cfg->maxlat		= REG(PCIR_MAXLAT, 1);

	/* Record the multi-function bit, then strip it from hdrtype. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->devinfo_size	= size;
	cfg->iov		= NULL;

	/* pci_fixancient() may rewrite hdrtype before it is interpreted. */
	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* The status register is read again here, not taken from statreg. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the identifying fields into the entry's 'conf' member. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
/* End of the REG() definition that was opened in pci_read_device(). */
#undef REG
684 
/*
 * Walk the capability list of the function described by 'cfg' and record
 * what is found in the matching cfg sub-structures (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X and PCI-express).
 *
 * The register holding the first capability pointer depends on the
 * header type; header types other than normal, bridge and CardBus are
 * ignored.  As side effects the file-scope flags pcix_chipset and
 * pcie_chipset may be set, and on powerpc the HyperTransport MSI mapping
 * window is enabled for HT slaves that have it disabled.
 *
 * The REG() and WREG() macros defined here are intentionally NOT
 * undefined at the end: the VPD register helpers that follow use them.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
	/* 'addr' is only needed by the HT MSI mapping code below. */
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* Pick the register that holds the first capability pointer. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.  The list ends at a zero next pointer.
	 */
	while (nextptr != 0) {
		/*
		 * Sanity check.
		 * NOTE(review): nextptr comes from 1-byte reads, so this
		 * presumably cannot trigger -- confirm.
		 */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first power management entry is used. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/*
				 * Record the data register only when the next
				 * capability starts beyond it.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				/* Remember where the mapping lives. */
				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* The MMC field holds log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* The table size field is stored as count - 1. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Split each register into BAR indicator and offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				/* Low 16 bits: subvendor; high 16: subdevice. */
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			/* Capabilities not listed above are skipped. */
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
845 
846 /*
847  * PCI Vital Product Data
848  */
849 
850 #define	PCI_VPD_TIMEOUT		1000000
851 
852 static int
853 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
854 {
855 	int count = PCI_VPD_TIMEOUT;
856 
857 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
858 
859 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
860 
861 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
862 		if (--count < 0)
863 			return (ENXIO);
864 		DELAY(1);	/* limit looping */
865 	}
866 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
867 
868 	return (0);
869 }
870 
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * Stores 'data' in the VPD data register, writes the 4-byte aligned
 * offset 'reg' with bit 0x8000 set to the VPD address register, and
 * polls until that bit reads back as clear.  The poll delays 1us per
 * iteration and gives up after about PCI_VPD_TIMEOUT tries.
 *
 * Returns 0 on success, or ENXIO if the bit never cleared.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
890 
891 #undef PCI_VPD_TIMEOUT
892 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while the underlying reads happen in 32-bit words.
 */
struct vpd_readstate {
	device_t	pcib;		/* passed to pci_read_vpd_reg() */
	pcicfgregs	*cfg;		/* passed to pci_read_vpd_reg() */
	uint32_t	val;		/* current word; low byte is next */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* VPD offset of the next word read */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
901 
902 static int
903 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
904 {
905 	uint32_t reg;
906 	uint8_t byte;
907 
908 	if (vrs->bytesinval == 0) {
909 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
910 			return (ENXIO);
911 		vrs->val = le32toh(reg);
912 		vrs->off += 4;
913 		byte = vrs->val & 0xff;
914 		vrs->bytesinval = 3;
915 	} else {
916 		vrs->val = vrs->val >> 8;
917 		byte = vrs->val & 0xff;
918 		vrs->bytesinval--;
919 	}
920 
921 	vrs->cksum += byte;
922 	*data = byte;
923 	return (0);
924 }
925 
926 static void
927 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
928 {
929 	struct vpd_readstate vrs;
930 	int state;
931 	int name;
932 	int remain;
933 	int i;
934 	int alloc, off;		/* alloc/off for RO/W arrays */
935 	int cksumvalid;
936 	int dflen;
937 	uint8_t byte;
938 	uint8_t byte2;
939 
940 	/* init vpd reader */
941 	vrs.bytesinval = 0;
942 	vrs.off = 0;
943 	vrs.pcib = pcib;
944 	vrs.cfg = cfg;
945 	vrs.cksum = 0;
946 
947 	state = 0;
948 	name = remain = i = 0;	/* shut up stupid gcc */
949 	alloc = off = 0;	/* shut up stupid gcc */
950 	dflen = 0;		/* shut up stupid gcc */
951 	cksumvalid = -1;
952 	while (state >= 0) {
953 		if (vpd_nextbyte(&vrs, &byte)) {
954 			state = -2;
955 			break;
956 		}
957 #if 0
958 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
959 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
960 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
961 #endif
962 		switch (state) {
963 		case 0:		/* item name */
964 			if (byte & 0x80) {
965 				if (vpd_nextbyte(&vrs, &byte2)) {
966 					state = -2;
967 					break;
968 				}
969 				remain = byte2;
970 				if (vpd_nextbyte(&vrs, &byte2)) {
971 					state = -2;
972 					break;
973 				}
974 				remain |= byte2 << 8;
975 				if (remain > (0x7f*4 - vrs.off)) {
976 					state = -1;
977 					pci_printf(cfg,
978 					    "invalid VPD data, remain %#x\n",
979 					    remain);
980 				}
981 				name = byte & 0x7f;
982 			} else {
983 				remain = byte & 0x7;
984 				name = (byte >> 3) & 0xf;
985 			}
986 			switch (name) {
987 			case 0x2:	/* String */
988 				cfg->vpd.vpd_ident = malloc(remain + 1,
989 				    M_DEVBUF, M_WAITOK);
990 				i = 0;
991 				state = 1;
992 				break;
993 			case 0xf:	/* End */
994 				state = -1;
995 				break;
996 			case 0x10:	/* VPD-R */
997 				alloc = 8;
998 				off = 0;
999 				cfg->vpd.vpd_ros = malloc(alloc *
1000 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1001 				    M_WAITOK | M_ZERO);
1002 				state = 2;
1003 				break;
1004 			case 0x11:	/* VPD-W */
1005 				alloc = 8;
1006 				off = 0;
1007 				cfg->vpd.vpd_w = malloc(alloc *
1008 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1009 				    M_WAITOK | M_ZERO);
1010 				state = 5;
1011 				break;
1012 			default:	/* Invalid data, abort */
1013 				state = -1;
1014 				break;
1015 			}
1016 			break;
1017 
1018 		case 1:	/* Identifier String */
1019 			cfg->vpd.vpd_ident[i++] = byte;
1020 			remain--;
1021 			if (remain == 0)  {
1022 				cfg->vpd.vpd_ident[i] = '\0';
1023 				state = 0;
1024 			}
1025 			break;
1026 
1027 		case 2:	/* VPD-R Keyword Header */
1028 			if (off == alloc) {
1029 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1030 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1031 				    M_DEVBUF, M_WAITOK | M_ZERO);
1032 			}
1033 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1034 			if (vpd_nextbyte(&vrs, &byte2)) {
1035 				state = -2;
1036 				break;
1037 			}
1038 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1039 			if (vpd_nextbyte(&vrs, &byte2)) {
1040 				state = -2;
1041 				break;
1042 			}
1043 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1044 			if (dflen == 0 &&
1045 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1046 			    2) == 0) {
1047 				/*
1048 				 * if this happens, we can't trust the rest
1049 				 * of the VPD.
1050 				 */
1051 				pci_printf(cfg, "bad keyword length: %d\n",
1052 				    dflen);
1053 				cksumvalid = 0;
1054 				state = -1;
1055 				break;
1056 			} else if (dflen == 0) {
1057 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1058 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1059 				    M_DEVBUF, M_WAITOK);
1060 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1061 			} else
1062 				cfg->vpd.vpd_ros[off].value = malloc(
1063 				    (dflen + 1) *
1064 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1065 				    M_DEVBUF, M_WAITOK);
1066 			remain -= 3;
1067 			i = 0;
1068 			/* keep in sync w/ state 3's transistions */
1069 			if (dflen == 0 && remain == 0)
1070 				state = 0;
1071 			else if (dflen == 0)
1072 				state = 2;
1073 			else
1074 				state = 3;
1075 			break;
1076 
1077 		case 3:	/* VPD-R Keyword Value */
1078 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1079 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1080 			    "RV", 2) == 0 && cksumvalid == -1) {
1081 				if (vrs.cksum == 0)
1082 					cksumvalid = 1;
1083 				else {
1084 					if (bootverbose)
1085 						pci_printf(cfg,
1086 					    "bad VPD cksum, remain %hhu\n",
1087 						    vrs.cksum);
1088 					cksumvalid = 0;
1089 					state = -1;
1090 					break;
1091 				}
1092 			}
1093 			dflen--;
1094 			remain--;
1095 			/* keep in sync w/ state 2's transistions */
1096 			if (dflen == 0)
1097 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1098 			if (dflen == 0 && remain == 0) {
1099 				cfg->vpd.vpd_rocnt = off;
1100 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1101 				    off * sizeof(*cfg->vpd.vpd_ros),
1102 				    M_DEVBUF, M_WAITOK | M_ZERO);
1103 				state = 0;
1104 			} else if (dflen == 0)
1105 				state = 2;
1106 			break;
1107 
1108 		case 4:
1109 			remain--;
1110 			if (remain == 0)
1111 				state = 0;
1112 			break;
1113 
1114 		case 5:	/* VPD-W Keyword Header */
1115 			if (off == alloc) {
1116 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1117 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1118 				    M_DEVBUF, M_WAITOK | M_ZERO);
1119 			}
1120 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1121 			if (vpd_nextbyte(&vrs, &byte2)) {
1122 				state = -2;
1123 				break;
1124 			}
1125 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1126 			if (vpd_nextbyte(&vrs, &byte2)) {
1127 				state = -2;
1128 				break;
1129 			}
1130 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1131 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1132 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1133 			    sizeof(*cfg->vpd.vpd_w[off].value),
1134 			    M_DEVBUF, M_WAITOK);
1135 			remain -= 3;
1136 			i = 0;
1137 			/* keep in sync w/ state 6's transistions */
1138 			if (dflen == 0 && remain == 0)
1139 				state = 0;
1140 			else if (dflen == 0)
1141 				state = 5;
1142 			else
1143 				state = 6;
1144 			break;
1145 
1146 		case 6:	/* VPD-W Keyword Value */
1147 			cfg->vpd.vpd_w[off].value[i++] = byte;
1148 			dflen--;
1149 			remain--;
1150 			/* keep in sync w/ state 5's transistions */
1151 			if (dflen == 0)
1152 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1153 			if (dflen == 0 && remain == 0) {
1154 				cfg->vpd.vpd_wcnt = off;
1155 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1156 				    off * sizeof(*cfg->vpd.vpd_w),
1157 				    M_DEVBUF, M_WAITOK | M_ZERO);
1158 				state = 0;
1159 			} else if (dflen == 0)
1160 				state = 5;
1161 			break;
1162 
1163 		default:
1164 			pci_printf(cfg, "invalid state: %d\n", state);
1165 			state = -1;
1166 			break;
1167 		}
1168 	}
1169 
1170 	if (cksumvalid == 0 || state < -1) {
1171 		/* read-only data bad, clean up */
1172 		if (cfg->vpd.vpd_ros != NULL) {
1173 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1174 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1175 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1176 			cfg->vpd.vpd_ros = NULL;
1177 		}
1178 	}
1179 	if (state < -1) {
1180 		/* I/O error, clean up */
1181 		pci_printf(cfg, "failed to read VPD data.\n");
1182 		if (cfg->vpd.vpd_ident != NULL) {
1183 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1184 			cfg->vpd.vpd_ident = NULL;
1185 		}
1186 		if (cfg->vpd.vpd_w != NULL) {
1187 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1188 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1189 			free(cfg->vpd.vpd_w, M_DEVBUF);
1190 			cfg->vpd.vpd_w = NULL;
1191 		}
1192 	}
1193 	cfg->vpd.vpd_cached = 1;
1194 #undef REG
1195 #undef WREG
1196 }
1197 
1198 int
1199 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1200 {
1201 	struct pci_devinfo *dinfo = device_get_ivars(child);
1202 	pcicfgregs *cfg = &dinfo->cfg;
1203 
1204 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1205 		pci_read_vpd(device_get_parent(dev), cfg);
1206 
1207 	*identptr = cfg->vpd.vpd_ident;
1208 
1209 	if (*identptr == NULL)
1210 		return (ENXIO);
1211 
1212 	return (0);
1213 }
1214 
1215 int
1216 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1217 	const char **vptr)
1218 {
1219 	struct pci_devinfo *dinfo = device_get_ivars(child);
1220 	pcicfgregs *cfg = &dinfo->cfg;
1221 	int i;
1222 
1223 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1224 		pci_read_vpd(device_get_parent(dev), cfg);
1225 
1226 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1227 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1228 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1229 			*vptr = cfg->vpd.vpd_ros[i].value;
1230 			return (0);
1231 		}
1232 
1233 	*vptr = NULL;
1234 	return (ENXIO);
1235 }
1236 
1237 struct pcicfg_vpd *
1238 pci_fetch_vpd_list(device_t dev)
1239 {
1240 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241 	pcicfgregs *cfg = &dinfo->cfg;
1242 
1243 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1244 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1245 	return (&cfg->vpd);
1246 }
1247 
1248 /*
1249  * Find the requested HyperTransport capability and return the offset
1250  * in configuration space via the pointer provided.  The function
1251  * returns 0 on success and an error code otherwise.
1252  */
1253 int
1254 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1255 {
1256 	int ptr, error;
1257 	uint16_t val;
1258 
1259 	error = pci_find_cap(child, PCIY_HT, &ptr);
1260 	if (error)
1261 		return (error);
1262 
1263 	/*
1264 	 * Traverse the capabilities list checking each HT capability
1265 	 * to see if it matches the requested HT capability.
1266 	 */
1267 	while (ptr != 0) {
1268 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1269 		if (capability == PCIM_HTCAP_SLAVE ||
1270 		    capability == PCIM_HTCAP_HOST)
1271 			val &= 0xe000;
1272 		else
1273 			val &= PCIM_HTCMD_CAP_MASK;
1274 		if (val == capability) {
1275 			if (capreg != NULL)
1276 				*capreg = ptr;
1277 			return (0);
1278 		}
1279 
1280 		/* Skip to the next HT capability. */
1281 		while (ptr != 0) {
1282 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1283 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1284 			    PCIY_HT)
1285 				break;
1286 		}
1287 	}
1288 	return (ENOENT);
1289 }
1290 
1291 /*
1292  * Find the requested capability and return the offset in
1293  * configuration space via the pointer provided.  The function returns
1294  * 0 on success and an error code otherwise.
1295  */
1296 int
1297 pci_find_cap_method(device_t dev, device_t child, int capability,
1298     int *capreg)
1299 {
1300 	struct pci_devinfo *dinfo = device_get_ivars(child);
1301 	pcicfgregs *cfg = &dinfo->cfg;
1302 	u_int32_t status;
1303 	u_int8_t ptr;
1304 
1305 	/*
1306 	 * Check the CAP_LIST bit of the PCI status register first.
1307 	 */
1308 	status = pci_read_config(child, PCIR_STATUS, 2);
1309 	if (!(status & PCIM_STATUS_CAPPRESENT))
1310 		return (ENXIO);
1311 
1312 	/*
1313 	 * Determine the start pointer of the capabilities list.
1314 	 */
1315 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1316 	case PCIM_HDRTYPE_NORMAL:
1317 	case PCIM_HDRTYPE_BRIDGE:
1318 		ptr = PCIR_CAP_PTR;
1319 		break;
1320 	case PCIM_HDRTYPE_CARDBUS:
1321 		ptr = PCIR_CAP_PTR_2;
1322 		break;
1323 	default:
1324 		/* XXX: panic? */
1325 		return (ENXIO);		/* no extended capabilities support */
1326 	}
1327 	ptr = pci_read_config(child, ptr, 1);
1328 
1329 	/*
1330 	 * Traverse the capabilities list.
1331 	 */
1332 	while (ptr != 0) {
1333 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1334 			if (capreg != NULL)
1335 				*capreg = ptr;
1336 			return (0);
1337 		}
1338 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1339 	}
1340 
1341 	return (ENOENT);
1342 }
1343 
1344 /*
1345  * Find the requested extended capability and return the offset in
1346  * configuration space via the pointer provided.  The function returns
1347  * 0 on success and an error code otherwise.
1348  */
1349 int
1350 pci_find_extcap_method(device_t dev, device_t child, int capability,
1351     int *capreg)
1352 {
1353 	struct pci_devinfo *dinfo = device_get_ivars(child);
1354 	pcicfgregs *cfg = &dinfo->cfg;
1355 	uint32_t ecap;
1356 	uint16_t ptr;
1357 
1358 	/* Only supported for PCI-express devices. */
1359 	if (cfg->pcie.pcie_location == 0)
1360 		return (ENXIO);
1361 
1362 	ptr = PCIR_EXTCAP;
1363 	ecap = pci_read_config(child, ptr, 4);
1364 	if (ecap == 0xffffffff || ecap == 0)
1365 		return (ENOENT);
1366 	for (;;) {
1367 		if (PCI_EXTCAP_ID(ecap) == capability) {
1368 			if (capreg != NULL)
1369 				*capreg = ptr;
1370 			return (0);
1371 		}
1372 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1373 		if (ptr == 0)
1374 			break;
1375 		ecap = pci_read_config(child, ptr, 4);
1376 	}
1377 
1378 	return (ENOENT);
1379 }
1380 
1381 /*
1382  * Support for MSI-X message interrupts.
1383  */
1384 void
1385 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1386     uint64_t address, uint32_t data)
1387 {
1388 	struct pci_devinfo *dinfo = device_get_ivars(child);
1389 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1390 	uint32_t offset;
1391 
1392 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1393 	offset = msix->msix_table_offset + index * 16;
1394 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1395 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1396 	bus_write_4(msix->msix_table_res, offset + 8, data);
1397 
1398 	/* Enable MSI -> HT mapping. */
1399 	pci_ht_map_msi(child, address);
1400 }
1401 
1402 void
1403 pci_mask_msix(device_t dev, u_int index)
1404 {
1405 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1406 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1407 	uint32_t offset, val;
1408 
1409 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1410 	offset = msix->msix_table_offset + index * 16 + 12;
1411 	val = bus_read_4(msix->msix_table_res, offset);
1412 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1413 		val |= PCIM_MSIX_VCTRL_MASK;
1414 		bus_write_4(msix->msix_table_res, offset, val);
1415 	}
1416 }
1417 
1418 void
1419 pci_unmask_msix(device_t dev, u_int index)
1420 {
1421 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1422 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1423 	uint32_t offset, val;
1424 
1425 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1426 	offset = msix->msix_table_offset + index * 16 + 12;
1427 	val = bus_read_4(msix->msix_table_res, offset);
1428 	if (val & PCIM_MSIX_VCTRL_MASK) {
1429 		val &= ~PCIM_MSIX_VCTRL_MASK;
1430 		bus_write_4(msix->msix_table_res, offset, val);
1431 	}
1432 }
1433 
1434 int
1435 pci_pending_msix(device_t dev, u_int index)
1436 {
1437 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1438 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1439 	uint32_t offset, bit;
1440 
1441 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1442 	offset = msix->msix_pba_offset + (index / 32) * 4;
1443 	bit = 1 << index % 32;
1444 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1445 }
1446 
1447 /*
1448  * Restore MSI-X registers and table during resume.  If MSI-X is
1449  * enabled then walk the virtual table to restore the actual MSI-X
1450  * table.
1451  */
1452 static void
1453 pci_resume_msix(device_t dev)
1454 {
1455 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1456 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1457 	struct msix_table_entry *mte;
1458 	struct msix_vector *mv;
1459 	int i;
1460 
1461 	if (msix->msix_alloc > 0) {
1462 		/* First, mask all vectors. */
1463 		for (i = 0; i < msix->msix_msgnum; i++)
1464 			pci_mask_msix(dev, i);
1465 
1466 		/* Second, program any messages with at least one handler. */
1467 		for (i = 0; i < msix->msix_table_len; i++) {
1468 			mte = &msix->msix_table[i];
1469 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1470 				continue;
1471 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1472 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1473 			pci_unmask_msix(dev, i);
1474 		}
1475 	}
1476 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1477 	    msix->msix_ctrl, 2);
1478 }
1479 
1480 /*
1481  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1482  * returned in *count.  After this function returns, each message will be
1483  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1484  */
1485 int
1486 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1487 {
1488 	struct pci_devinfo *dinfo = device_get_ivars(child);
1489 	pcicfgregs *cfg = &dinfo->cfg;
1490 	struct resource_list_entry *rle;
1491 	int actual, error, i, irq, max;
1492 
1493 	/* Don't let count == 0 get us into trouble. */
1494 	if (*count == 0)
1495 		return (EINVAL);
1496 
1497 	/* If rid 0 is allocated, then fail. */
1498 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1499 	if (rle != NULL && rle->res != NULL)
1500 		return (ENXIO);
1501 
1502 	/* Already have allocated messages? */
1503 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1504 		return (ENXIO);
1505 
1506 	/* If MSI-X is blacklisted for this system, fail. */
1507 	if (pci_msix_blacklisted())
1508 		return (ENXIO);
1509 
1510 	/* MSI-X capability present? */
1511 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1512 		return (ENODEV);
1513 
1514 	/* Make sure the appropriate BARs are mapped. */
1515 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1516 	    cfg->msix.msix_table_bar);
1517 	if (rle == NULL || rle->res == NULL ||
1518 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1519 		return (ENXIO);
1520 	cfg->msix.msix_table_res = rle->res;
1521 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1522 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1523 		    cfg->msix.msix_pba_bar);
1524 		if (rle == NULL || rle->res == NULL ||
1525 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1526 			return (ENXIO);
1527 	}
1528 	cfg->msix.msix_pba_res = rle->res;
1529 
1530 	if (bootverbose)
1531 		device_printf(child,
1532 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1533 		    *count, cfg->msix.msix_msgnum);
1534 	max = min(*count, cfg->msix.msix_msgnum);
1535 	for (i = 0; i < max; i++) {
1536 		/* Allocate a message. */
1537 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1538 		if (error) {
1539 			if (i == 0)
1540 				return (error);
1541 			break;
1542 		}
1543 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1544 		    irq, 1);
1545 	}
1546 	actual = i;
1547 
1548 	if (bootverbose) {
1549 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1550 		if (actual == 1)
1551 			device_printf(child, "using IRQ %lu for MSI-X\n",
1552 			    rle->start);
1553 		else {
1554 			int run;
1555 
1556 			/*
1557 			 * Be fancy and try to print contiguous runs of
1558 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1559 			 * 'run' is true if we are in a range.
1560 			 */
1561 			device_printf(child, "using IRQs %lu", rle->start);
1562 			irq = rle->start;
1563 			run = 0;
1564 			for (i = 1; i < actual; i++) {
1565 				rle = resource_list_find(&dinfo->resources,
1566 				    SYS_RES_IRQ, i + 1);
1567 
1568 				/* Still in a run? */
1569 				if (rle->start == irq + 1) {
1570 					run = 1;
1571 					irq++;
1572 					continue;
1573 				}
1574 
1575 				/* Finish previous range. */
1576 				if (run) {
1577 					printf("-%d", irq);
1578 					run = 0;
1579 				}
1580 
1581 				/* Start new range. */
1582 				printf(",%lu", rle->start);
1583 				irq = rle->start;
1584 			}
1585 
1586 			/* Unfinished range? */
1587 			if (run)
1588 				printf("-%d", irq);
1589 			printf(" for MSI-X\n");
1590 		}
1591 	}
1592 
1593 	/* Mask all vectors. */
1594 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1595 		pci_mask_msix(child, i);
1596 
1597 	/* Allocate and initialize vector data and virtual table. */
1598 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1599 	    M_DEVBUF, M_WAITOK | M_ZERO);
1600 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1601 	    M_DEVBUF, M_WAITOK | M_ZERO);
1602 	for (i = 0; i < actual; i++) {
1603 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1604 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1605 		cfg->msix.msix_table[i].mte_vector = i + 1;
1606 	}
1607 
1608 	/* Update control register to enable MSI-X. */
1609 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1610 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1611 	    cfg->msix.msix_ctrl, 2);
1612 
1613 	/* Update counts of alloc'd messages. */
1614 	cfg->msix.msix_alloc = actual;
1615 	cfg->msix.msix_table_len = actual;
1616 	*count = actual;
1617 	return (0);
1618 }
1619 
1620 /*
1621  * By default, pci_alloc_msix() will assign the allocated IRQ
1622  * resources consecutively to the first N messages in the MSI-X table.
1623  * However, device drivers may want to use different layouts if they
1624  * either receive fewer messages than they asked for, or they wish to
1625  * populate the MSI-X table sparsely.  This method allows the driver
1626  * to specify what layout it wants.  It must be called after a
1627  * successful pci_alloc_msix() but before any of the associated
1628  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1629  *
1630  * The 'vectors' array contains 'count' message vectors.  The array
1631  * maps directly to the MSI-X table in that index 0 in the array
1632  * specifies the vector for the first message in the MSI-X table, etc.
1633  * The vector value in each array index can either be 0 to indicate
1634  * that no vector should be assigned to a message slot, or it can be a
1635  * number from 1 to N (where N is the count returned from a
1636  * succcessful call to pci_alloc_msix()) to indicate which message
1637  * vector (IRQ) to be used for the corresponding message.
1638  *
1639  * On successful return, each message with a non-zero vector will have
1640  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1641  * 1.  Additionally, if any of the IRQs allocated via the previous
1642  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1643  * will be freed back to the system automatically.
1644  *
1645  * For example, suppose a driver has a MSI-X table with 6 messages and
1646  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1647  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1648  * C.  After the call to pci_alloc_msix(), the device will be setup to
1649  * have an MSI-X table of ABC--- (where - means no vector assigned).
1650  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1651  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1652  * be freed back to the system.  This device will also have valid
1653  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1654  *
1655  * In any case, the SYS_RES_IRQ rid X will always map to the message
1656  * at MSI-X table index X - 1 and will only be valid if a vector is
1657  * assigned to that table entry.
1658  */
1659 int
1660 pci_remap_msix_method(device_t dev, device_t child, int count,
1661     const u_int *vectors)
1662 {
1663 	struct pci_devinfo *dinfo = device_get_ivars(child);
1664 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1665 	struct resource_list_entry *rle;
1666 	int i, irq, j, *used;
1667 
1668 	/*
1669 	 * Have to have at least one message in the table but the
1670 	 * table can't be bigger than the actual MSI-X table in the
1671 	 * device.
1672 	 */
1673 	if (count == 0 || count > msix->msix_msgnum)
1674 		return (EINVAL);
1675 
1676 	/* Sanity check the vectors. */
1677 	for (i = 0; i < count; i++)
1678 		if (vectors[i] > msix->msix_alloc)
1679 			return (EINVAL);
1680 
1681 	/*
1682 	 * Make sure there aren't any holes in the vectors to be used.
1683 	 * It's a big pain to support it, and it doesn't really make
1684 	 * sense anyway.  Also, at least one vector must be used.
1685 	 */
1686 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1687 	    M_ZERO);
1688 	for (i = 0; i < count; i++)
1689 		if (vectors[i] != 0)
1690 			used[vectors[i] - 1] = 1;
1691 	for (i = 0; i < msix->msix_alloc - 1; i++)
1692 		if (used[i] == 0 && used[i + 1] == 1) {
1693 			free(used, M_DEVBUF);
1694 			return (EINVAL);
1695 		}
1696 	if (used[0] != 1) {
1697 		free(used, M_DEVBUF);
1698 		return (EINVAL);
1699 	}
1700 
1701 	/* Make sure none of the resources are allocated. */
1702 	for (i = 0; i < msix->msix_table_len; i++) {
1703 		if (msix->msix_table[i].mte_vector == 0)
1704 			continue;
1705 		if (msix->msix_table[i].mte_handlers > 0) {
1706 			free(used, M_DEVBUF);
1707 			return (EBUSY);
1708 		}
1709 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1710 		KASSERT(rle != NULL, ("missing resource"));
1711 		if (rle->res != NULL) {
1712 			free(used, M_DEVBUF);
1713 			return (EBUSY);
1714 		}
1715 	}
1716 
1717 	/* Free the existing resource list entries. */
1718 	for (i = 0; i < msix->msix_table_len; i++) {
1719 		if (msix->msix_table[i].mte_vector == 0)
1720 			continue;
1721 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1722 	}
1723 
1724 	/*
1725 	 * Build the new virtual table keeping track of which vectors are
1726 	 * used.
1727 	 */
1728 	free(msix->msix_table, M_DEVBUF);
1729 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1730 	    M_DEVBUF, M_WAITOK | M_ZERO);
1731 	for (i = 0; i < count; i++)
1732 		msix->msix_table[i].mte_vector = vectors[i];
1733 	msix->msix_table_len = count;
1734 
1735 	/* Free any unused IRQs and resize the vectors array if necessary. */
1736 	j = msix->msix_alloc - 1;
1737 	if (used[j] == 0) {
1738 		struct msix_vector *vec;
1739 
1740 		while (used[j] == 0) {
1741 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1742 			    msix->msix_vectors[j].mv_irq);
1743 			j--;
1744 		}
1745 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1746 		    M_WAITOK);
1747 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1748 		    (j + 1));
1749 		free(msix->msix_vectors, M_DEVBUF);
1750 		msix->msix_vectors = vec;
1751 		msix->msix_alloc = j + 1;
1752 	}
1753 	free(used, M_DEVBUF);
1754 
1755 	/* Map the IRQs onto the rids. */
1756 	for (i = 0; i < count; i++) {
1757 		if (vectors[i] == 0)
1758 			continue;
1759 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1760 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1761 		    irq, 1);
1762 	}
1763 
1764 	if (bootverbose) {
1765 		device_printf(child, "Remapped MSI-X IRQs as: ");
1766 		for (i = 0; i < count; i++) {
1767 			if (i != 0)
1768 				printf(", ");
1769 			if (vectors[i] == 0)
1770 				printf("---");
1771 			else
1772 				printf("%d",
1773 				    msix->msix_vectors[vectors[i]].mv_irq);
1774 		}
1775 		printf("\n");
1776 	}
1777 
1778 	return (0);
1779 }
1780 
1781 static int
1782 pci_release_msix(device_t dev, device_t child)
1783 {
1784 	struct pci_devinfo *dinfo = device_get_ivars(child);
1785 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1786 	struct resource_list_entry *rle;
1787 	int i;
1788 
1789 	/* Do we have any messages to release? */
1790 	if (msix->msix_alloc == 0)
1791 		return (ENODEV);
1792 
1793 	/* Make sure none of the resources are allocated. */
1794 	for (i = 0; i < msix->msix_table_len; i++) {
1795 		if (msix->msix_table[i].mte_vector == 0)
1796 			continue;
1797 		if (msix->msix_table[i].mte_handlers > 0)
1798 			return (EBUSY);
1799 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1800 		KASSERT(rle != NULL, ("missing resource"));
1801 		if (rle->res != NULL)
1802 			return (EBUSY);
1803 	}
1804 
1805 	/* Update control register to disable MSI-X. */
1806 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1807 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1808 	    msix->msix_ctrl, 2);
1809 
1810 	/* Free the resource list entries. */
1811 	for (i = 0; i < msix->msix_table_len; i++) {
1812 		if (msix->msix_table[i].mte_vector == 0)
1813 			continue;
1814 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1815 	}
1816 	free(msix->msix_table, M_DEVBUF);
1817 	msix->msix_table_len = 0;
1818 
1819 	/* Release the IRQs. */
1820 	for (i = 0; i < msix->msix_alloc; i++)
1821 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1822 		    msix->msix_vectors[i].mv_irq);
1823 	free(msix->msix_vectors, M_DEVBUF);
1824 	msix->msix_alloc = 0;
1825 	return (0);
1826 }
1827 
1828 /*
1829  * Return the max supported MSI-X messages this device supports.
1830  * Basically, assuming the MD code can alloc messages, this function
1831  * should return the maximum value that pci_alloc_msix() can return.
1832  * Thus, it is subject to the tunables, etc.
1833  */
1834 int
1835 pci_msix_count_method(device_t dev, device_t child)
1836 {
1837 	struct pci_devinfo *dinfo = device_get_ivars(child);
1838 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1839 
1840 	if (pci_do_msix && msix->msix_location != 0)
1841 		return (msix->msix_msgnum);
1842 	return (0);
1843 }
1844 
1845 /*
1846  * HyperTransport MSI mapping control
1847  */
1848 void
1849 pci_ht_map_msi(device_t dev, uint64_t addr)
1850 {
1851 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1852 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1853 
1854 	if (!ht->ht_msimap)
1855 		return;
1856 
1857 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1858 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1859 		/* Enable MSI -> HT mapping. */
1860 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1861 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1862 		    ht->ht_msictrl, 2);
1863 	}
1864 
1865 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1866 		/* Disable MSI -> HT mapping. */
1867 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1868 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1869 		    ht->ht_msictrl, 2);
1870 	}
1871 }
1872 
1873 int
1874 pci_get_max_read_req(device_t dev)
1875 {
1876 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1877 	int cap;
1878 	uint16_t val;
1879 
1880 	cap = dinfo->cfg.pcie.pcie_location;
1881 	if (cap == 0)
1882 		return (0);
1883 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1884 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1885 	val >>= 12;
1886 	return (1 << (val + 7));
1887 }
1888 
1889 int
1890 pci_set_max_read_req(device_t dev, int size)
1891 {
1892 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1893 	int cap;
1894 	uint16_t val;
1895 
1896 	cap = dinfo->cfg.pcie.pcie_location;
1897 	if (cap == 0)
1898 		return (0);
1899 	if (size < 128)
1900 		size = 128;
1901 	if (size > 4096)
1902 		size = 4096;
1903 	size = (1 << (fls(size) - 1));
1904 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1905 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1906 	val |= (fls(size) - 8) << 12;
1907 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1908 	return (size);
1909 }
1910 
1911 /*
1912  * Support for MSI message signalled interrupts.
1913  */
1914 void
1915 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1916     uint16_t data)
1917 {
1918 	struct pci_devinfo *dinfo = device_get_ivars(child);
1919 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1920 
1921 	/* Write data and address values. */
1922 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
1923 	    address & 0xffffffff, 4);
1924 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1925 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1926 		    address >> 32, 4);
1927 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
1928 		    data, 2);
1929 	} else
1930 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
1931 		    2);
1932 
1933 	/* Enable MSI in the control register. */
1934 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1935 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1936 	    msi->msi_ctrl, 2);
1937 
1938 	/* Enable MSI -> HT mapping. */
1939 	pci_ht_map_msi(child, address);
1940 }
1941 
1942 void
1943 pci_disable_msi_method(device_t dev, device_t child)
1944 {
1945 	struct pci_devinfo *dinfo = device_get_ivars(child);
1946 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1947 
1948 	/* Disable MSI -> HT mapping. */
1949 	pci_ht_map_msi(child, 0);
1950 
1951 	/* Disable MSI in the control register. */
1952 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1953 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1954 	    msi->msi_ctrl, 2);
1955 }
1956 
1957 /*
1958  * Restore MSI registers during resume.  If MSI is enabled then
1959  * restore the data and address registers in addition to the control
1960  * register.
1961  */
1962 static void
1963 pci_resume_msi(device_t dev)
1964 {
1965 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1966 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1967 	uint64_t address;
1968 	uint16_t data;
1969 
1970 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1971 		address = msi->msi_addr;
1972 		data = msi->msi_data;
1973 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1974 		    address & 0xffffffff, 4);
1975 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1976 			pci_write_config(dev, msi->msi_location +
1977 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1978 			pci_write_config(dev, msi->msi_location +
1979 			    PCIR_MSI_DATA_64BIT, data, 2);
1980 		} else
1981 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1982 			    data, 2);
1983 	}
1984 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1985 	    2);
1986 }
1987 
1988 static int
1989 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1990 {
1991 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1992 	pcicfgregs *cfg = &dinfo->cfg;
1993 	struct resource_list_entry *rle;
1994 	struct msix_table_entry *mte;
1995 	struct msix_vector *mv;
1996 	uint64_t addr;
1997 	uint32_t data;
1998 	int error, i, j;
1999 
2000 	/*
2001 	 * Handle MSI first.  We try to find this IRQ among our list
2002 	 * of MSI IRQs.  If we find it, we request updated address and
2003 	 * data registers and apply the results.
2004 	 */
2005 	if (cfg->msi.msi_alloc > 0) {
2006 
2007 		/* If we don't have any active handlers, nothing to do. */
2008 		if (cfg->msi.msi_handlers == 0)
2009 			return (0);
2010 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2011 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2012 			    i + 1);
2013 			if (rle->start == irq) {
2014 				error = PCIB_MAP_MSI(device_get_parent(bus),
2015 				    dev, irq, &addr, &data);
2016 				if (error)
2017 					return (error);
2018 				pci_disable_msi(dev);
2019 				dinfo->cfg.msi.msi_addr = addr;
2020 				dinfo->cfg.msi.msi_data = data;
2021 				pci_enable_msi(dev, addr, data);
2022 				return (0);
2023 			}
2024 		}
2025 		return (ENOENT);
2026 	}
2027 
2028 	/*
2029 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2030 	 * we request the updated mapping info.  If that works, we go
2031 	 * through all the slots that use this IRQ and update them.
2032 	 */
2033 	if (cfg->msix.msix_alloc > 0) {
2034 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2035 			mv = &cfg->msix.msix_vectors[i];
2036 			if (mv->mv_irq == irq) {
2037 				error = PCIB_MAP_MSI(device_get_parent(bus),
2038 				    dev, irq, &addr, &data);
2039 				if (error)
2040 					return (error);
2041 				mv->mv_address = addr;
2042 				mv->mv_data = data;
2043 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2044 					mte = &cfg->msix.msix_table[j];
2045 					if (mte->mte_vector != i + 1)
2046 						continue;
2047 					if (mte->mte_handlers == 0)
2048 						continue;
2049 					pci_mask_msix(dev, j);
2050 					pci_enable_msix(dev, j, addr, data);
2051 					pci_unmask_msix(dev, j);
2052 				}
2053 			}
2054 		}
2055 		return (ENOENT);
2056 	}
2057 
2058 	return (ENOENT);
2059 }
2060 
2061 /*
2062  * Returns true if the specified device is blacklisted because MSI
2063  * doesn't work.
2064  */
2065 int
2066 pci_msi_device_blacklisted(device_t dev)
2067 {
2068 
2069 	if (!pci_honor_msi_blacklist)
2070 		return (0);
2071 
2072 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2073 }
2074 
2075 /*
2076  * Determine if MSI is blacklisted globally on this system.  Currently,
2077  * we just check for blacklisted chipsets as represented by the
2078  * host-PCI bridge at device 0:0:0.  In the future, it may become
2079  * necessary to check other system attributes, such as the kenv values
2080  * that give the motherboard manufacturer and model number.
2081  */
2082 static int
2083 pci_msi_blacklisted(void)
2084 {
2085 	device_t dev;
2086 
2087 	if (!pci_honor_msi_blacklist)
2088 		return (0);
2089 
2090 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2091 	if (!(pcie_chipset || pcix_chipset)) {
2092 		if (vm_guest != VM_GUEST_NO) {
2093 			/*
2094 			 * Whitelist older chipsets in virtual
2095 			 * machines known to support MSI.
2096 			 */
2097 			dev = pci_find_bsf(0, 0, 0);
2098 			if (dev != NULL)
2099 				return (!pci_has_quirk(pci_get_devid(dev),
2100 					PCI_QUIRK_ENABLE_MSI_VM));
2101 		}
2102 		return (1);
2103 	}
2104 
2105 	dev = pci_find_bsf(0, 0, 0);
2106 	if (dev != NULL)
2107 		return (pci_msi_device_blacklisted(dev));
2108 	return (0);
2109 }
2110 
2111 /*
2112  * Returns true if the specified device is blacklisted because MSI-X
2113  * doesn't work.  Note that this assumes that if MSI doesn't work,
2114  * MSI-X doesn't either.
2115  */
2116 int
2117 pci_msix_device_blacklisted(device_t dev)
2118 {
2119 
2120 	if (!pci_honor_msi_blacklist)
2121 		return (0);
2122 
2123 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2124 		return (1);
2125 
2126 	return (pci_msi_device_blacklisted(dev));
2127 }
2128 
2129 /*
2130  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2131  * is blacklisted, assume that MSI-X is as well.  Check for additional
2132  * chipsets where MSI works but MSI-X does not.
2133  */
2134 static int
2135 pci_msix_blacklisted(void)
2136 {
2137 	device_t dev;
2138 
2139 	if (!pci_honor_msi_blacklist)
2140 		return (0);
2141 
2142 	dev = pci_find_bsf(0, 0, 0);
2143 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2144 	    PCI_QUIRK_DISABLE_MSIX))
2145 		return (1);
2146 
2147 	return (pci_msi_blacklisted());
2148 }
2149 
2150 /*
2151  * Attempt to allocate *count MSI messages.  The actual number allocated is
2152  * returned in *count.  After this function returns, each message will be
2153  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2154  */
2155 int
2156 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2157 {
2158 	struct pci_devinfo *dinfo = device_get_ivars(child);
2159 	pcicfgregs *cfg = &dinfo->cfg;
2160 	struct resource_list_entry *rle;
2161 	int actual, error, i, irqs[32];
2162 	uint16_t ctrl;
2163 
2164 	/* Don't let count == 0 get us into trouble. */
2165 	if (*count == 0)
2166 		return (EINVAL);
2167 
2168 	/* If rid 0 is allocated, then fail. */
2169 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2170 	if (rle != NULL && rle->res != NULL)
2171 		return (ENXIO);
2172 
2173 	/* Already have allocated messages? */
2174 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2175 		return (ENXIO);
2176 
2177 	/* If MSI is blacklisted for this system, fail. */
2178 	if (pci_msi_blacklisted())
2179 		return (ENXIO);
2180 
2181 	/* MSI capability present? */
2182 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2183 		return (ENODEV);
2184 
2185 	if (bootverbose)
2186 		device_printf(child,
2187 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2188 		    *count, cfg->msi.msi_msgnum);
2189 
2190 	/* Don't ask for more than the device supports. */
2191 	actual = min(*count, cfg->msi.msi_msgnum);
2192 
2193 	/* Don't ask for more than 32 messages. */
2194 	actual = min(actual, 32);
2195 
2196 	/* MSI requires power of 2 number of messages. */
2197 	if (!powerof2(actual))
2198 		return (EINVAL);
2199 
2200 	for (;;) {
2201 		/* Try to allocate N messages. */
2202 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2203 		    actual, irqs);
2204 		if (error == 0)
2205 			break;
2206 		if (actual == 1)
2207 			return (error);
2208 
2209 		/* Try N / 2. */
2210 		actual >>= 1;
2211 	}
2212 
2213 	/*
2214 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2215 	 * resources in the irqs[] array, so add new resources
2216 	 * starting at rid 1.
2217 	 */
2218 	for (i = 0; i < actual; i++)
2219 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2220 		    irqs[i], irqs[i], 1);
2221 
2222 	if (bootverbose) {
2223 		if (actual == 1)
2224 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2225 		else {
2226 			int run;
2227 
2228 			/*
2229 			 * Be fancy and try to print contiguous runs
2230 			 * of IRQ values as ranges.  'run' is true if
2231 			 * we are in a range.
2232 			 */
2233 			device_printf(child, "using IRQs %d", irqs[0]);
2234 			run = 0;
2235 			for (i = 1; i < actual; i++) {
2236 
2237 				/* Still in a run? */
2238 				if (irqs[i] == irqs[i - 1] + 1) {
2239 					run = 1;
2240 					continue;
2241 				}
2242 
2243 				/* Finish previous range. */
2244 				if (run) {
2245 					printf("-%d", irqs[i - 1]);
2246 					run = 0;
2247 				}
2248 
2249 				/* Start new range. */
2250 				printf(",%d", irqs[i]);
2251 			}
2252 
2253 			/* Unfinished range? */
2254 			if (run)
2255 				printf("-%d", irqs[actual - 1]);
2256 			printf(" for MSI\n");
2257 		}
2258 	}
2259 
2260 	/* Update control register with actual count. */
2261 	ctrl = cfg->msi.msi_ctrl;
2262 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2263 	ctrl |= (ffs(actual) - 1) << 4;
2264 	cfg->msi.msi_ctrl = ctrl;
2265 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2266 
2267 	/* Update counts of alloc'd messages. */
2268 	cfg->msi.msi_alloc = actual;
2269 	cfg->msi.msi_handlers = 0;
2270 	*count = actual;
2271 	return (0);
2272 }
2273 
2274 /* Release the MSI messages associated with this device. */
2275 int
2276 pci_release_msi_method(device_t dev, device_t child)
2277 {
2278 	struct pci_devinfo *dinfo = device_get_ivars(child);
2279 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2280 	struct resource_list_entry *rle;
2281 	int error, i, irqs[32];
2282 
2283 	/* Try MSI-X first. */
2284 	error = pci_release_msix(dev, child);
2285 	if (error != ENODEV)
2286 		return (error);
2287 
2288 	/* Do we have any messages to release? */
2289 	if (msi->msi_alloc == 0)
2290 		return (ENODEV);
2291 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2292 
2293 	/* Make sure none of the resources are allocated. */
2294 	if (msi->msi_handlers > 0)
2295 		return (EBUSY);
2296 	for (i = 0; i < msi->msi_alloc; i++) {
2297 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2298 		KASSERT(rle != NULL, ("missing MSI resource"));
2299 		if (rle->res != NULL)
2300 			return (EBUSY);
2301 		irqs[i] = rle->start;
2302 	}
2303 
2304 	/* Update control register with 0 count. */
2305 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2306 	    ("%s: MSI still enabled", __func__));
2307 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2308 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2309 	    msi->msi_ctrl, 2);
2310 
2311 	/* Release the messages. */
2312 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2313 	for (i = 0; i < msi->msi_alloc; i++)
2314 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2315 
2316 	/* Update alloc count. */
2317 	msi->msi_alloc = 0;
2318 	msi->msi_addr = 0;
2319 	msi->msi_data = 0;
2320 	return (0);
2321 }
2322 
2323 /*
2324  * Return the max supported MSI messages this device supports.
2325  * Basically, assuming the MD code can alloc messages, this function
2326  * should return the maximum value that pci_alloc_msi() can return.
2327  * Thus, it is subject to the tunables, etc.
2328  */
2329 int
2330 pci_msi_count_method(device_t dev, device_t child)
2331 {
2332 	struct pci_devinfo *dinfo = device_get_ivars(child);
2333 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2334 
2335 	if (pci_do_msi && msi->msi_location != 0)
2336 		return (msi->msi_msgnum);
2337 	return (0);
2338 }
2339 
2340 /* free pcicfgregs structure and all depending data structures */
2341 
2342 int
2343 pci_freecfg(struct pci_devinfo *dinfo)
2344 {
2345 	struct devlist *devlist_head;
2346 	struct pci_map *pm, *next;
2347 	int i;
2348 
2349 	devlist_head = &pci_devq;
2350 
2351 	if (dinfo->cfg.vpd.vpd_reg) {
2352 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2353 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2354 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2355 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2356 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2357 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2358 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2359 	}
2360 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2361 		free(pm, M_DEVBUF);
2362 	}
2363 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2364 	free(dinfo, M_DEVBUF);
2365 
2366 	/* increment the generation count */
2367 	pci_generation++;
2368 
2369 	/* we're losing one device */
2370 	pci_numdevs--;
2371 	return (0);
2372 }
2373 
2374 /*
2375  * PCI power manangement
2376  */
2377 int
2378 pci_set_powerstate_method(device_t dev, device_t child, int state)
2379 {
2380 	struct pci_devinfo *dinfo = device_get_ivars(child);
2381 	pcicfgregs *cfg = &dinfo->cfg;
2382 	uint16_t status;
2383 	int result, oldstate, highest, delay;
2384 
2385 	if (cfg->pp.pp_cap == 0)
2386 		return (EOPNOTSUPP);
2387 
2388 	/*
2389 	 * Optimize a no state change request away.  While it would be OK to
2390 	 * write to the hardware in theory, some devices have shown odd
2391 	 * behavior when going from D3 -> D3.
2392 	 */
2393 	oldstate = pci_get_powerstate(child);
2394 	if (oldstate == state)
2395 		return (0);
2396 
2397 	/*
2398 	 * The PCI power management specification states that after a state
2399 	 * transition between PCI power states, system software must
2400 	 * guarantee a minimal delay before the function accesses the device.
2401 	 * Compute the worst case delay that we need to guarantee before we
2402 	 * access the device.  Many devices will be responsive much more
2403 	 * quickly than this delay, but there are some that don't respond
2404 	 * instantly to state changes.  Transitions to/from D3 state require
2405 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2406 	 * is done below with DELAY rather than a sleeper function because
2407 	 * this function can be called from contexts where we cannot sleep.
2408 	 */
2409 	highest = (oldstate > state) ? oldstate : state;
2410 	if (highest == PCI_POWERSTATE_D3)
2411 	    delay = 10000;
2412 	else if (highest == PCI_POWERSTATE_D2)
2413 	    delay = 200;
2414 	else
2415 	    delay = 0;
2416 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2417 	    & ~PCIM_PSTAT_DMASK;
2418 	result = 0;
2419 	switch (state) {
2420 	case PCI_POWERSTATE_D0:
2421 		status |= PCIM_PSTAT_D0;
2422 		break;
2423 	case PCI_POWERSTATE_D1:
2424 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2425 			return (EOPNOTSUPP);
2426 		status |= PCIM_PSTAT_D1;
2427 		break;
2428 	case PCI_POWERSTATE_D2:
2429 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2430 			return (EOPNOTSUPP);
2431 		status |= PCIM_PSTAT_D2;
2432 		break;
2433 	case PCI_POWERSTATE_D3:
2434 		status |= PCIM_PSTAT_D3;
2435 		break;
2436 	default:
2437 		return (EINVAL);
2438 	}
2439 
2440 	if (bootverbose)
2441 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2442 		    state);
2443 
2444 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2445 	if (delay)
2446 		DELAY(delay);
2447 	return (0);
2448 }
2449 
2450 int
2451 pci_get_powerstate_method(device_t dev, device_t child)
2452 {
2453 	struct pci_devinfo *dinfo = device_get_ivars(child);
2454 	pcicfgregs *cfg = &dinfo->cfg;
2455 	uint16_t status;
2456 	int result;
2457 
2458 	if (cfg->pp.pp_cap != 0) {
2459 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2460 		switch (status & PCIM_PSTAT_DMASK) {
2461 		case PCIM_PSTAT_D0:
2462 			result = PCI_POWERSTATE_D0;
2463 			break;
2464 		case PCIM_PSTAT_D1:
2465 			result = PCI_POWERSTATE_D1;
2466 			break;
2467 		case PCIM_PSTAT_D2:
2468 			result = PCI_POWERSTATE_D2;
2469 			break;
2470 		case PCIM_PSTAT_D3:
2471 			result = PCI_POWERSTATE_D3;
2472 			break;
2473 		default:
2474 			result = PCI_POWERSTATE_UNKNOWN;
2475 			break;
2476 		}
2477 	} else {
2478 		/* No support, device is always at D0 */
2479 		result = PCI_POWERSTATE_D0;
2480 	}
2481 	return (result);
2482 }
2483 
2484 /*
2485  * Some convenience functions for PCI device drivers.
2486  */
2487 
2488 static __inline void
2489 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2490 {
2491 	uint16_t	command;
2492 
2493 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2494 	command |= bit;
2495 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2496 }
2497 
2498 static __inline void
2499 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2500 {
2501 	uint16_t	command;
2502 
2503 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2504 	command &= ~bit;
2505 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2506 }
2507 
2508 int
2509 pci_enable_busmaster_method(device_t dev, device_t child)
2510 {
2511 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2512 	return (0);
2513 }
2514 
2515 int
2516 pci_disable_busmaster_method(device_t dev, device_t child)
2517 {
2518 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2519 	return (0);
2520 }
2521 
2522 int
2523 pci_enable_io_method(device_t dev, device_t child, int space)
2524 {
2525 	uint16_t bit;
2526 
2527 	switch(space) {
2528 	case SYS_RES_IOPORT:
2529 		bit = PCIM_CMD_PORTEN;
2530 		break;
2531 	case SYS_RES_MEMORY:
2532 		bit = PCIM_CMD_MEMEN;
2533 		break;
2534 	default:
2535 		return (EINVAL);
2536 	}
2537 	pci_set_command_bit(dev, child, bit);
2538 	return (0);
2539 }
2540 
2541 int
2542 pci_disable_io_method(device_t dev, device_t child, int space)
2543 {
2544 	uint16_t bit;
2545 
2546 	switch(space) {
2547 	case SYS_RES_IOPORT:
2548 		bit = PCIM_CMD_PORTEN;
2549 		break;
2550 	case SYS_RES_MEMORY:
2551 		bit = PCIM_CMD_MEMEN;
2552 		break;
2553 	default:
2554 		return (EINVAL);
2555 	}
2556 	pci_clear_command_bit(dev, child, bit);
2557 	return (0);
2558 }
2559 
2560 /*
2561  * New style pci driver.  Parent device is either a pci-host-bridge or a
2562  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2563  */
2564 
2565 void
2566 pci_print_verbose(struct pci_devinfo *dinfo)
2567 {
2568 
2569 	if (bootverbose) {
2570 		pcicfgregs *cfg = &dinfo->cfg;
2571 
2572 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2573 		    cfg->vendor, cfg->device, cfg->revid);
2574 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2575 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2576 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2577 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2578 		    cfg->mfdev);
2579 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2580 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2581 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2582 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2583 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2584 		if (cfg->intpin > 0)
2585 			printf("\tintpin=%c, irq=%d\n",
2586 			    cfg->intpin +'a' -1, cfg->intline);
2587 		if (cfg->pp.pp_cap) {
2588 			uint16_t status;
2589 
2590 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2591 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2592 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2593 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2594 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2595 			    status & PCIM_PSTAT_DMASK);
2596 		}
2597 		if (cfg->msi.msi_location) {
2598 			int ctrl;
2599 
2600 			ctrl = cfg->msi.msi_ctrl;
2601 			printf("\tMSI supports %d message%s%s%s\n",
2602 			    cfg->msi.msi_msgnum,
2603 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2604 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2605 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2606 		}
2607 		if (cfg->msix.msix_location) {
2608 			printf("\tMSI-X supports %d message%s ",
2609 			    cfg->msix.msix_msgnum,
2610 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2611 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2612 				printf("in map 0x%x\n",
2613 				    cfg->msix.msix_table_bar);
2614 			else
2615 				printf("in maps 0x%x and 0x%x\n",
2616 				    cfg->msix.msix_table_bar,
2617 				    cfg->msix.msix_pba_bar);
2618 		}
2619 	}
2620 }
2621 
2622 static int
2623 pci_porten(device_t dev)
2624 {
2625 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2626 }
2627 
2628 static int
2629 pci_memen(device_t dev)
2630 {
2631 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2632 }
2633 
2634 void
2635 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2636     int *bar64)
2637 {
2638 	struct pci_devinfo *dinfo;
2639 	pci_addr_t map, testval;
2640 	int ln2range;
2641 	uint16_t cmd;
2642 
2643 	/*
2644 	 * The device ROM BAR is special.  It is always a 32-bit
2645 	 * memory BAR.  Bit 0 is special and should not be set when
2646 	 * sizing the BAR.
2647 	 */
2648 	dinfo = device_get_ivars(dev);
2649 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2650 		map = pci_read_config(dev, reg, 4);
2651 		pci_write_config(dev, reg, 0xfffffffe, 4);
2652 		testval = pci_read_config(dev, reg, 4);
2653 		pci_write_config(dev, reg, map, 4);
2654 		*mapp = map;
2655 		*testvalp = testval;
2656 		if (bar64 != NULL)
2657 			*bar64 = 0;
2658 		return;
2659 	}
2660 
2661 	map = pci_read_config(dev, reg, 4);
2662 	ln2range = pci_maprange(map);
2663 	if (ln2range == 64)
2664 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2665 
2666 	/*
2667 	 * Disable decoding via the command register before
2668 	 * determining the BAR's length since we will be placing it in
2669 	 * a weird state.
2670 	 */
2671 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2672 	pci_write_config(dev, PCIR_COMMAND,
2673 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2674 
2675 	/*
2676 	 * Determine the BAR's length by writing all 1's.  The bottom
2677 	 * log_2(size) bits of the BAR will stick as 0 when we read
2678 	 * the value back.
2679 	 */
2680 	pci_write_config(dev, reg, 0xffffffff, 4);
2681 	testval = pci_read_config(dev, reg, 4);
2682 	if (ln2range == 64) {
2683 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2684 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2685 	}
2686 
2687 	/*
2688 	 * Restore the original value of the BAR.  We may have reprogrammed
2689 	 * the BAR of the low-level console device and when booting verbose,
2690 	 * we need the console device addressable.
2691 	 */
2692 	pci_write_config(dev, reg, map, 4);
2693 	if (ln2range == 64)
2694 		pci_write_config(dev, reg + 4, map >> 32, 4);
2695 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2696 
2697 	*mapp = map;
2698 	*testvalp = testval;
2699 	if (bar64 != NULL)
2700 		*bar64 = (ln2range == 64);
2701 }
2702 
2703 static void
2704 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2705 {
2706 	struct pci_devinfo *dinfo;
2707 	int ln2range;
2708 
2709 	/* The device ROM BAR is always a 32-bit memory BAR. */
2710 	dinfo = device_get_ivars(dev);
2711 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2712 		ln2range = 32;
2713 	else
2714 		ln2range = pci_maprange(pm->pm_value);
2715 	pci_write_config(dev, pm->pm_reg, base, 4);
2716 	if (ln2range == 64)
2717 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2718 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2719 	if (ln2range == 64)
2720 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2721 		    pm->pm_reg + 4, 4) << 32;
2722 }
2723 
2724 struct pci_map *
2725 pci_find_bar(device_t dev, int reg)
2726 {
2727 	struct pci_devinfo *dinfo;
2728 	struct pci_map *pm;
2729 
2730 	dinfo = device_get_ivars(dev);
2731 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2732 		if (pm->pm_reg == reg)
2733 			return (pm);
2734 	}
2735 	return (NULL);
2736 }
2737 
2738 int
2739 pci_bar_enabled(device_t dev, struct pci_map *pm)
2740 {
2741 	struct pci_devinfo *dinfo;
2742 	uint16_t cmd;
2743 
2744 	dinfo = device_get_ivars(dev);
2745 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2746 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2747 		return (0);
2748 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2749 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2750 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2751 	else
2752 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2753 }
2754 
2755 struct pci_map *
2756 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2757 {
2758 	struct pci_devinfo *dinfo;
2759 	struct pci_map *pm, *prev;
2760 
2761 	dinfo = device_get_ivars(dev);
2762 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2763 	pm->pm_reg = reg;
2764 	pm->pm_value = value;
2765 	pm->pm_size = size;
2766 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2767 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2768 		    reg));
2769 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2770 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2771 			break;
2772 	}
2773 	if (prev != NULL)
2774 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2775 	else
2776 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2777 	return (pm);
2778 }
2779 
2780 static void
2781 pci_restore_bars(device_t dev)
2782 {
2783 	struct pci_devinfo *dinfo;
2784 	struct pci_map *pm;
2785 	int ln2range;
2786 
2787 	dinfo = device_get_ivars(dev);
2788 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2789 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2790 			ln2range = 32;
2791 		else
2792 			ln2range = pci_maprange(pm->pm_value);
2793 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2794 		if (ln2range == 64)
2795 			pci_write_config(dev, pm->pm_reg + 4,
2796 			    pm->pm_value >> 32, 4);
2797 	}
2798 }
2799 
2800 /*
2801  * Add a resource based on a pci map register. Return 1 if the map
2802  * register is a 32bit map register or 2 if it is a 64bit register.
2803  */
2804 static int
2805 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2806     int force, int prefetch)
2807 {
2808 	struct pci_map *pm;
2809 	pci_addr_t base, map, testval;
2810 	pci_addr_t start, end, count;
2811 	int barlen, basezero, flags, maprange, mapsize, type;
2812 	uint16_t cmd;
2813 	struct resource *res;
2814 
2815 	/*
2816 	 * The BAR may already exist if the device is a CardBus card
2817 	 * whose CIS is stored in this BAR.
2818 	 */
2819 	pm = pci_find_bar(dev, reg);
2820 	if (pm != NULL) {
2821 		maprange = pci_maprange(pm->pm_value);
2822 		barlen = maprange == 64 ? 2 : 1;
2823 		return (barlen);
2824 	}
2825 
2826 	pci_read_bar(dev, reg, &map, &testval, NULL);
2827 	if (PCI_BAR_MEM(map)) {
2828 		type = SYS_RES_MEMORY;
2829 		if (map & PCIM_BAR_MEM_PREFETCH)
2830 			prefetch = 1;
2831 	} else
2832 		type = SYS_RES_IOPORT;
2833 	mapsize = pci_mapsize(testval);
2834 	base = pci_mapbase(map);
2835 #ifdef __PCI_BAR_ZERO_VALID
2836 	basezero = 0;
2837 #else
2838 	basezero = base == 0;
2839 #endif
2840 	maprange = pci_maprange(map);
2841 	barlen = maprange == 64 ? 2 : 1;
2842 
2843 	/*
2844 	 * For I/O registers, if bottom bit is set, and the next bit up
2845 	 * isn't clear, we know we have a BAR that doesn't conform to the
2846 	 * spec, so ignore it.  Also, sanity check the size of the data
2847 	 * areas to the type of memory involved.  Memory must be at least
2848 	 * 16 bytes in size, while I/O ranges must be at least 4.
2849 	 */
2850 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2851 		return (barlen);
2852 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2853 	    (type == SYS_RES_IOPORT && mapsize < 2))
2854 		return (barlen);
2855 
2856 	/* Save a record of this BAR. */
2857 	pm = pci_add_bar(dev, reg, map, mapsize);
2858 	if (bootverbose) {
2859 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2860 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2861 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2862 			printf(", port disabled\n");
2863 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2864 			printf(", memory disabled\n");
2865 		else
2866 			printf(", enabled\n");
2867 	}
2868 
2869 	/*
2870 	 * If base is 0, then we have problems if this architecture does
2871 	 * not allow that.  It is best to ignore such entries for the
2872 	 * moment.  These will be allocated later if the driver specifically
2873 	 * requests them.  However, some removable busses look better when
2874 	 * all resources are allocated, so allow '0' to be overriden.
2875 	 *
2876 	 * Similarly treat maps whose values is the same as the test value
2877 	 * read back.  These maps have had all f's written to them by the
2878 	 * BIOS in an attempt to disable the resources.
2879 	 */
2880 	if (!force && (basezero || map == testval))
2881 		return (barlen);
2882 	if ((u_long)base != base) {
2883 		device_printf(bus,
2884 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2885 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2886 		    pci_get_function(dev), reg);
2887 		return (barlen);
2888 	}
2889 
2890 	/*
2891 	 * This code theoretically does the right thing, but has
2892 	 * undesirable side effects in some cases where peripherals
2893 	 * respond oddly to having these bits enabled.  Let the user
2894 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2895 	 * default).
2896 	 */
2897 	if (pci_enable_io_modes) {
2898 		/* Turn on resources that have been left off by a lazy BIOS */
2899 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2900 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2901 			cmd |= PCIM_CMD_PORTEN;
2902 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2903 		}
2904 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2905 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2906 			cmd |= PCIM_CMD_MEMEN;
2907 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2908 		}
2909 	} else {
2910 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2911 			return (barlen);
2912 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2913 			return (barlen);
2914 	}
2915 
2916 	count = (pci_addr_t)1 << mapsize;
2917 	flags = RF_ALIGNMENT_LOG2(mapsize);
2918 	if (prefetch)
2919 		flags |= RF_PREFETCHABLE;
2920 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2921 		start = 0;	/* Let the parent decide. */
2922 		end = ~0ul;
2923 	} else {
2924 		start = base;
2925 		end = base + count - 1;
2926 	}
2927 	resource_list_add(rl, type, reg, start, end, count);
2928 
2929 	/*
2930 	 * Try to allocate the resource for this BAR from our parent
2931 	 * so that this resource range is already reserved.  The
2932 	 * driver for this device will later inherit this resource in
2933 	 * pci_alloc_resource().
2934 	 */
2935 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2936 	    flags);
2937 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2938 		/*
2939 		 * If the allocation fails, try to allocate a resource for
2940 		 * this BAR using any available range.  The firmware felt
2941 		 * it was important enough to assign a resource, so don't
2942 		 * disable decoding if we can help it.
2943 		 */
2944 		resource_list_delete(rl, type, reg);
2945 		resource_list_add(rl, type, reg, 0, ~0ul, count);
2946 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2947 		    count, flags);
2948 	}
2949 	if (res == NULL) {
2950 		/*
2951 		 * If the allocation fails, delete the resource list entry
2952 		 * and disable decoding for this device.
2953 		 *
2954 		 * If the driver requests this resource in the future,
2955 		 * pci_reserve_map() will try to allocate a fresh
2956 		 * resource range.
2957 		 */
2958 		resource_list_delete(rl, type, reg);
2959 		pci_disable_io(dev, type);
2960 		if (bootverbose)
2961 			device_printf(bus,
2962 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2963 			    pci_get_domain(dev), pci_get_bus(dev),
2964 			    pci_get_slot(dev), pci_get_function(dev), reg);
2965 	} else {
2966 		start = rman_get_start(res);
2967 		pci_write_bar(dev, pm, start);
2968 	}
2969 	return (barlen);
2970 }
2971 
2972 /*
2973  * For ATA devices we need to decide early what addressing mode to use.
2974  * Legacy demands that the primary and secondary ATA ports sits on the
2975  * same addresses that old ISA hardware did. This dictates that we use
2976  * those addresses and ignore the BAR's if we cannot set PCI native
2977  * addressing mode.
2978  */
2979 static void
2980 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2981     uint32_t prefetchmask)
2982 {
2983 	struct resource *r;
2984 	int rid, type, progif;
2985 #if 0
2986 	/* if this device supports PCI native addressing use it */
2987 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2988 	if ((progif & 0x8a) == 0x8a) {
2989 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2990 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2991 			printf("Trying ATA native PCI addressing mode\n");
2992 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2993 		}
2994 	}
2995 #endif
2996 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2997 	type = SYS_RES_IOPORT;
2998 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2999 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3000 		    prefetchmask & (1 << 0));
3001 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3002 		    prefetchmask & (1 << 1));
3003 	} else {
3004 		rid = PCIR_BAR(0);
3005 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3006 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3007 		    0x1f7, 8, 0);
3008 		rid = PCIR_BAR(1);
3009 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3010 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3011 		    0x3f6, 1, 0);
3012 	}
3013 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3014 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3015 		    prefetchmask & (1 << 2));
3016 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3017 		    prefetchmask & (1 << 3));
3018 	} else {
3019 		rid = PCIR_BAR(2);
3020 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3021 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3022 		    0x177, 8, 0);
3023 		rid = PCIR_BAR(3);
3024 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3025 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3026 		    0x376, 1, 0);
3027 	}
3028 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3029 	    prefetchmask & (1 << 4));
3030 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3031 	    prefetchmask & (1 << 5));
3032 }
3033 
3034 static void
3035 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3036 {
3037 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3038 	pcicfgregs *cfg = &dinfo->cfg;
3039 	char tunable_name[64];
3040 	int irq;
3041 
3042 	/* Has to have an intpin to have an interrupt. */
3043 	if (cfg->intpin == 0)
3044 		return;
3045 
3046 	/* Let the user override the IRQ with a tunable. */
3047 	irq = PCI_INVALID_IRQ;
3048 	snprintf(tunable_name, sizeof(tunable_name),
3049 	    "hw.pci%d.%d.%d.INT%c.irq",
3050 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3051 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3052 		irq = PCI_INVALID_IRQ;
3053 
3054 	/*
3055 	 * If we didn't get an IRQ via the tunable, then we either use the
3056 	 * IRQ value in the intline register or we ask the bus to route an
3057 	 * interrupt for us.  If force_route is true, then we only use the
3058 	 * value in the intline register if the bus was unable to assign an
3059 	 * IRQ.
3060 	 */
3061 	if (!PCI_INTERRUPT_VALID(irq)) {
3062 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3063 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3064 		if (!PCI_INTERRUPT_VALID(irq))
3065 			irq = cfg->intline;
3066 	}
3067 
3068 	/* If after all that we don't have an IRQ, just bail. */
3069 	if (!PCI_INTERRUPT_VALID(irq))
3070 		return;
3071 
3072 	/* Update the config register if it changed. */
3073 	if (irq != cfg->intline) {
3074 		cfg->intline = irq;
3075 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3076 	}
3077 
3078 	/* Add this IRQ as rid 0 interrupt resource. */
3079 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3080 }
3081 
3082 /* Perform early OHCI takeover from SMM. */
3083 static void
3084 ohci_early_takeover(device_t self)
3085 {
3086 	struct resource *res;
3087 	uint32_t ctl;
3088 	int rid;
3089 	int i;
3090 
3091 	rid = PCIR_BAR(0);
3092 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3093 	if (res == NULL)
3094 		return;
3095 
3096 	ctl = bus_read_4(res, OHCI_CONTROL);
3097 	if (ctl & OHCI_IR) {
3098 		if (bootverbose)
3099 			printf("ohci early: "
3100 			    "SMM active, request owner change\n");
3101 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3102 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3103 			DELAY(1000);
3104 			ctl = bus_read_4(res, OHCI_CONTROL);
3105 		}
3106 		if (ctl & OHCI_IR) {
3107 			if (bootverbose)
3108 				printf("ohci early: "
3109 				    "SMM does not respond, resetting\n");
3110 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3111 		}
3112 		/* Disable interrupts */
3113 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3114 	}
3115 
3116 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3117 }
3118 
3119 /* Perform early UHCI takeover from SMM. */
3120 static void
3121 uhci_early_takeover(device_t self)
3122 {
3123 	struct resource *res;
3124 	int rid;
3125 
3126 	/*
3127 	 * Set the PIRQD enable bit and switch off all the others. We don't
3128 	 * want legacy support to interfere with us XXX Does this also mean
3129 	 * that the BIOS won't touch the keyboard anymore if it is connected
3130 	 * to the ports of the root hub?
3131 	 */
3132 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3133 
3134 	/* Disable interrupts */
3135 	rid = PCI_UHCI_BASE_REG;
3136 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3137 	if (res != NULL) {
3138 		bus_write_2(res, UHCI_INTR, 0);
3139 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3140 	}
3141 }
3142 
/*
 * Perform early EHCI takeover from SMM.
 *
 * Temporarily maps BAR 0 to read the capability parameters, then walks
 * the extended capability list in PCI config space.  For every
 * legacy-support capability whose BIOS semaphore is non-zero, the OS
 * semaphore is set and the BIOS semaphore is polled for up to 100
 * iterations of DELAY(1000).  Whether or not the BIOS lets go, the
 * controller's interrupt-enable register is then cleared.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.
	 *
	 * eecp is a config-space offset; the loop's step expression reads
	 * the next offset out of 'eec', which is loaded at the top of each
	 * iteration (so it is also valid on the 'continue' paths).
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* BIOS semaphore already clear: nothing to take over. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll until the BIOS semaphore clears or attempts run out. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		/* A timeout is only reported; the takeover continues. */
		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3198 
3199 /* Perform early XHCI takeover from SMM. */
3200 static void
3201 xhci_early_takeover(device_t self)
3202 {
3203 	struct resource *res;
3204 	uint32_t cparams;
3205 	uint32_t eec;
3206 	uint8_t eecp;
3207 	uint8_t bios_sem;
3208 	uint8_t offs;
3209 	int rid;
3210 	int i;
3211 
3212 	rid = PCIR_BAR(0);
3213 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3214 	if (res == NULL)
3215 		return;
3216 
3217 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3218 
3219 	eec = -1;
3220 
3221 	/* Synchronise with the BIOS if it owns the controller. */
3222 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3223 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3224 		eec = bus_read_4(res, eecp);
3225 
3226 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3227 			continue;
3228 
3229 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3230 		if (bios_sem == 0)
3231 			continue;
3232 
3233 		if (bootverbose)
3234 			printf("xhci early: "
3235 			    "SMM active, request owner change\n");
3236 
3237 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3238 
3239 		/* wait a maximum of 5 second */
3240 
3241 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3242 			DELAY(1000);
3243 			bios_sem = bus_read_1(res, eecp +
3244 			    XHCI_XECP_BIOS_SEM);
3245 		}
3246 
3247 		if (bios_sem != 0) {
3248 			if (bootverbose)
3249 				printf("xhci early: "
3250 				    "SMM does not respond\n");
3251 		}
3252 
3253 		/* Disable interrupts */
3254 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3255 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3256 		bus_read_4(res, offs + XHCI_USBSTS);
3257 	}
3258 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3259 }
3260 
3261 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range of a bridge device.
 *
 * Does nothing unless the header type is a PCI-PCI bridge or a CardBus
 * bridge.  Reads the secondary and subordinate bus registers, applies
 * device-specific quirks, and if the resulting range is valid adds a
 * PCI_RES_BUS entry (rid 0) to 'rl' and tries to reserve that range from
 * the parent.  If the range is invalid, the reservation fails, or
 * pci_clear_buses is set, both registers are written to zero.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* The register offsets depend on the header layout. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/*
		 * Derive both bus numbers from config register 0x41
		 * unless it reads as 0xff.
		 */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/*
		 * Compaq R3000 BIOS sets wrong subordinate bus number.
		 * The quirk applies only when both SMBIOS environment
		 * strings match; each fetched string is freed on every
		 * path.
		 * NOTE(review): the maker string compared is "Compal",
		 * not "Compaq" - presumably the board manufacturer;
		 * confirm.
		 */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		/* Raise the subordinate bus number to at least 0xa. */
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		/*
		 * The entry is added with an unconstrained range but the
		 * existing size, so it survives the 'clear' path below.
		 */
		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		/* Success: leave the registers as they are. */
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	/* Invalid range, failed reservation, or forced renumbering. */
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3366 
3367 static struct resource *
3368 pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
3369     u_long end, u_long count, u_int flags)
3370 {
3371 	struct pci_devinfo *dinfo;
3372 	pcicfgregs *cfg;
3373 	struct resource_list *rl;
3374 	struct resource *res;
3375 	int sec_reg, sub_reg;
3376 
3377 	dinfo = device_get_ivars(child);
3378 	cfg = &dinfo->cfg;
3379 	rl = &dinfo->resources;
3380 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3381 	case PCIM_HDRTYPE_BRIDGE:
3382 		sec_reg = PCIR_SECBUS_1;
3383 		sub_reg = PCIR_SUBBUS_1;
3384 		break;
3385 	case PCIM_HDRTYPE_CARDBUS:
3386 		sec_reg = PCIR_SECBUS_2;
3387 		sub_reg = PCIR_SUBBUS_2;
3388 		break;
3389 	default:
3390 		return (NULL);
3391 	}
3392 
3393 	if (*rid != 0)
3394 		return (NULL);
3395 
3396 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3397 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3398 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3399 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3400 		    start, end, count, flags & ~RF_ACTIVE);
3401 		if (res == NULL) {
3402 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3403 			device_printf(child, "allocating %lu bus%s failed\n",
3404 			    count, count == 1 ? "" : "es");
3405 			return (NULL);
3406 		}
3407 		if (bootverbose)
3408 			device_printf(child,
3409 			    "Lazy allocation of %lu bus%s at %lu\n", count,
3410 			    count == 1 ? "" : "es", rman_get_start(res));
3411 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3412 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3413 	}
3414 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3415 	    end, count, flags));
3416 }
3417 #endif
3418 
3419 void
3420 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3421 {
3422 	struct pci_devinfo *dinfo;
3423 	pcicfgregs *cfg;
3424 	struct resource_list *rl;
3425 	const struct pci_quirk *q;
3426 	uint32_t devid;
3427 	int i;
3428 
3429 	dinfo = device_get_ivars(dev);
3430 	cfg = &dinfo->cfg;
3431 	rl = &dinfo->resources;
3432 	devid = (cfg->device << 16) | cfg->vendor;
3433 
3434 	/* ATA devices needs special map treatment */
3435 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3436 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3437 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3438 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3439 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3440 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3441 	else
3442 		for (i = 0; i < cfg->nummaps;) {
3443 			/*
3444 			 * Skip quirked resources.
3445 			 */
3446 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3447 				if (q->devid == devid &&
3448 				    q->type == PCI_QUIRK_UNMAP_REG &&
3449 				    q->arg1 == PCIR_BAR(i))
3450 					break;
3451 			if (q->devid != 0) {
3452 				i++;
3453 				continue;
3454 			}
3455 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3456 			    prefetchmask & (1 << i));
3457 		}
3458 
3459 	/*
3460 	 * Add additional, quirked resources.
3461 	 */
3462 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3463 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3464 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3465 
3466 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3467 #ifdef __PCI_REROUTE_INTERRUPT
3468 		/*
3469 		 * Try to re-route interrupts. Sometimes the BIOS or
3470 		 * firmware may leave bogus values in these registers.
3471 		 * If the re-route fails, then just stick with what we
3472 		 * have.
3473 		 */
3474 		pci_assign_interrupt(bus, dev, 1);
3475 #else
3476 		pci_assign_interrupt(bus, dev, 0);
3477 #endif
3478 	}
3479 
3480 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3481 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3482 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3483 			xhci_early_takeover(dev);
3484 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3485 			ehci_early_takeover(dev);
3486 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3487 			ohci_early_takeover(dev);
3488 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3489 			uhci_early_takeover(dev);
3490 	}
3491 
3492 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3493 	/*
3494 	 * Reserve resources for secondary bus ranges behind bridge
3495 	 * devices.
3496 	 */
3497 	pci_reserve_secbus(bus, dev, cfg, rl);
3498 #endif
3499 }
3500 
3501 static struct pci_devinfo *
3502 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3503     int slot, int func, size_t dinfo_size)
3504 {
3505 	struct pci_devinfo *dinfo;
3506 
3507 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3508 	if (dinfo != NULL)
3509 		pci_add_child(dev, dinfo);
3510 
3511 	return (dinfo);
3512 }
3513 
3514 void
3515 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3516 {
3517 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3518 	device_t pcib = device_get_parent(dev);
3519 	struct pci_devinfo *dinfo;
3520 	int maxslots;
3521 	int s, f, pcifunchigh;
3522 	uint8_t hdrtype;
3523 	int first_func;
3524 
3525 	/*
3526 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3527 	 * enable ARI.  We must enable ARI before detecting the rest of the
3528 	 * functions on this bus as ARI changes the set of slots and functions
3529 	 * that are legal on this bus.
3530 	 */
3531 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
3532 	    dinfo_size);
3533 	if (dinfo != NULL && pci_enable_ari)
3534 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3535 
3536 	/*
3537 	 * Start looking for new devices on slot 0 at function 1 because we
3538 	 * just identified the device at slot 0, function 0.
3539 	 */
3540 	first_func = 1;
3541 
3542 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3543 	    ("dinfo_size too small"));
3544 	maxslots = PCIB_MAXSLOTS(pcib);
3545 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3546 		pcifunchigh = 0;
3547 		f = 0;
3548 		DELAY(1);
3549 		hdrtype = REG(PCIR_HDRTYPE, 1);
3550 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3551 			continue;
3552 		if (hdrtype & PCIM_MFDEV)
3553 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3554 		for (f = first_func; f <= pcifunchigh; f++)
3555 			pci_identify_function(pcib, dev, domain, busno, s, f,
3556 			    dinfo_size);
3557 	}
3558 #undef REG
3559 }
3560 
3561 #ifdef PCI_IOV
3562 device_t
3563 pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
3564     uint16_t vid, uint16_t did)
3565 {
3566 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3567 	device_t pcib;
3568 	int busno, slot, func;
3569 
3570 	pf_dinfo = device_get_ivars(pf);
3571 
3572 	/*
3573 	 * Do a sanity check that we have been passed the correct size.  If this
3574 	 * test fails then likely the pci subclass hasn't implemented the
3575 	 * pci_create_iov_child method like it's supposed it.
3576 	 */
3577 	if (size != pf_dinfo->cfg.devinfo_size) {
3578 		device_printf(pf,
3579 		    "PCI subclass does not properly implement PCI_IOV\n");
3580 		return (NULL);
3581 	}
3582 
3583 	pcib = device_get_parent(bus);
3584 
3585 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3586 
3587 	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
3588 	    vid, did, size);
3589 
3590 	vf_dinfo->cfg.flags |= PCICFG_VF;
3591 	pci_add_child(bus, vf_dinfo);
3592 
3593 	return (vf_dinfo->cfg.dev);
3594 }
3595 
/*
 * Create an SR-IOV VF child using the base struct pci_devinfo size.
 * Simply forwards to pci_add_iov_child() and returns its result.
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
	    did));
}
3604 #endif
3605 
3606 void
3607 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3608 {
3609 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3610 	device_set_ivars(dinfo->cfg.dev, dinfo);
3611 	resource_list_init(&dinfo->resources);
3612 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3613 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3614 	pci_print_verbose(dinfo);
3615 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3616 	pci_child_added(dinfo->cfg.dev);
3617 }
3618 
/*
 * Child-added notification for the PCI bus.  This implementation
 * intentionally does nothing.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3624 
/*
 * Probe routine for the PCI bus driver: sets the device description and
 * always returns BUS_PROBE_GENERIC.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3634 
/*
 * Attach work for a PCI bus device that does not involve enumerating
 * children.
 *
 * With PCI_RES_BUS, allocates this bus's own bus number as a PCI_RES_BUS
 * resource and fails with ENXIO if that is not possible.  Then selects
 * the DMA tag stored in the softc: with PCI_DMA_BOUNDARY, a new tag is
 * created when the grandparent device is not in the "pci" devclass;
 * otherwise, or if creation fails, the tag from bus_get_dma_tag() is
 * used.
 *
 * Returns 0 on success or ENXIO.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Claim exactly our own bus number. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/* Only create a tag when our grandparent is not itself a PCI bus. */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/*
	 * This 'if' guards the assignment that follows the #endif; without
	 * PCI_DMA_BOUNDARY the assignment is unconditional.
	 */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3681 
3682 static int
3683 pci_attach(device_t dev)
3684 {
3685 	int busno, domain, error;
3686 
3687 	error = pci_attach_common(dev);
3688 	if (error)
3689 		return (error);
3690 
3691 	/*
3692 	 * Since there can be multiple independantly numbered PCI
3693 	 * busses on systems with multiple PCI domains, we can't use
3694 	 * the unit number to decide which bus we are probing. We ask
3695 	 * the parent pcib what our domain and bus numbers are.
3696 	 */
3697 	domain = pcib_get_domain(dev);
3698 	busno = pcib_get_bus(dev);
3699 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3700 	return (bus_generic_attach(dev));
3701 }
3702 
3703 #ifdef PCI_RES_BUS
3704 static int
3705 pci_detach(device_t dev)
3706 {
3707 	struct pci_softc *sc;
3708 	int error;
3709 
3710 	error = bus_generic_detach(dev);
3711 	if (error)
3712 		return (error);
3713 	sc = device_get_softc(dev);
3714 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3715 }
3716 #endif
3717 
3718 static void
3719 pci_set_power_child(device_t dev, device_t child, int state)
3720 {
3721 	struct pci_devinfo *dinfo;
3722 	device_t pcib;
3723 	int dstate;
3724 
3725 	/*
3726 	 * Set the device to the given state.  If the firmware suggests
3727 	 * a different power state, use it instead.  If power management
3728 	 * is not present, the firmware is responsible for managing
3729 	 * device power.  Skip children who aren't attached since they
3730 	 * are handled separately.
3731 	 */
3732 	pcib = device_get_parent(dev);
3733 	dinfo = device_get_ivars(child);
3734 	dstate = state;
3735 	if (device_is_attached(child) &&
3736 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
3737 		pci_set_powerstate(child, dstate);
3738 }
3739 
3740 int
3741 pci_suspend_child(device_t dev, device_t child)
3742 {
3743 	struct pci_devinfo *dinfo;
3744 	int error;
3745 
3746 	dinfo = device_get_ivars(child);
3747 
3748 	/*
3749 	 * Save the PCI configuration space for the child and set the
3750 	 * device in the appropriate power state for this sleep state.
3751 	 */
3752 	pci_cfg_save(child, dinfo, 0);
3753 
3754 	/* Suspend devices before potentially powering them down. */
3755 	error = bus_generic_suspend_child(dev, child);
3756 
3757 	if (error)
3758 		return (error);
3759 
3760 	if (pci_do_power_suspend)
3761 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
3762 
3763 	return (0);
3764 }
3765 
3766 int
3767 pci_resume_child(device_t dev, device_t child)
3768 {
3769 	struct pci_devinfo *dinfo;
3770 
3771 	if (pci_do_power_resume)
3772 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
3773 
3774 	dinfo = device_get_ivars(child);
3775 	pci_cfg_restore(child, dinfo);
3776 	if (!device_is_attached(child))
3777 		pci_cfg_save(child, dinfo, 1);
3778 
3779 	bus_generic_resume_child(dev, child);
3780 
3781 	return (0);
3782 }
3783 
3784 int
3785 pci_resume(device_t dev)
3786 {
3787 	device_t child, *devlist;
3788 	int error, i, numdevs;
3789 
3790 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3791 		return (error);
3792 
3793 	/*
3794 	 * Resume critical devices first, then everything else later.
3795 	 */
3796 	for (i = 0; i < numdevs; i++) {
3797 		child = devlist[i];
3798 		switch (pci_get_class(child)) {
3799 		case PCIC_DISPLAY:
3800 		case PCIC_MEMORY:
3801 		case PCIC_BRIDGE:
3802 		case PCIC_BASEPERIPH:
3803 			BUS_RESUME_CHILD(dev, child);
3804 			break;
3805 		}
3806 	}
3807 	for (i = 0; i < numdevs; i++) {
3808 		child = devlist[i];
3809 		switch (pci_get_class(child)) {
3810 		case PCIC_DISPLAY:
3811 		case PCIC_MEMORY:
3812 		case PCIC_BRIDGE:
3813 		case PCIC_BASEPERIPH:
3814 			break;
3815 		default:
3816 			BUS_RESUME_CHILD(dev, child);
3817 		}
3818 	}
3819 	free(devlist, M_TEMP);
3820 	return (0);
3821 }
3822 
3823 static void
3824 pci_load_vendor_data(void)
3825 {
3826 	caddr_t data;
3827 	void *ptr;
3828 	size_t sz;
3829 
3830 	data = preload_search_by_type("pci_vendor_data");
3831 	if (data != NULL) {
3832 		ptr = preload_fetch_addr(data);
3833 		sz = preload_fetch_size(data);
3834 		if (ptr != NULL && sz != 0) {
3835 			pci_vendordata = ptr;
3836 			pci_vendordata_size = sz;
3837 			/* terminate the database */
3838 			pci_vendordata[pci_vendordata_size] = '\n';
3839 		}
3840 	}
3841 }
3842 
3843 void
3844 pci_driver_added(device_t dev, driver_t *driver)
3845 {
3846 	int numdevs;
3847 	device_t *devlist;
3848 	device_t child;
3849 	struct pci_devinfo *dinfo;
3850 	int i;
3851 
3852 	if (bootverbose)
3853 		device_printf(dev, "driver added\n");
3854 	DEVICE_IDENTIFY(driver, dev);
3855 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3856 		return;
3857 	for (i = 0; i < numdevs; i++) {
3858 		child = devlist[i];
3859 		if (device_get_state(child) != DS_NOTPRESENT)
3860 			continue;
3861 		dinfo = device_get_ivars(child);
3862 		pci_print_verbose(dinfo);
3863 		if (bootverbose)
3864 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3865 		pci_cfg_restore(child, dinfo);
3866 		if (device_probe_and_attach(child) != 0)
3867 			pci_child_detached(dev, child);
3868 	}
3869 	free(devlist, M_TEMP);
3870 }
3871 
/*
 * Install an interrupt handler for a child and program the matching
 * interrupt delivery mechanism.
 *
 * The handler is first installed through bus_generic_setup_intr().  For
 * devices that are not direct children nothing else is done.  For direct
 * children the rid of 'irq' selects the mechanism: rid 0 is INTx, any
 * other rid is MSI when MSI messages are allocated and MSI-X otherwise.
 * On the first handler for an MSI device or an MSI-X table entry, the
 * address/data pair is obtained from the parent bridge (if not already
 * cached) and written to the device.
 *
 * Returns 0 and stores the handler cookie in *cookiep on success.  On
 * failure returns the error; if mapping the message failed the handler
 * that was just installed is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map once, enable on the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/*
			 * MSI-X: rids are 1-based indices into the table;
			 * each table entry names a 1-based vector.
			 */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask on the entry's first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * Reached by fall-through with error == 0 on success, or by
		 * 'goto bad' with the PCIB_MAP_MSI() error set.
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3971 
3972 int
3973 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3974     void *cookie)
3975 {
3976 	struct msix_table_entry *mte;
3977 	struct resource_list_entry *rle;
3978 	struct pci_devinfo *dinfo;
3979 	int error, rid;
3980 
3981 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3982 		return (EINVAL);
3983 
3984 	/* If this isn't a direct child, just bail out */
3985 	if (device_get_parent(child) != dev)
3986 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3987 
3988 	rid = rman_get_rid(irq);
3989 	if (rid == 0) {
3990 		/* Mask INTx */
3991 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3992 	} else {
3993 		/*
3994 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3995 		 * decrement the appropriate handlers count and mask the
3996 		 * MSI-X message, or disable MSI messages if the count
3997 		 * drops to 0.
3998 		 */
3999 		dinfo = device_get_ivars(child);
4000 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4001 		if (rle->res != irq)
4002 			return (EINVAL);
4003 		if (dinfo->cfg.msi.msi_alloc > 0) {
4004 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4005 			    ("MSI-X index too high"));
4006 			if (dinfo->cfg.msi.msi_handlers == 0)
4007 				return (EINVAL);
4008 			dinfo->cfg.msi.msi_handlers--;
4009 			if (dinfo->cfg.msi.msi_handlers == 0)
4010 				pci_disable_msi(child);
4011 		} else {
4012 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4013 			    ("No MSI or MSI-X interrupts allocated"));
4014 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4015 			    ("MSI-X index too high"));
4016 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4017 			if (mte->mte_handlers == 0)
4018 				return (EINVAL);
4019 			mte->mte_handlers--;
4020 			if (mte->mte_handlers == 0)
4021 				pci_mask_msix(child, rid - 1);
4022 		}
4023 	}
4024 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4025 	if (rid > 0)
4026 		KASSERT(error == 0,
4027 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4028 	return (error);
4029 }
4030 
4031 int
4032 pci_print_child(device_t dev, device_t child)
4033 {
4034 	struct pci_devinfo *dinfo;
4035 	struct resource_list *rl;
4036 	int retval = 0;
4037 
4038 	dinfo = device_get_ivars(child);
4039 	rl = &dinfo->resources;
4040 
4041 	retval += bus_print_child_header(dev, child);
4042 
4043 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4044 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4045 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4046 	if (device_get_flags(dev))
4047 		retval += printf(" flags %#x", device_get_flags(dev));
4048 
4049 	retval += printf(" at device %d.%d", pci_get_slot(child),
4050 	    pci_get_function(child));
4051 
4052 	retval += bus_print_child_domain(dev, child);
4053 	retval += bus_print_child_footer(dev, child);
4054 
4055 	return (retval);
4056 }
4057 
4058 static const struct
4059 {
4060 	int		class;
4061 	int		subclass;
4062 	int		report; /* 0 = bootverbose, 1 = always */
4063 	const char	*desc;
4064 } pci_nomatch_tab[] = {
4065 	{PCIC_OLD,		-1,			1, "old"},
4066 	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
4067 	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
4068 	{PCIC_STORAGE,		-1,			1, "mass storage"},
4069 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
4070 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
4071 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
4072 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
4073 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
4074 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
4075 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
4076 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
4077 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
4078 	{PCIC_NETWORK,		-1,			1, "network"},
4079 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
4080 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
4081 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
4082 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
4083 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
4084 	{PCIC_DISPLAY,		-1,			1, "display"},
4085 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
4086 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
4087 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
4088 	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
4089 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
4090 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
4091 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
4092 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
4093 	{PCIC_MEMORY,		-1,			1, "memory"},
4094 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
4095 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
4096 	{PCIC_BRIDGE,		-1,			1, "bridge"},
4097 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
4098 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
4099 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
4100 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
4101 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
4102 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
4103 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
4104 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
4105 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
4106 	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
4107 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
4108 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
4109 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
4110 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
4111 	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
4112 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
4113 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
4114 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
4115 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
4116 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
4117 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
4118 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
4119 	{PCIC_INPUTDEV,		-1,			1, "input device"},
4120 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
4121 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
4122 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
4123 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
4124 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
4125 	{PCIC_DOCKING,		-1,			1, "docking station"},
4126 	{PCIC_PROCESSOR,	-1,			1, "processor"},
4127 	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
4128 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
4129 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
4130 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
4131 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
4132 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
4133 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
4134 	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
4135 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
4136 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
4137 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
4138 	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
4139 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
4140 	{PCIC_SATCOM,		-1,			1, "satellite communication"},
4141 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
4142 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
4143 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
4144 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
4145 	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
4146 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
4147 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
4148 	{PCIC_DASP,		-1,			0, "dasp"},
4149 	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
4150 	{0, 0, 0,		NULL}
4151 };
4152 
4153 void
4154 pci_probe_nomatch(device_t dev, device_t child)
4155 {
4156 	int i, report;
4157 	const char *cp, *scp;
4158 	char *device;
4159 
4160 	/*
4161 	 * Look for a listing for this device in a loaded device database.
4162 	 */
4163 	report = 1;
4164 	if ((device = pci_describe_device(child)) != NULL) {
4165 		device_printf(dev, "<%s>", device);
4166 		free(device, M_DEVBUF);
4167 	} else {
4168 		/*
4169 		 * Scan the class/subclass descriptions for a general
4170 		 * description.
4171 		 */
4172 		cp = "unknown";
4173 		scp = NULL;
4174 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4175 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4176 				if (pci_nomatch_tab[i].subclass == -1) {
4177 					cp = pci_nomatch_tab[i].desc;
4178 					report = pci_nomatch_tab[i].report;
4179 				} else if (pci_nomatch_tab[i].subclass ==
4180 				    pci_get_subclass(child)) {
4181 					scp = pci_nomatch_tab[i].desc;
4182 					report = pci_nomatch_tab[i].report;
4183 				}
4184 			}
4185 		}
4186 		if (report || bootverbose) {
4187 			device_printf(dev, "<%s%s%s>",
4188 			    cp ? cp : "",
4189 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4190 			    scp ? scp : "");
4191 		}
4192 	}
4193 	if (report || bootverbose) {
4194 		printf(" at device %d.%d (no driver attached)\n",
4195 		    pci_get_slot(child), pci_get_function(child));
4196 	}
4197 	pci_cfg_save(child, device_get_ivars(child), 1);
4198 }
4199 
4200 void
4201 pci_child_detached(device_t dev, device_t child)
4202 {
4203 	struct pci_devinfo *dinfo;
4204 	struct resource_list *rl;
4205 
4206 	dinfo = device_get_ivars(child);
4207 	rl = &dinfo->resources;
4208 
4209 	/*
4210 	 * Have to deallocate IRQs before releasing any MSI messages and
4211 	 * have to release MSI messages before deallocating any memory
4212 	 * BARs.
4213 	 */
4214 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4215 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4216 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4217 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4218 		(void)pci_release_msi(child);
4219 	}
4220 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4221 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4222 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4223 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4224 #ifdef PCI_RES_BUS
4225 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4226 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4227 #endif
4228 
4229 	pci_cfg_save(child, dinfo, 1);
4230 }
4231 
4232 /*
4233  * Parse the PCI device database, if loaded, and return a pointer to a
4234  * description of the device.
4235  *
4236  * The database is flat text formatted as follows:
4237  *
4238  * Any line not in a valid format is ignored.
4239  * Lines are terminated with newline '\n' characters.
4240  *
4241  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4242  * the vendor name.
4243  *
4244  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4245  * - devices cannot be listed without a corresponding VENDOR line.
4246  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4247  * another TAB, then the device name.
4248  */
4249 
4250 /*
4251  * Assuming (ptr) points to the beginning of a line in the database,
4252  * return the vendor or device and description of the next entry.
4253  * The value of (vendor) or (device) inappropriate for the entry type
4254  * is set to -1.  Returns nonzero at the end of the database.
4255  *
4256  * Note that this is slightly unrobust in the face of corrupt data;
4257  * we attempt to safeguard against this by spamming the end of the
4258  * database with a newline when we initialise.
4259  */
4260 static int
4261 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4262 {
4263 	char	*cp = *ptr;
4264 	int	left;
4265 
4266 	*device = -1;
4267 	*vendor = -1;
4268 	**desc = '\0';
4269 	for (;;) {
4270 		left = pci_vendordata_size - (cp - pci_vendordata);
4271 		if (left <= 0) {
4272 			*ptr = cp;
4273 			return(1);
4274 		}
4275 
4276 		/* vendor entry? */
4277 		if (*cp != '\t' &&
4278 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4279 			break;
4280 		/* device entry? */
4281 		if (*cp == '\t' &&
4282 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4283 			break;
4284 
4285 		/* skip to next line */
4286 		while (*cp != '\n' && left > 0) {
4287 			cp++;
4288 			left--;
4289 		}
4290 		if (*cp == '\n') {
4291 			cp++;
4292 			left--;
4293 		}
4294 	}
4295 	/* skip to next line */
4296 	while (*cp != '\n' && left > 0) {
4297 		cp++;
4298 		left--;
4299 	}
4300 	if (*cp == '\n' && left > 0)
4301 		cp++;
4302 	*ptr = cp;
4303 	return(0);
4304 }
4305 
4306 static char *
4307 pci_describe_device(device_t dev)
4308 {
4309 	int	vendor, device;
4310 	char	*desc, *vp, *dp, *line;
4311 
4312 	desc = vp = dp = NULL;
4313 
4314 	/*
4315 	 * If we have no vendor data, we can't do anything.
4316 	 */
4317 	if (pci_vendordata == NULL)
4318 		goto out;
4319 
4320 	/*
4321 	 * Scan the vendor data looking for this device
4322 	 */
4323 	line = pci_vendordata;
4324 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4325 		goto out;
4326 	for (;;) {
4327 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4328 			goto out;
4329 		if (vendor == pci_get_vendor(dev))
4330 			break;
4331 	}
4332 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4333 		goto out;
4334 	for (;;) {
4335 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4336 			*dp = 0;
4337 			break;
4338 		}
4339 		if (vendor != -1) {
4340 			*dp = 0;
4341 			break;
4342 		}
4343 		if (device == pci_get_device(dev))
4344 			break;
4345 	}
4346 	if (dp[0] == '\0')
4347 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4348 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4349 	    NULL)
4350 		sprintf(desc, "%s, %s", vp, dp);
4351 out:
4352 	if (vp != NULL)
4353 		free(vp, M_DEVBUF);
4354 	if (dp != NULL)
4355 		free(dp, M_DEVBUF);
4356 	return(desc);
4357 }
4358 
4359 int
4360 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4361 {
4362 	struct pci_devinfo *dinfo;
4363 	pcicfgregs *cfg;
4364 
4365 	dinfo = device_get_ivars(child);
4366 	cfg = &dinfo->cfg;
4367 
4368 	switch (which) {
4369 	case PCI_IVAR_ETHADDR:
4370 		/*
4371 		 * The generic accessor doesn't deal with failure, so
4372 		 * we set the return value, then return an error.
4373 		 */
4374 		*((uint8_t **) result) = NULL;
4375 		return (EINVAL);
4376 	case PCI_IVAR_SUBVENDOR:
4377 		*result = cfg->subvendor;
4378 		break;
4379 	case PCI_IVAR_SUBDEVICE:
4380 		*result = cfg->subdevice;
4381 		break;
4382 	case PCI_IVAR_VENDOR:
4383 		*result = cfg->vendor;
4384 		break;
4385 	case PCI_IVAR_DEVICE:
4386 		*result = cfg->device;
4387 		break;
4388 	case PCI_IVAR_DEVID:
4389 		*result = (cfg->device << 16) | cfg->vendor;
4390 		break;
4391 	case PCI_IVAR_CLASS:
4392 		*result = cfg->baseclass;
4393 		break;
4394 	case PCI_IVAR_SUBCLASS:
4395 		*result = cfg->subclass;
4396 		break;
4397 	case PCI_IVAR_PROGIF:
4398 		*result = cfg->progif;
4399 		break;
4400 	case PCI_IVAR_REVID:
4401 		*result = cfg->revid;
4402 		break;
4403 	case PCI_IVAR_INTPIN:
4404 		*result = cfg->intpin;
4405 		break;
4406 	case PCI_IVAR_IRQ:
4407 		*result = cfg->intline;
4408 		break;
4409 	case PCI_IVAR_DOMAIN:
4410 		*result = cfg->domain;
4411 		break;
4412 	case PCI_IVAR_BUS:
4413 		*result = cfg->bus;
4414 		break;
4415 	case PCI_IVAR_SLOT:
4416 		*result = cfg->slot;
4417 		break;
4418 	case PCI_IVAR_FUNCTION:
4419 		*result = cfg->func;
4420 		break;
4421 	case PCI_IVAR_CMDREG:
4422 		*result = cfg->cmdreg;
4423 		break;
4424 	case PCI_IVAR_CACHELNSZ:
4425 		*result = cfg->cachelnsz;
4426 		break;
4427 	case PCI_IVAR_MINGNT:
4428 		*result = cfg->mingnt;
4429 		break;
4430 	case PCI_IVAR_MAXLAT:
4431 		*result = cfg->maxlat;
4432 		break;
4433 	case PCI_IVAR_LATTIMER:
4434 		*result = cfg->lattimer;
4435 		break;
4436 	default:
4437 		return (ENOENT);
4438 	}
4439 	return (0);
4440 }
4441 
4442 int
4443 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4444 {
4445 	struct pci_devinfo *dinfo;
4446 
4447 	dinfo = device_get_ivars(child);
4448 
4449 	switch (which) {
4450 	case PCI_IVAR_INTPIN:
4451 		dinfo->cfg.intpin = value;
4452 		return (0);
4453 	case PCI_IVAR_ETHADDR:
4454 	case PCI_IVAR_SUBVENDOR:
4455 	case PCI_IVAR_SUBDEVICE:
4456 	case PCI_IVAR_VENDOR:
4457 	case PCI_IVAR_DEVICE:
4458 	case PCI_IVAR_DEVID:
4459 	case PCI_IVAR_CLASS:
4460 	case PCI_IVAR_SUBCLASS:
4461 	case PCI_IVAR_PROGIF:
4462 	case PCI_IVAR_REVID:
4463 	case PCI_IVAR_IRQ:
4464 	case PCI_IVAR_DOMAIN:
4465 	case PCI_IVAR_BUS:
4466 	case PCI_IVAR_SLOT:
4467 	case PCI_IVAR_FUNCTION:
4468 		return (EINVAL);	/* disallow for now */
4469 
4470 	default:
4471 		return (ENOENT);
4472 	}
4473 }
4474 
4475 #include "opt_ddb.h"
4476 #ifdef DDB
4477 #include <ddb/ddb.h>
4478 #include <sys/cons.h>
4479 
4480 /*
4481  * List resources based on pci map registers, used for within ddb
4482  */
4483 
4484 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4485 {
4486 	struct pci_devinfo *dinfo;
4487 	struct devlist *devlist_head;
4488 	struct pci_conf *p;
4489 	const char *name;
4490 	int i, error, none_count;
4491 
4492 	none_count = 0;
4493 	/* get the head of the device queue */
4494 	devlist_head = &pci_devq;
4495 
4496 	/*
4497 	 * Go through the list of devices and print out devices
4498 	 */
4499 	for (error = 0, i = 0,
4500 	     dinfo = STAILQ_FIRST(devlist_head);
4501 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4502 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4503 
4504 		/* Populate pd_name and pd_unit */
4505 		name = NULL;
4506 		if (dinfo->cfg.dev)
4507 			name = device_get_name(dinfo->cfg.dev);
4508 
4509 		p = &dinfo->conf;
4510 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4511 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4512 			(name && *name) ? name : "none",
4513 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4514 			none_count++,
4515 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4516 			p->pc_sel.pc_func, (p->pc_class << 16) |
4517 			(p->pc_subclass << 8) | p->pc_progif,
4518 			(p->pc_subdevice << 16) | p->pc_subvendor,
4519 			(p->pc_device << 16) | p->pc_vendor,
4520 			p->pc_revid, p->pc_hdr);
4521 	}
4522 }
4523 #endif /* DDB */
4524 
4525 static struct resource *
4526 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4527     u_long start, u_long end, u_long count, u_int num, u_int flags)
4528 {
4529 	struct pci_devinfo *dinfo = device_get_ivars(child);
4530 	struct resource_list *rl = &dinfo->resources;
4531 	struct resource *res;
4532 	struct pci_map *pm;
4533 	pci_addr_t map, testval;
4534 	int mapsize;
4535 
4536 	res = NULL;
4537 	pm = pci_find_bar(child, *rid);
4538 	if (pm != NULL) {
4539 		/* This is a BAR that we failed to allocate earlier. */
4540 		mapsize = pm->pm_size;
4541 		map = pm->pm_value;
4542 	} else {
4543 		/*
4544 		 * Weed out the bogons, and figure out how large the
4545 		 * BAR/map is.  BARs that read back 0 here are bogus
4546 		 * and unimplemented.  Note: atapci in legacy mode are
4547 		 * special and handled elsewhere in the code.  If you
4548 		 * have a atapci device in legacy mode and it fails
4549 		 * here, that other code is broken.
4550 		 */
4551 		pci_read_bar(child, *rid, &map, &testval, NULL);
4552 
4553 		/*
4554 		 * Determine the size of the BAR and ignore BARs with a size
4555 		 * of 0.  Device ROM BARs use a different mask value.
4556 		 */
4557 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4558 			mapsize = pci_romsize(testval);
4559 		else
4560 			mapsize = pci_mapsize(testval);
4561 		if (mapsize == 0)
4562 			goto out;
4563 		pm = pci_add_bar(child, *rid, map, mapsize);
4564 	}
4565 
4566 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4567 		if (type != SYS_RES_MEMORY) {
4568 			if (bootverbose)
4569 				device_printf(dev,
4570 				    "child %s requested type %d for rid %#x,"
4571 				    " but the BAR says it is an memio\n",
4572 				    device_get_nameunit(child), type, *rid);
4573 			goto out;
4574 		}
4575 	} else {
4576 		if (type != SYS_RES_IOPORT) {
4577 			if (bootverbose)
4578 				device_printf(dev,
4579 				    "child %s requested type %d for rid %#x,"
4580 				    " but the BAR says it is an ioport\n",
4581 				    device_get_nameunit(child), type, *rid);
4582 			goto out;
4583 		}
4584 	}
4585 
4586 	/*
4587 	 * For real BARs, we need to override the size that
4588 	 * the driver requests, because that's what the BAR
4589 	 * actually uses and we would otherwise have a
4590 	 * situation where we might allocate the excess to
4591 	 * another driver, which won't work.
4592 	 */
4593 	count = ((pci_addr_t)1 << mapsize) * num;
4594 	if (RF_ALIGNMENT(flags) < mapsize)
4595 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4596 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4597 		flags |= RF_PREFETCHABLE;
4598 
4599 	/*
4600 	 * Allocate enough resource, and then write back the
4601 	 * appropriate BAR for that resource.
4602 	 */
4603 	resource_list_add(rl, type, *rid, start, end, count);
4604 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
4605 	    count, flags & ~RF_ACTIVE);
4606 	if (res == NULL) {
4607 		resource_list_delete(rl, type, *rid);
4608 		device_printf(child,
4609 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4610 		    count, *rid, type, start, end);
4611 		goto out;
4612 	}
4613 	if (bootverbose)
4614 		device_printf(child,
4615 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4616 		    count, *rid, type, rman_get_start(res));
4617 	map = rman_get_start(res);
4618 	pci_write_bar(child, pm, map);
4619 out:
4620 	return (res);
4621 }
4622 
4623 struct resource *
4624 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
4625     u_long start, u_long end, u_long count, u_long num, u_int flags)
4626 {
4627 	struct pci_devinfo *dinfo;
4628 	struct resource_list *rl;
4629 	struct resource_list_entry *rle;
4630 	struct resource *res;
4631 	pcicfgregs *cfg;
4632 
4633 	/*
4634 	 * Perform lazy resource allocation
4635 	 */
4636 	dinfo = device_get_ivars(child);
4637 	rl = &dinfo->resources;
4638 	cfg = &dinfo->cfg;
4639 	switch (type) {
4640 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
4641 	case PCI_RES_BUS:
4642 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
4643 		    flags));
4644 #endif
4645 	case SYS_RES_IRQ:
4646 		/*
4647 		 * Can't alloc legacy interrupt once MSI messages have
4648 		 * been allocated.
4649 		 */
4650 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4651 		    cfg->msix.msix_alloc > 0))
4652 			return (NULL);
4653 
4654 		/*
4655 		 * If the child device doesn't have an interrupt
4656 		 * routed and is deserving of an interrupt, try to
4657 		 * assign it one.
4658 		 */
4659 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4660 		    (cfg->intpin != 0))
4661 			pci_assign_interrupt(dev, child, 0);
4662 		break;
4663 	case SYS_RES_IOPORT:
4664 	case SYS_RES_MEMORY:
4665 #ifdef NEW_PCIB
4666 		/*
4667 		 * PCI-PCI bridge I/O window resources are not BARs.
4668 		 * For those allocations just pass the request up the
4669 		 * tree.
4670 		 */
4671 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4672 			switch (*rid) {
4673 			case PCIR_IOBASEL_1:
4674 			case PCIR_MEMBASE_1:
4675 			case PCIR_PMBASEL_1:
4676 				/*
4677 				 * XXX: Should we bother creating a resource
4678 				 * list entry?
4679 				 */
4680 				return (bus_generic_alloc_resource(dev, child,
4681 				    type, rid, start, end, count, flags));
4682 			}
4683 		}
4684 #endif
4685 		/* Reserve resources for this BAR if needed. */
4686 		rle = resource_list_find(rl, type, *rid);
4687 		if (rle == NULL) {
4688 			res = pci_reserve_map(dev, child, type, rid, start, end,
4689 			    count, num, flags);
4690 			if (res == NULL)
4691 				return (NULL);
4692 		}
4693 	}
4694 	return (resource_list_alloc(rl, dev, child, type, rid,
4695 	    start, end, count, flags));
4696 }
4697 
4698 struct resource *
4699 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4700     u_long start, u_long end, u_long count, u_int flags)
4701 {
4702 #ifdef PCI_IOV
4703 	struct pci_devinfo *dinfo;
4704 #endif
4705 
4706 	if (device_get_parent(child) != dev)
4707 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4708 		    type, rid, start, end, count, flags));
4709 
4710 #ifdef PCI_IOV
4711 	dinfo = device_get_ivars(child);
4712 	if (dinfo->cfg.flags & PCICFG_VF) {
4713 		switch (type) {
4714 		/* VFs can't have I/O BARs. */
4715 		case SYS_RES_IOPORT:
4716 			return (NULL);
4717 		case SYS_RES_MEMORY:
4718 			return (pci_vf_alloc_mem_resource(dev, child, rid,
4719 			    start, end, count, flags));
4720 		}
4721 
4722 		/* Fall through for other types of resource allocations. */
4723 	}
4724 #endif
4725 
4726 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
4727 	    count, 1, flags));
4728 }
4729 
4730 int
4731 pci_release_resource(device_t dev, device_t child, int type, int rid,
4732     struct resource *r)
4733 {
4734 	struct pci_devinfo *dinfo;
4735 	struct resource_list *rl;
4736 	pcicfgregs *cfg;
4737 
4738 	if (device_get_parent(child) != dev)
4739 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4740 		    type, rid, r));
4741 
4742 	dinfo = device_get_ivars(child);
4743 	cfg = &dinfo->cfg;
4744 
4745 #ifdef PCI_IOV
4746 	if (dinfo->cfg.flags & PCICFG_VF) {
4747 		switch (type) {
4748 		/* VFs can't have I/O BARs. */
4749 		case SYS_RES_IOPORT:
4750 			return (EDOOFUS);
4751 		case SYS_RES_MEMORY:
4752 			return (pci_vf_release_mem_resource(dev, child, rid,
4753 			    r));
4754 		}
4755 
4756 		/* Fall through for other types of resource allocations. */
4757 	}
4758 #endif
4759 
4760 #ifdef NEW_PCIB
4761 	/*
4762 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4763 	 * those allocations just pass the request up the tree.
4764 	 */
4765 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4766 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4767 		switch (rid) {
4768 		case PCIR_IOBASEL_1:
4769 		case PCIR_MEMBASE_1:
4770 		case PCIR_PMBASEL_1:
4771 			return (bus_generic_release_resource(dev, child, type,
4772 			    rid, r));
4773 		}
4774 	}
4775 #endif
4776 
4777 	rl = &dinfo->resources;
4778 	return (resource_list_release(rl, dev, child, type, rid, r));
4779 }
4780 
4781 int
4782 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4783     struct resource *r)
4784 {
4785 	struct pci_devinfo *dinfo;
4786 	int error;
4787 
4788 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4789 	if (error)
4790 		return (error);
4791 
4792 	/* Enable decoding in the command register when activating BARs. */
4793 	if (device_get_parent(child) == dev) {
4794 		/* Device ROMs need their decoding explicitly enabled. */
4795 		dinfo = device_get_ivars(child);
4796 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4797 			pci_write_bar(child, pci_find_bar(child, rid),
4798 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4799 		switch (type) {
4800 		case SYS_RES_IOPORT:
4801 		case SYS_RES_MEMORY:
4802 			error = PCI_ENABLE_IO(dev, child, type);
4803 			break;
4804 		}
4805 	}
4806 	return (error);
4807 }
4808 
4809 int
4810 pci_deactivate_resource(device_t dev, device_t child, int type,
4811     int rid, struct resource *r)
4812 {
4813 	struct pci_devinfo *dinfo;
4814 	int error;
4815 
4816 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4817 	if (error)
4818 		return (error);
4819 
4820 	/* Disable decoding for device ROMs. */
4821 	if (device_get_parent(child) == dev) {
4822 		dinfo = device_get_ivars(child);
4823 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4824 			pci_write_bar(child, pci_find_bar(child, rid),
4825 			    rman_get_start(r));
4826 	}
4827 	return (0);
4828 }
4829 
4830 void
4831 pci_delete_child(device_t dev, device_t child)
4832 {
4833 	struct resource_list_entry *rle;
4834 	struct resource_list *rl;
4835 	struct pci_devinfo *dinfo;
4836 
4837 	dinfo = device_get_ivars(child);
4838 	rl = &dinfo->resources;
4839 
4840 	if (device_is_attached(child))
4841 		device_detach(child);
4842 
4843 	/* Turn off access to resources we're about to free */
4844 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4845 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4846 
4847 	/* Free all allocated resources */
4848 	STAILQ_FOREACH(rle, rl, link) {
4849 		if (rle->res) {
4850 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4851 			    resource_list_busy(rl, rle->type, rle->rid)) {
4852 				pci_printf(&dinfo->cfg,
4853 				    "Resource still owned, oops. "
4854 				    "(type=%d, rid=%d, addr=%lx)\n",
4855 				    rle->type, rle->rid,
4856 				    rman_get_start(rle->res));
4857 				bus_release_resource(child, rle->type, rle->rid,
4858 				    rle->res);
4859 			}
4860 			resource_list_unreserve(rl, dev, child, rle->type,
4861 			    rle->rid);
4862 		}
4863 	}
4864 	resource_list_free(rl);
4865 
4866 	device_delete_child(dev, child);
4867 	pci_freecfg(dinfo);
4868 }
4869 
4870 void
4871 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4872 {
4873 	struct pci_devinfo *dinfo;
4874 	struct resource_list *rl;
4875 	struct resource_list_entry *rle;
4876 
4877 	if (device_get_parent(child) != dev)
4878 		return;
4879 
4880 	dinfo = device_get_ivars(child);
4881 	rl = &dinfo->resources;
4882 	rle = resource_list_find(rl, type, rid);
4883 	if (rle == NULL)
4884 		return;
4885 
4886 	if (rle->res) {
4887 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4888 		    resource_list_busy(rl, type, rid)) {
4889 			device_printf(dev, "delete_resource: "
4890 			    "Resource still owned by child, oops. "
4891 			    "(type=%d, rid=%d, addr=%lx)\n",
4892 			    type, rid, rman_get_start(rle->res));
4893 			return;
4894 		}
4895 		resource_list_unreserve(rl, dev, child, type, rid);
4896 	}
4897 	resource_list_delete(rl, type, rid);
4898 }
4899 
4900 struct resource_list *
4901 pci_get_resource_list (device_t dev, device_t child)
4902 {
4903 	struct pci_devinfo *dinfo = device_get_ivars(child);
4904 
4905 	return (&dinfo->resources);
4906 }
4907 
4908 bus_dma_tag_t
4909 pci_get_dma_tag(device_t bus, device_t dev)
4910 {
4911 	struct pci_softc *sc = device_get_softc(bus);
4912 
4913 	return (sc->sc_dma_tag);
4914 }
4915 
4916 uint32_t
4917 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4918 {
4919 	struct pci_devinfo *dinfo = device_get_ivars(child);
4920 	pcicfgregs *cfg = &dinfo->cfg;
4921 
4922 #ifdef PCI_IOV
4923 	/*
4924 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
4925 	 * emulate them here.
4926 	 */
4927 	if (cfg->flags & PCICFG_VF) {
4928 		if (reg == PCIR_VENDOR) {
4929 			switch (width) {
4930 			case 4:
4931 				return (cfg->device << 16 | cfg->vendor);
4932 			case 2:
4933 				return (cfg->vendor);
4934 			case 1:
4935 				return (cfg->vendor & 0xff);
4936 			default:
4937 				return (0xffffffff);
4938 			}
4939 		} else if (reg == PCIR_DEVICE) {
4940 			switch (width) {
4941 			/* Note that an unaligned 4-byte read is an error. */
4942 			case 2:
4943 				return (cfg->device);
4944 			case 1:
4945 				return (cfg->device & 0xff);
4946 			default:
4947 				return (0xffffffff);
4948 			}
4949 		}
4950 	}
4951 #endif
4952 
4953 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4954 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4955 }
4956 
4957 void
4958 pci_write_config_method(device_t dev, device_t child, int reg,
4959     uint32_t val, int width)
4960 {
4961 	struct pci_devinfo *dinfo = device_get_ivars(child);
4962 	pcicfgregs *cfg = &dinfo->cfg;
4963 
4964 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4965 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4966 }
4967 
4968 int
4969 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4970     size_t buflen)
4971 {
4972 
4973 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
4974 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
4975 	return (0);
4976 }
4977 
4978 int
4979 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4980     size_t buflen)
4981 {
4982 	struct pci_devinfo *dinfo;
4983 	pcicfgregs *cfg;
4984 
4985 	dinfo = device_get_ivars(child);
4986 	cfg = &dinfo->cfg;
4987 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4988 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4989 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4990 	    cfg->progif);
4991 	return (0);
4992 }
4993 
4994 int
4995 pci_assign_interrupt_method(device_t dev, device_t child)
4996 {
4997 	struct pci_devinfo *dinfo = device_get_ivars(child);
4998 	pcicfgregs *cfg = &dinfo->cfg;
4999 
5000 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5001 	    cfg->intpin));
5002 }
5003 
5004 static void
5005 pci_lookup(void *arg, const char *name, device_t *dev)
5006 {
5007 	long val;
5008 	char *end;
5009 	int domain, bus, slot, func;
5010 
5011 	if (*dev != NULL)
5012 		return;
5013 
5014 	/*
5015 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5016 	 * pciB:S:F.  In the latter case, the domain is assumed to
5017 	 * be zero.
5018 	 */
5019 	if (strncmp(name, "pci", 3) != 0)
5020 		return;
5021 	val = strtol(name + 3, &end, 10);
5022 	if (val < 0 || val > INT_MAX || *end != ':')
5023 		return;
5024 	domain = val;
5025 	val = strtol(end + 1, &end, 10);
5026 	if (val < 0 || val > INT_MAX || *end != ':')
5027 		return;
5028 	bus = val;
5029 	val = strtol(end + 1, &end, 10);
5030 	if (val < 0 || val > INT_MAX)
5031 		return;
5032 	slot = val;
5033 	if (*end == ':') {
5034 		val = strtol(end + 1, &end, 10);
5035 		if (val < 0 || val > INT_MAX || *end != '\0')
5036 			return;
5037 		func = val;
5038 	} else if (*end == '\0') {
5039 		func = slot;
5040 		slot = bus;
5041 		bus = domain;
5042 		domain = 0;
5043 	} else
5044 		return;
5045 
5046 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5047 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5048 		return;
5049 
5050 	*dev = pci_find_dbsf(domain, bus, slot, func);
5051 }
5052 
5053 static int
5054 pci_modevent(module_t mod, int what, void *arg)
5055 {
5056 	static struct cdev *pci_cdev;
5057 	static eventhandler_tag tag;
5058 
5059 	switch (what) {
5060 	case MOD_LOAD:
5061 		STAILQ_INIT(&pci_devq);
5062 		pci_generation = 0;
5063 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5064 		    "pci");
5065 		pci_load_vendor_data();
5066 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5067 		    1000);
5068 		break;
5069 
5070 	case MOD_UNLOAD:
5071 		if (tag != NULL)
5072 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5073 		destroy_dev(pci_cdev);
5074 		break;
5075 	}
5076 
5077 	return (0);
5078 }
5079 
5080 static void
5081 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5082 {
5083 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5084 	struct pcicfg_pcie *cfg;
5085 	int version, pos;
5086 
5087 	cfg = &dinfo->cfg.pcie;
5088 	pos = cfg->pcie_location;
5089 
5090 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5091 
5092 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5093 
5094 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5095 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5096 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5097 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5098 
5099 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5100 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5101 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5102 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5103 
5104 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5105 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5106 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5107 
5108 	if (version > 1) {
5109 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5110 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5111 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5112 	}
5113 #undef WREG
5114 }
5115 
5116 static void
5117 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5118 {
5119 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5120 	    dinfo->cfg.pcix.pcix_command,  2);
5121 }
5122 
5123 void
5124 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5125 {
5126 
5127 	/*
5128 	 * Only do header type 0 devices.  Type 1 devices are bridges,
5129 	 * which we know need special treatment.  Type 2 devices are
5130 	 * cardbus bridges which also require special treatment.
5131 	 * Other types are unknown, and we err on the side of safety
5132 	 * by ignoring them.
5133 	 */
5134 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
5135 		return;
5136 
5137 	/*
5138 	 * Restore the device to full power mode.  We must do this
5139 	 * before we restore the registers because moving from D3 to
5140 	 * D0 will cause the chip's BARs and some other registers to
5141 	 * be reset to some unknown power on reset values.  Cut down
5142 	 * the noise on boot by doing nothing if we are already in
5143 	 * state D0.
5144 	 */
5145 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5146 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5147 	pci_restore_bars(dev);
5148 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5149 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5150 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5151 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5152 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5153 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5154 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5155 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5156 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5157 
5158 	/*
5159 	 * Restore extended capabilities for PCI-Express and PCI-X
5160 	 */
5161 	if (dinfo->cfg.pcie.pcie_location != 0)
5162 		pci_cfg_restore_pcie(dev, dinfo);
5163 	if (dinfo->cfg.pcix.pcix_location != 0)
5164 		pci_cfg_restore_pcix(dev, dinfo);
5165 
5166 	/* Restore MSI and MSI-X configurations if they are present. */
5167 	if (dinfo->cfg.msi.msi_location != 0)
5168 		pci_resume_msi(dev);
5169 	if (dinfo->cfg.msix.msix_location != 0)
5170 		pci_resume_msix(dev);
5171 }
5172 
5173 static void
5174 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5175 {
5176 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5177 	struct pcicfg_pcie *cfg;
5178 	int version, pos;
5179 
5180 	cfg = &dinfo->cfg.pcie;
5181 	pos = cfg->pcie_location;
5182 
5183 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5184 
5185 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5186 
5187 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5188 
5189 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5190 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5191 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5192 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5193 
5194 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5195 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5196 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5197 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5198 
5199 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5200 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5201 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5202 
5203 	if (version > 1) {
5204 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5205 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5206 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5207 	}
5208 #undef RREG
5209 }
5210 
5211 static void
5212 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5213 {
5214 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5215 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5216 }
5217 
5218 void
5219 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5220 {
5221 	uint32_t cls;
5222 	int ps;
5223 
5224 	/*
5225 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
5226 	 * we know need special treatment.  Type 2 devices are cardbus bridges
5227 	 * which also require special treatment.  Other types are unknown, and
5228 	 * we err on the side of safety by ignoring them.  Powering down
5229 	 * bridges should not be undertaken lightly.
5230 	 */
5231 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
5232 		return;
5233 
5234 	/*
5235 	 * Some drivers apparently write to these registers w/o updating our
5236 	 * cached copy.  No harm happens if we update the copy, so do so here
5237 	 * so we can restore them.  The COMMAND register is modified by the
5238 	 * bus w/o updating the cache.  This should represent the normally
5239 	 * writable portion of the 'defined' part of type 0 headers.  In
5240 	 * theory we also need to save/restore the PCI capability structures
5241 	 * we know about, but apart from power we don't know any that are
5242 	 * writable.
5243 	 */
5244 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5245 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5246 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5247 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5248 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5249 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5250 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5251 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5252 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5253 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5254 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5255 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5256 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5257 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5258 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5259 
5260 	if (dinfo->cfg.pcie.pcie_location != 0)
5261 		pci_cfg_save_pcie(dev, dinfo);
5262 
5263 	if (dinfo->cfg.pcix.pcix_location != 0)
5264 		pci_cfg_save_pcix(dev, dinfo);
5265 
5266 	/*
5267 	 * don't set the state for display devices, base peripherals and
5268 	 * memory devices since bad things happen when they are powered down.
5269 	 * We should (a) have drivers that can easily detach and (b) use
5270 	 * generic drivers for these devices so that some device actually
5271 	 * attaches.  We need to make sure that when we implement (a) we don't
5272 	 * power the device down on a reattach.
5273 	 */
5274 	cls = pci_get_class(dev);
5275 	if (!setstate)
5276 		return;
5277 	switch (pci_do_power_nodriver)
5278 	{
5279 		case 0:		/* NO powerdown at all */
5280 			return;
5281 		case 1:		/* Conservative about what to power down */
5282 			if (cls == PCIC_STORAGE)
5283 				return;
5284 			/*FALLTHROUGH*/
5285 		case 2:		/* Agressive about what to power down */
5286 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5287 			    cls == PCIC_BASEPERIPH)
5288 				return;
5289 			/*FALLTHROUGH*/
5290 		case 3:		/* Power down everything */
5291 			break;
5292 	}
5293 	/*
5294 	 * PCI spec says we can only go into D3 state from D0 state.
5295 	 * Transition from D[12] into D0 before going to D3 state.
5296 	 */
5297 	ps = pci_get_powerstate(dev);
5298 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5299 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5300 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5301 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5302 }
5303 
5304 /* Wrapper APIs suitable for device driver use. */
5305 void
5306 pci_save_state(device_t dev)
5307 {
5308 	struct pci_devinfo *dinfo;
5309 
5310 	dinfo = device_get_ivars(dev);
5311 	pci_cfg_save(dev, dinfo, 0);
5312 }
5313 
5314 void
5315 pci_restore_state(device_t dev)
5316 {
5317 	struct pci_devinfo *dinfo;
5318 
5319 	dinfo = device_get_ivars(dev);
5320 	pci_cfg_restore(dev, dinfo);
5321 }
5322 
5323 static uint16_t
5324 pci_get_rid_method(device_t dev, device_t child)
5325 {
5326 
5327 	return (PCIB_GET_RID(device_get_parent(dev), child));
5328 }
5329