xref: /freebsd/sys/dev/pci/pci.c (revision f5e9c916afed4a948fe5c03bfaee038d165e12ab)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
/*
 * True when 'reg' names the PCIR_BIOS register for the header type stored
 * in 'cfg': PCIR_BIOS for PCIM_HDRTYPE_NORMAL and PCIR_BIOS_1 for
 * PCIM_HDRTYPE_BRIDGE.  Both arguments are parenthesized so that an
 * expression argument (e.g. "x & 0xff") is compared as a whole.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
82 
83 static int		pci_has_quirk(uint32_t devid, int quirk);
84 static pci_addr_t	pci_mapbase(uint64_t mapreg);
85 static const char	*pci_maptype(uint64_t mapreg);
86 static int		pci_maprange(uint64_t mapreg);
87 static pci_addr_t	pci_rombase(uint64_t mapreg);
88 static int		pci_romsize(uint64_t testval);
89 static void		pci_fixancient(pcicfgregs *cfg);
90 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91 
92 static int		pci_porten(device_t dev);
93 static int		pci_memen(device_t dev);
94 static void		pci_assign_interrupt(device_t bus, device_t dev,
95 			    int force_route);
96 static int		pci_add_map(device_t bus, device_t dev, int reg,
97 			    struct resource_list *rl, int force, int prefetch);
98 static int		pci_probe(device_t dev);
99 static int		pci_attach(device_t dev);
100 #ifdef PCI_RES_BUS
101 static int		pci_detach(device_t dev);
102 #endif
103 static void		pci_load_vendor_data(void);
104 static int		pci_describe_parse_line(char **ptr, int *vendor,
105 			    int *device, char **desc);
106 static char		*pci_describe_device(device_t dev);
107 static int		pci_modevent(module_t mod, int what, void *arg);
108 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
109 			    pcicfgregs *cfg);
110 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
111 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
112 			    int reg, uint32_t *data);
113 #if 0
114 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
115 			    int reg, uint32_t data);
116 #endif
117 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
118 static void		pci_mask_msix(device_t dev, u_int index);
119 static void		pci_unmask_msix(device_t dev, u_int index);
120 static int		pci_msi_blacklisted(void);
121 static int		pci_msix_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
127 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128 
129 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
130     int f, uint16_t vid, uint16_t did, size_t size);
131 
/*
 * Method table for the pci bus driver.  Each DEVMETHOD() entry binds an
 * interface method (device, bus or PCI interface) to its implementation;
 * the table is handed to DEFINE_CLASS_0() to build pci_driver.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	/* SR-IOV entry points, only built with PCI_IOV. */
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
207 
208 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
209 
210 static devclass_t pci_devclass;
211 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
212 MODULE_VERSION(pci, 1);
213 
214 static char	*pci_vendordata;
215 static size_t	pci_vendordata_size;
216 
/*
 * One entry of the device quirk table.  An entry pairs a device ID with a
 * quirk type; pci_has_quirk() matches on exactly those two fields.
 */
struct pci_quirk {
	/*
	 * Combined ID: the table entries place the device ID in the upper
	 * 16 bits and the vendor ID in the lower 16 bits.
	 */
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	/*
	 * Quirk-specific arguments.  The MAP_REG/UNMAP_REG table entries
	 * store a config register offset in arg1; other entries use 0.
	 */
	int	arg1;
	int	arg2;
};
229 
/*
 * Table of known device quirks.  The list is terminated by an all-zero
 * entry; pci_has_quirk() stops at the first entry whose devid is 0, so
 * the terminator must stay last.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* Terminator: devid == 0 ends the table walk. */
	{ 0 }
};
307 
308 /* map register information */
309 #define	PCI_MAPMEM	0x01	/* memory map */
310 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
311 #define	PCI_MAPPORT	0x04	/* port map */
312 
/*
 * Global list of pci_devinfo records.  pci_fill_devinfo() appends to it
 * and the pci_find_*() lookups walk it.
 */
struct devlist pci_devq;
/* Incremented each time pci_fill_devinfo() adds a device. */
uint32_t pci_generation;
/* Number of devices added by pci_fill_devinfo(). */
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_cap() when a PCI-express capability (pcie_chipset)
 * or a PCI-X capability on a bridge (pcix_chipset) has been seen.
 */
static int pcie_chipset, pcix_chipset;
317 
318 /* sysctl vars */
319 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
320 
321 static int pci_enable_io_modes = 1;
322 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
323     &pci_enable_io_modes, 1,
324     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
325 enable these bits correctly.  We'd like to do this all the time, but there\n\
326 are some peripherals that this causes problems with.");
327 
328 static int pci_do_realloc_bars = 0;
329 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
330     &pci_do_realloc_bars, 0,
331     "Attempt to allocate a new range for any BARs whose original "
332     "firmware-assigned ranges fail to allocate during the initial device scan.");
333 
334 static int pci_do_power_nodriver = 0;
335 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
336     &pci_do_power_nodriver, 0,
337   "Place a function into D3 state when no driver attaches to it.  0 means\n\
338 disable.  1 means conservatively place devices into D3 state.  2 means\n\
339 agressively place devices into D3 state.  3 means put absolutely everything\n\
340 in D3 state.");
341 
342 int pci_do_power_resume = 1;
343 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
344     &pci_do_power_resume, 1,
345   "Transition from D3 -> D0 on resume.");
346 
347 int pci_do_power_suspend = 1;
348 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
349     &pci_do_power_suspend, 1,
350   "Transition from D0 -> D3 on suspend.");
351 
352 static int pci_do_msi = 1;
353 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
354     "Enable support for MSI interrupts");
355 
356 static int pci_do_msix = 1;
357 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
358     "Enable support for MSI-X interrupts");
359 
360 static int pci_honor_msi_blacklist = 1;
361 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
362     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
363 
364 #if defined(__i386__) || defined(__amd64__)
365 static int pci_usb_takeover = 1;
366 #else
367 static int pci_usb_takeover = 0;
368 #endif
369 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
370     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
371 Disable this if you depend on BIOS emulation of USB devices, that is\n\
372 you use USB devices (like keyboard or mouse) but do not load USB drivers");
373 
374 static int pci_clear_bars;
375 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
376     "Ignore firmware-assigned resources for BARs.");
377 
378 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
379 static int pci_clear_buses;
380 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
381     "Ignore firmware-assigned bus numbers.");
382 #endif
383 
384 static int pci_enable_ari = 1;
385 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
386     0, "Enable support for PCIe Alternative RID Interpretation");
387 
388 static int
389 pci_has_quirk(uint32_t devid, int quirk)
390 {
391 	const struct pci_quirk *q;
392 
393 	for (q = &pci_quirks[0]; q->devid; q++) {
394 		if (q->devid == devid && q->type == quirk)
395 			return (1);
396 	}
397 	return (0);
398 }
399 
400 /* Find a device_t by bus/slot/function in domain 0 */
401 
402 device_t
403 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
404 {
405 
406 	return (pci_find_dbsf(0, bus, slot, func));
407 }
408 
409 /* Find a device_t by domain/bus/slot/function */
410 
411 device_t
412 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
413 {
414 	struct pci_devinfo *dinfo;
415 
416 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
417 		if ((dinfo->cfg.domain == domain) &&
418 		    (dinfo->cfg.bus == bus) &&
419 		    (dinfo->cfg.slot == slot) &&
420 		    (dinfo->cfg.func == func)) {
421 			return (dinfo->cfg.dev);
422 		}
423 	}
424 
425 	return (NULL);
426 }
427 
428 /* Find a device_t by vendor/device ID */
429 
430 device_t
431 pci_find_device(uint16_t vendor, uint16_t device)
432 {
433 	struct pci_devinfo *dinfo;
434 
435 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
436 		if ((dinfo->cfg.vendor == vendor) &&
437 		    (dinfo->cfg.device == device)) {
438 			return (dinfo->cfg.dev);
439 		}
440 	}
441 
442 	return (NULL);
443 }
444 
445 device_t
446 pci_find_class(uint8_t class, uint8_t subclass)
447 {
448 	struct pci_devinfo *dinfo;
449 
450 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
451 		if (dinfo->cfg.baseclass == class &&
452 		    dinfo->cfg.subclass == subclass) {
453 			return (dinfo->cfg.dev);
454 		}
455 	}
456 
457 	return (NULL);
458 }
459 
460 static int
461 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
462 {
463 	va_list ap;
464 	int retval;
465 
466 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
467 	    cfg->func);
468 	va_start(ap, fmt);
469 	retval += vprintf(fmt, ap);
470 	va_end(ap);
471 	return (retval);
472 }
473 
474 /* return base address of memory or port map */
475 
476 static pci_addr_t
477 pci_mapbase(uint64_t mapreg)
478 {
479 
480 	if (PCI_BAR_MEM(mapreg))
481 		return (mapreg & PCIM_BAR_MEM_BASE);
482 	else
483 		return (mapreg & PCIM_BAR_IO_BASE);
484 }
485 
486 /* return map type of memory or port map */
487 
488 static const char *
489 pci_maptype(uint64_t mapreg)
490 {
491 
492 	if (PCI_BAR_IO(mapreg))
493 		return ("I/O Port");
494 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
495 		return ("Prefetchable Memory");
496 	return ("Memory");
497 }
498 
499 /* return log2 of map size decoded for memory or port map */
500 
501 int
502 pci_mapsize(uint64_t testval)
503 {
504 	int ln2size;
505 
506 	testval = pci_mapbase(testval);
507 	ln2size = 0;
508 	if (testval != 0) {
509 		while ((testval & 1) == 0)
510 		{
511 			ln2size++;
512 			testval >>= 1;
513 		}
514 	}
515 	return (ln2size);
516 }
517 
518 /* return base address of device ROM */
519 
520 static pci_addr_t
521 pci_rombase(uint64_t mapreg)
522 {
523 
524 	return (mapreg & PCIM_BIOS_ADDR_MASK);
525 }
526 
527 /* return log2 of map size decided for device ROM */
528 
529 static int
530 pci_romsize(uint64_t testval)
531 {
532 	int ln2size;
533 
534 	testval = pci_rombase(testval);
535 	ln2size = 0;
536 	if (testval != 0) {
537 		while ((testval & 1) == 0)
538 		{
539 			ln2size++;
540 			testval >>= 1;
541 		}
542 	}
543 	return (ln2size);
544 }
545 
546 /* return log2 of address range supported by map register */
547 
548 static int
549 pci_maprange(uint64_t mapreg)
550 {
551 	int ln2range = 0;
552 
553 	if (PCI_BAR_IO(mapreg))
554 		ln2range = 32;
555 	else
556 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
557 		case PCIM_BAR_MEM_32:
558 			ln2range = 32;
559 			break;
560 		case PCIM_BAR_MEM_1MB:
561 			ln2range = 20;
562 			break;
563 		case PCIM_BAR_MEM_64:
564 			ln2range = 64;
565 			break;
566 		}
567 	return (ln2range);
568 }
569 
570 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
571 
572 static void
573 pci_fixancient(pcicfgregs *cfg)
574 {
575 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
576 		return;
577 
578 	/* PCI to PCI bridges use header type 1 */
579 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
580 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
581 }
582 
583 /* extract header type specific config data */
584 
585 static void
586 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
587 {
588 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
589 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
590 	case PCIM_HDRTYPE_NORMAL:
591 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
592 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
593 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
594 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
595 		cfg->nummaps	    = PCI_MAXMAPS_0;
596 		break;
597 	case PCIM_HDRTYPE_BRIDGE:
598 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
599 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
600 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
601 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
602 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
603 		cfg->nummaps	    = PCI_MAXMAPS_1;
604 		break;
605 	case PCIM_HDRTYPE_CARDBUS:
606 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
607 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
608 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
609 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
610 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
611 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
612 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
613 		cfg->nummaps	    = PCI_MAXMAPS_2;
614 		break;
615 	}
616 #undef REG
617 }
618 
619 /* read configuration header into pcicfgregs structure */
620 struct pci_devinfo *
621 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
622 {
623 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
624 	uint16_t vid, did;
625 
626 	vid = REG(PCIR_VENDOR, 2);
627 	did = REG(PCIR_DEVICE, 2);
628 	if (vid != 0xffff)
629 		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
630 
631 	return (NULL);
632 }
633 
634 static struct pci_devinfo *
635 pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
636     uint16_t did, size_t size)
637 {
638 	struct pci_devinfo *devlist_entry;
639 	pcicfgregs *cfg;
640 
641 	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
642 
643 	cfg = &devlist_entry->cfg;
644 
645 	cfg->domain		= d;
646 	cfg->bus		= b;
647 	cfg->slot		= s;
648 	cfg->func		= f;
649 	cfg->vendor		= vid;
650 	cfg->device		= did;
651 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
652 	cfg->statreg		= REG(PCIR_STATUS, 2);
653 	cfg->baseclass		= REG(PCIR_CLASS, 1);
654 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
655 	cfg->progif		= REG(PCIR_PROGIF, 1);
656 	cfg->revid		= REG(PCIR_REVID, 1);
657 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
658 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
659 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
660 	cfg->intpin		= REG(PCIR_INTPIN, 1);
661 	cfg->intline		= REG(PCIR_INTLINE, 1);
662 
663 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
664 	cfg->hdrtype		&= ~PCIM_MFDEV;
665 	STAILQ_INIT(&cfg->maps);
666 
667 	cfg->devinfo_size	= size;
668 	cfg->iov		= NULL;
669 
670 	pci_fixancient(cfg);
671 	pci_hdrtypedata(pcib, b, s, f, cfg);
672 
673 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
674 		pci_read_cap(pcib, cfg);
675 
676 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
677 
678 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
679 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
680 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
681 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
682 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
683 
684 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
685 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
686 	devlist_entry->conf.pc_vendor = cfg->vendor;
687 	devlist_entry->conf.pc_device = cfg->device;
688 
689 	devlist_entry->conf.pc_class = cfg->baseclass;
690 	devlist_entry->conf.pc_subclass = cfg->subclass;
691 	devlist_entry->conf.pc_progif = cfg->progif;
692 	devlist_entry->conf.pc_revid = cfg->revid;
693 
694 	pci_numdevs++;
695 	pci_generation++;
696 
697 	return (devlist_entry);
698 }
699 #undef REG
700 
701 static void
702 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
703 {
704 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
705     cfg->ea.ea_location + (n), w)
706 	int num_ent;
707 	int ptr;
708 	int a, b;
709 	uint32_t val;
710 	int ent_size;
711 	uint32_t dw[4];
712 	uint64_t base, max_offset;
713 	struct pci_ea_entry *eae;
714 
715 	if (cfg->ea.ea_location == 0)
716 		return;
717 
718 	STAILQ_INIT(&cfg->ea.ea_entries);
719 
720 	/* Determine the number of entries */
721 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
722 	num_ent &= PCIM_EA_NUM_ENT_MASK;
723 
724 	/* Find the first entry to care of */
725 	ptr = PCIR_EA_FIRST_ENT;
726 
727 	/* Skip DWORD 2 for type 1 functions */
728 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
729 		ptr += 4;
730 
731 	for (a = 0; a < num_ent; a++) {
732 
733 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
734 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
735 
736 		/* Read a number of dwords in the entry */
737 		val = REG(ptr, 4);
738 		ptr += 4;
739 		ent_size = (val & PCIM_EA_ES);
740 
741 		for (b = 0; b < ent_size; b++) {
742 			dw[b] = REG(ptr, 4);
743 			ptr += 4;
744 		}
745 
746 		eae->eae_flags = val;
747 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
748 
749 		base = dw[0] & PCIM_EA_FIELD_MASK;
750 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
751 		b = 2;
752 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
753 			base |= (uint64_t)dw[b] << 32UL;
754 			b++;
755 		}
756 		if (((dw[1] & PCIM_EA_IS_64) != 0)
757 		    && (b < ent_size)) {
758 			max_offset |= (uint64_t)dw[b] << 32UL;
759 			b++;
760 		}
761 
762 		eae->eae_base = base;
763 		eae->eae_max_offset = max_offset;
764 
765 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
766 
767 		if (bootverbose) {
768 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
769 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
770 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
771 		}
772 	}
773 }
774 #undef REG
775 
/*
 * Walk the capability list of the function described by 'cfg' and record
 * in 'cfg' the location and key fields of each capability handled below
 * (power management, HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X,
 * PCI-express and Enhanced Allocation).  Unknown capability IDs are
 * skipped.  As a side effect the file-level pcix_chipset and pcie_chipset
 * flags are set when the matching capability is seen.
 *
 * The REG and WREG macros defined here are intentionally not undefined at
 * the end; the VPD register helpers that follow use them.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The register holding the list head depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/*
		 * Sanity check
		 * NOTE(review): nextptr comes from 1-byte config reads, so
		 * this presumably only fires if a read returns more than 8
		 * bits; nothing here bounds the number of iterations if the
		 * list loops back on itself -- confirm whether that matters.
		 */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first power management capability is used. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/*
				 * The data register offset is only recorded
				 * when the next capability starts far enough
				 * away to leave room for it.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is a power of two encoded in the MMC field. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* The table size field holds the message count minus one. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA registers each pack a BAR index and an offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				/* Low half is the subvendor, high half the subdevice. */
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
940 
941 /*
942  * PCI Vital Product Data
943  */
944 
945 #define	PCI_VPD_TIMEOUT		1000000
946 
947 static int
948 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
949 {
950 	int count = PCI_VPD_TIMEOUT;
951 
952 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
953 
954 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
955 
956 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
957 		if (--count < 0)
958 			return (ENXIO);
959 		DELAY(1);	/* limit looping */
960 	}
961 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
962 
963 	return (0);
964 }
965 
#if 0
/*
 * Store one 32-bit word of VPD at byte offset 'reg' (currently compiled
 * out).
 *
 * The data register is loaded first, then the offset is written to the
 * address register with bit 0x8000 set.  The address register is polled
 * until that bit reads back as clear.
 *
 * Returns 0 on success, or ENXIO if the bit is still set after
 * PCI_VPD_TIMEOUT polls.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int tries;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	for (tries = PCI_VPD_TIMEOUT;
	    (REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0;) {
		if (--tries < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
985 
986 #undef PCI_VPD_TIMEOUT
987 
988 struct vpd_readstate {
989 	device_t	pcib;
990 	pcicfgregs	*cfg;
991 	uint32_t	val;
992 	int		bytesinval;
993 	int		off;
994 	uint8_t		cksum;
995 };
996 
997 static int
998 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
999 {
1000 	uint32_t reg;
1001 	uint8_t byte;
1002 
1003 	if (vrs->bytesinval == 0) {
1004 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1005 			return (ENXIO);
1006 		vrs->val = le32toh(reg);
1007 		vrs->off += 4;
1008 		byte = vrs->val & 0xff;
1009 		vrs->bytesinval = 3;
1010 	} else {
1011 		vrs->val = vrs->val >> 8;
1012 		byte = vrs->val & 0xff;
1013 		vrs->bytesinval--;
1014 	}
1015 
1016 	vrs->cksum += byte;
1017 	*data = byte;
1018 	return (0);
1019 }
1020 
1021 static void
1022 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1023 {
1024 	struct vpd_readstate vrs;
1025 	int state;
1026 	int name;
1027 	int remain;
1028 	int i;
1029 	int alloc, off;		/* alloc/off for RO/W arrays */
1030 	int cksumvalid;
1031 	int dflen;
1032 	uint8_t byte;
1033 	uint8_t byte2;
1034 
1035 	/* init vpd reader */
1036 	vrs.bytesinval = 0;
1037 	vrs.off = 0;
1038 	vrs.pcib = pcib;
1039 	vrs.cfg = cfg;
1040 	vrs.cksum = 0;
1041 
1042 	state = 0;
1043 	name = remain = i = 0;	/* shut up stupid gcc */
1044 	alloc = off = 0;	/* shut up stupid gcc */
1045 	dflen = 0;		/* shut up stupid gcc */
1046 	cksumvalid = -1;
1047 	while (state >= 0) {
1048 		if (vpd_nextbyte(&vrs, &byte)) {
1049 			state = -2;
1050 			break;
1051 		}
1052 #if 0
1053 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1054 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1055 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1056 #endif
1057 		switch (state) {
1058 		case 0:		/* item name */
1059 			if (byte & 0x80) {
1060 				if (vpd_nextbyte(&vrs, &byte2)) {
1061 					state = -2;
1062 					break;
1063 				}
1064 				remain = byte2;
1065 				if (vpd_nextbyte(&vrs, &byte2)) {
1066 					state = -2;
1067 					break;
1068 				}
1069 				remain |= byte2 << 8;
1070 				if (remain > (0x7f*4 - vrs.off)) {
1071 					state = -1;
1072 					pci_printf(cfg,
1073 					    "invalid VPD data, remain %#x\n",
1074 					    remain);
1075 				}
1076 				name = byte & 0x7f;
1077 			} else {
1078 				remain = byte & 0x7;
1079 				name = (byte >> 3) & 0xf;
1080 			}
1081 			switch (name) {
1082 			case 0x2:	/* String */
1083 				cfg->vpd.vpd_ident = malloc(remain + 1,
1084 				    M_DEVBUF, M_WAITOK);
1085 				i = 0;
1086 				state = 1;
1087 				break;
1088 			case 0xf:	/* End */
1089 				state = -1;
1090 				break;
1091 			case 0x10:	/* VPD-R */
1092 				alloc = 8;
1093 				off = 0;
1094 				cfg->vpd.vpd_ros = malloc(alloc *
1095 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1096 				    M_WAITOK | M_ZERO);
1097 				state = 2;
1098 				break;
1099 			case 0x11:	/* VPD-W */
1100 				alloc = 8;
1101 				off = 0;
1102 				cfg->vpd.vpd_w = malloc(alloc *
1103 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1104 				    M_WAITOK | M_ZERO);
1105 				state = 5;
1106 				break;
1107 			default:	/* Invalid data, abort */
1108 				state = -1;
1109 				break;
1110 			}
1111 			break;
1112 
1113 		case 1:	/* Identifier String */
1114 			cfg->vpd.vpd_ident[i++] = byte;
1115 			remain--;
1116 			if (remain == 0)  {
1117 				cfg->vpd.vpd_ident[i] = '\0';
1118 				state = 0;
1119 			}
1120 			break;
1121 
1122 		case 2:	/* VPD-R Keyword Header */
1123 			if (off == alloc) {
1124 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1125 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1126 				    M_DEVBUF, M_WAITOK | M_ZERO);
1127 			}
1128 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1129 			if (vpd_nextbyte(&vrs, &byte2)) {
1130 				state = -2;
1131 				break;
1132 			}
1133 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1134 			if (vpd_nextbyte(&vrs, &byte2)) {
1135 				state = -2;
1136 				break;
1137 			}
1138 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1139 			if (dflen == 0 &&
1140 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1141 			    2) == 0) {
1142 				/*
1143 				 * if this happens, we can't trust the rest
1144 				 * of the VPD.
1145 				 */
1146 				pci_printf(cfg, "bad keyword length: %d\n",
1147 				    dflen);
1148 				cksumvalid = 0;
1149 				state = -1;
1150 				break;
1151 			} else if (dflen == 0) {
1152 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1153 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1154 				    M_DEVBUF, M_WAITOK);
1155 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1156 			} else
1157 				cfg->vpd.vpd_ros[off].value = malloc(
1158 				    (dflen + 1) *
1159 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1160 				    M_DEVBUF, M_WAITOK);
1161 			remain -= 3;
1162 			i = 0;
1163 			/* keep in sync w/ state 3's transistions */
1164 			if (dflen == 0 && remain == 0)
1165 				state = 0;
1166 			else if (dflen == 0)
1167 				state = 2;
1168 			else
1169 				state = 3;
1170 			break;
1171 
1172 		case 3:	/* VPD-R Keyword Value */
1173 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1174 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1175 			    "RV", 2) == 0 && cksumvalid == -1) {
1176 				if (vrs.cksum == 0)
1177 					cksumvalid = 1;
1178 				else {
1179 					if (bootverbose)
1180 						pci_printf(cfg,
1181 					    "bad VPD cksum, remain %hhu\n",
1182 						    vrs.cksum);
1183 					cksumvalid = 0;
1184 					state = -1;
1185 					break;
1186 				}
1187 			}
1188 			dflen--;
1189 			remain--;
1190 			/* keep in sync w/ state 2's transistions */
1191 			if (dflen == 0)
1192 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1193 			if (dflen == 0 && remain == 0) {
1194 				cfg->vpd.vpd_rocnt = off;
1195 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1196 				    off * sizeof(*cfg->vpd.vpd_ros),
1197 				    M_DEVBUF, M_WAITOK | M_ZERO);
1198 				state = 0;
1199 			} else if (dflen == 0)
1200 				state = 2;
1201 			break;
1202 
1203 		case 4:
1204 			remain--;
1205 			if (remain == 0)
1206 				state = 0;
1207 			break;
1208 
1209 		case 5:	/* VPD-W Keyword Header */
1210 			if (off == alloc) {
1211 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1212 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1213 				    M_DEVBUF, M_WAITOK | M_ZERO);
1214 			}
1215 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1216 			if (vpd_nextbyte(&vrs, &byte2)) {
1217 				state = -2;
1218 				break;
1219 			}
1220 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1221 			if (vpd_nextbyte(&vrs, &byte2)) {
1222 				state = -2;
1223 				break;
1224 			}
1225 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1226 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1227 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1228 			    sizeof(*cfg->vpd.vpd_w[off].value),
1229 			    M_DEVBUF, M_WAITOK);
1230 			remain -= 3;
1231 			i = 0;
1232 			/* keep in sync w/ state 6's transistions */
1233 			if (dflen == 0 && remain == 0)
1234 				state = 0;
1235 			else if (dflen == 0)
1236 				state = 5;
1237 			else
1238 				state = 6;
1239 			break;
1240 
1241 		case 6:	/* VPD-W Keyword Value */
1242 			cfg->vpd.vpd_w[off].value[i++] = byte;
1243 			dflen--;
1244 			remain--;
1245 			/* keep in sync w/ state 5's transistions */
1246 			if (dflen == 0)
1247 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1248 			if (dflen == 0 && remain == 0) {
1249 				cfg->vpd.vpd_wcnt = off;
1250 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1251 				    off * sizeof(*cfg->vpd.vpd_w),
1252 				    M_DEVBUF, M_WAITOK | M_ZERO);
1253 				state = 0;
1254 			} else if (dflen == 0)
1255 				state = 5;
1256 			break;
1257 
1258 		default:
1259 			pci_printf(cfg, "invalid state: %d\n", state);
1260 			state = -1;
1261 			break;
1262 		}
1263 	}
1264 
1265 	if (cksumvalid == 0 || state < -1) {
1266 		/* read-only data bad, clean up */
1267 		if (cfg->vpd.vpd_ros != NULL) {
1268 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1269 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1270 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1271 			cfg->vpd.vpd_ros = NULL;
1272 		}
1273 	}
1274 	if (state < -1) {
1275 		/* I/O error, clean up */
1276 		pci_printf(cfg, "failed to read VPD data.\n");
1277 		if (cfg->vpd.vpd_ident != NULL) {
1278 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1279 			cfg->vpd.vpd_ident = NULL;
1280 		}
1281 		if (cfg->vpd.vpd_w != NULL) {
1282 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1283 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1284 			free(cfg->vpd.vpd_w, M_DEVBUF);
1285 			cfg->vpd.vpd_w = NULL;
1286 		}
1287 	}
1288 	cfg->vpd.vpd_cached = 1;
1289 #undef REG
1290 #undef WREG
1291 }
1292 
1293 int
1294 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1295 {
1296 	struct pci_devinfo *dinfo = device_get_ivars(child);
1297 	pcicfgregs *cfg = &dinfo->cfg;
1298 
1299 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1300 		pci_read_vpd(device_get_parent(dev), cfg);
1301 
1302 	*identptr = cfg->vpd.vpd_ident;
1303 
1304 	if (*identptr == NULL)
1305 		return (ENXIO);
1306 
1307 	return (0);
1308 }
1309 
1310 int
1311 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1312 	const char **vptr)
1313 {
1314 	struct pci_devinfo *dinfo = device_get_ivars(child);
1315 	pcicfgregs *cfg = &dinfo->cfg;
1316 	int i;
1317 
1318 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1319 		pci_read_vpd(device_get_parent(dev), cfg);
1320 
1321 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1322 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1323 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1324 			*vptr = cfg->vpd.vpd_ros[i].value;
1325 			return (0);
1326 		}
1327 
1328 	*vptr = NULL;
1329 	return (ENXIO);
1330 }
1331 
1332 struct pcicfg_vpd *
1333 pci_fetch_vpd_list(device_t dev)
1334 {
1335 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1336 	pcicfgregs *cfg = &dinfo->cfg;
1337 
1338 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1339 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1340 	return (&cfg->vpd);
1341 }
1342 
1343 /*
1344  * Find the requested HyperTransport capability and return the offset
1345  * in configuration space via the pointer provided.  The function
1346  * returns 0 on success and an error code otherwise.
1347  */
1348 int
1349 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1350 {
1351 	int ptr, error;
1352 	uint16_t val;
1353 
1354 	error = pci_find_cap(child, PCIY_HT, &ptr);
1355 	if (error)
1356 		return (error);
1357 
1358 	/*
1359 	 * Traverse the capabilities list checking each HT capability
1360 	 * to see if it matches the requested HT capability.
1361 	 */
1362 	while (ptr != 0) {
1363 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1364 		if (capability == PCIM_HTCAP_SLAVE ||
1365 		    capability == PCIM_HTCAP_HOST)
1366 			val &= 0xe000;
1367 		else
1368 			val &= PCIM_HTCMD_CAP_MASK;
1369 		if (val == capability) {
1370 			if (capreg != NULL)
1371 				*capreg = ptr;
1372 			return (0);
1373 		}
1374 
1375 		/* Skip to the next HT capability. */
1376 		while (ptr != 0) {
1377 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1378 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1379 			    PCIY_HT)
1380 				break;
1381 		}
1382 	}
1383 	return (ENOENT);
1384 }
1385 
1386 /*
1387  * Find the requested capability and return the offset in
1388  * configuration space via the pointer provided.  The function returns
1389  * 0 on success and an error code otherwise.
1390  */
1391 int
1392 pci_find_cap_method(device_t dev, device_t child, int capability,
1393     int *capreg)
1394 {
1395 	struct pci_devinfo *dinfo = device_get_ivars(child);
1396 	pcicfgregs *cfg = &dinfo->cfg;
1397 	u_int32_t status;
1398 	u_int8_t ptr;
1399 
1400 	/*
1401 	 * Check the CAP_LIST bit of the PCI status register first.
1402 	 */
1403 	status = pci_read_config(child, PCIR_STATUS, 2);
1404 	if (!(status & PCIM_STATUS_CAPPRESENT))
1405 		return (ENXIO);
1406 
1407 	/*
1408 	 * Determine the start pointer of the capabilities list.
1409 	 */
1410 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1411 	case PCIM_HDRTYPE_NORMAL:
1412 	case PCIM_HDRTYPE_BRIDGE:
1413 		ptr = PCIR_CAP_PTR;
1414 		break;
1415 	case PCIM_HDRTYPE_CARDBUS:
1416 		ptr = PCIR_CAP_PTR_2;
1417 		break;
1418 	default:
1419 		/* XXX: panic? */
1420 		return (ENXIO);		/* no extended capabilities support */
1421 	}
1422 	ptr = pci_read_config(child, ptr, 1);
1423 
1424 	/*
1425 	 * Traverse the capabilities list.
1426 	 */
1427 	while (ptr != 0) {
1428 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1429 			if (capreg != NULL)
1430 				*capreg = ptr;
1431 			return (0);
1432 		}
1433 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1434 	}
1435 
1436 	return (ENOENT);
1437 }
1438 
1439 /*
1440  * Find the requested extended capability and return the offset in
1441  * configuration space via the pointer provided.  The function returns
1442  * 0 on success and an error code otherwise.
1443  */
1444 int
1445 pci_find_extcap_method(device_t dev, device_t child, int capability,
1446     int *capreg)
1447 {
1448 	struct pci_devinfo *dinfo = device_get_ivars(child);
1449 	pcicfgregs *cfg = &dinfo->cfg;
1450 	uint32_t ecap;
1451 	uint16_t ptr;
1452 
1453 	/* Only supported for PCI-express devices. */
1454 	if (cfg->pcie.pcie_location == 0)
1455 		return (ENXIO);
1456 
1457 	ptr = PCIR_EXTCAP;
1458 	ecap = pci_read_config(child, ptr, 4);
1459 	if (ecap == 0xffffffff || ecap == 0)
1460 		return (ENOENT);
1461 	for (;;) {
1462 		if (PCI_EXTCAP_ID(ecap) == capability) {
1463 			if (capreg != NULL)
1464 				*capreg = ptr;
1465 			return (0);
1466 		}
1467 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1468 		if (ptr == 0)
1469 			break;
1470 		ecap = pci_read_config(child, ptr, 4);
1471 	}
1472 
1473 	return (ENOENT);
1474 }
1475 
1476 /*
1477  * Support for MSI-X message interrupts.
1478  */
1479 void
1480 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1481     uint64_t address, uint32_t data)
1482 {
1483 	struct pci_devinfo *dinfo = device_get_ivars(child);
1484 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1485 	uint32_t offset;
1486 
1487 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1488 	offset = msix->msix_table_offset + index * 16;
1489 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1490 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1491 	bus_write_4(msix->msix_table_res, offset + 8, data);
1492 
1493 	/* Enable MSI -> HT mapping. */
1494 	pci_ht_map_msi(child, address);
1495 }
1496 
1497 void
1498 pci_mask_msix(device_t dev, u_int index)
1499 {
1500 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1501 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1502 	uint32_t offset, val;
1503 
1504 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1505 	offset = msix->msix_table_offset + index * 16 + 12;
1506 	val = bus_read_4(msix->msix_table_res, offset);
1507 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1508 		val |= PCIM_MSIX_VCTRL_MASK;
1509 		bus_write_4(msix->msix_table_res, offset, val);
1510 	}
1511 }
1512 
1513 void
1514 pci_unmask_msix(device_t dev, u_int index)
1515 {
1516 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1517 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1518 	uint32_t offset, val;
1519 
1520 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1521 	offset = msix->msix_table_offset + index * 16 + 12;
1522 	val = bus_read_4(msix->msix_table_res, offset);
1523 	if (val & PCIM_MSIX_VCTRL_MASK) {
1524 		val &= ~PCIM_MSIX_VCTRL_MASK;
1525 		bus_write_4(msix->msix_table_res, offset, val);
1526 	}
1527 }
1528 
1529 int
1530 pci_pending_msix(device_t dev, u_int index)
1531 {
1532 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1533 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1534 	uint32_t offset, bit;
1535 
1536 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1537 	offset = msix->msix_pba_offset + (index / 32) * 4;
1538 	bit = 1 << index % 32;
1539 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1540 }
1541 
1542 /*
1543  * Restore MSI-X registers and table during resume.  If MSI-X is
1544  * enabled then walk the virtual table to restore the actual MSI-X
1545  * table.
1546  */
1547 static void
1548 pci_resume_msix(device_t dev)
1549 {
1550 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1551 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1552 	struct msix_table_entry *mte;
1553 	struct msix_vector *mv;
1554 	int i;
1555 
1556 	if (msix->msix_alloc > 0) {
1557 		/* First, mask all vectors. */
1558 		for (i = 0; i < msix->msix_msgnum; i++)
1559 			pci_mask_msix(dev, i);
1560 
1561 		/* Second, program any messages with at least one handler. */
1562 		for (i = 0; i < msix->msix_table_len; i++) {
1563 			mte = &msix->msix_table[i];
1564 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1565 				continue;
1566 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1567 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1568 			pci_unmask_msix(dev, i);
1569 		}
1570 	}
1571 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1572 	    msix->msix_ctrl, 2);
1573 }
1574 
1575 /*
1576  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1577  * returned in *count.  After this function returns, each message will be
1578  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1579  */
1580 int
1581 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1582 {
1583 	struct pci_devinfo *dinfo = device_get_ivars(child);
1584 	pcicfgregs *cfg = &dinfo->cfg;
1585 	struct resource_list_entry *rle;
1586 	int actual, error, i, irq, max;
1587 
1588 	/* Don't let count == 0 get us into trouble. */
1589 	if (*count == 0)
1590 		return (EINVAL);
1591 
1592 	/* If rid 0 is allocated, then fail. */
1593 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1594 	if (rle != NULL && rle->res != NULL)
1595 		return (ENXIO);
1596 
1597 	/* Already have allocated messages? */
1598 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1599 		return (ENXIO);
1600 
1601 	/* If MSI-X is blacklisted for this system, fail. */
1602 	if (pci_msix_blacklisted())
1603 		return (ENXIO);
1604 
1605 	/* MSI-X capability present? */
1606 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1607 		return (ENODEV);
1608 
1609 	/* Make sure the appropriate BARs are mapped. */
1610 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1611 	    cfg->msix.msix_table_bar);
1612 	if (rle == NULL || rle->res == NULL ||
1613 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1614 		return (ENXIO);
1615 	cfg->msix.msix_table_res = rle->res;
1616 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1617 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1618 		    cfg->msix.msix_pba_bar);
1619 		if (rle == NULL || rle->res == NULL ||
1620 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1621 			return (ENXIO);
1622 	}
1623 	cfg->msix.msix_pba_res = rle->res;
1624 
1625 	if (bootverbose)
1626 		device_printf(child,
1627 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1628 		    *count, cfg->msix.msix_msgnum);
1629 	max = min(*count, cfg->msix.msix_msgnum);
1630 	for (i = 0; i < max; i++) {
1631 		/* Allocate a message. */
1632 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1633 		if (error) {
1634 			if (i == 0)
1635 				return (error);
1636 			break;
1637 		}
1638 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1639 		    irq, 1);
1640 	}
1641 	actual = i;
1642 
1643 	if (bootverbose) {
1644 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1645 		if (actual == 1)
1646 			device_printf(child, "using IRQ %lu for MSI-X\n",
1647 			    rle->start);
1648 		else {
1649 			int run;
1650 
1651 			/*
1652 			 * Be fancy and try to print contiguous runs of
1653 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1654 			 * 'run' is true if we are in a range.
1655 			 */
1656 			device_printf(child, "using IRQs %lu", rle->start);
1657 			irq = rle->start;
1658 			run = 0;
1659 			for (i = 1; i < actual; i++) {
1660 				rle = resource_list_find(&dinfo->resources,
1661 				    SYS_RES_IRQ, i + 1);
1662 
1663 				/* Still in a run? */
1664 				if (rle->start == irq + 1) {
1665 					run = 1;
1666 					irq++;
1667 					continue;
1668 				}
1669 
1670 				/* Finish previous range. */
1671 				if (run) {
1672 					printf("-%d", irq);
1673 					run = 0;
1674 				}
1675 
1676 				/* Start new range. */
1677 				printf(",%lu", rle->start);
1678 				irq = rle->start;
1679 			}
1680 
1681 			/* Unfinished range? */
1682 			if (run)
1683 				printf("-%d", irq);
1684 			printf(" for MSI-X\n");
1685 		}
1686 	}
1687 
1688 	/* Mask all vectors. */
1689 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1690 		pci_mask_msix(child, i);
1691 
1692 	/* Allocate and initialize vector data and virtual table. */
1693 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1694 	    M_DEVBUF, M_WAITOK | M_ZERO);
1695 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1696 	    M_DEVBUF, M_WAITOK | M_ZERO);
1697 	for (i = 0; i < actual; i++) {
1698 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1699 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1700 		cfg->msix.msix_table[i].mte_vector = i + 1;
1701 	}
1702 
1703 	/* Update control register to enable MSI-X. */
1704 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1705 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1706 	    cfg->msix.msix_ctrl, 2);
1707 
1708 	/* Update counts of alloc'd messages. */
1709 	cfg->msix.msix_alloc = actual;
1710 	cfg->msix.msix_table_len = actual;
1711 	*count = actual;
1712 	return (0);
1713 }
1714 
1715 /*
1716  * By default, pci_alloc_msix() will assign the allocated IRQ
1717  * resources consecutively to the first N messages in the MSI-X table.
1718  * However, device drivers may want to use different layouts if they
1719  * either receive fewer messages than they asked for, or they wish to
1720  * populate the MSI-X table sparsely.  This method allows the driver
1721  * to specify what layout it wants.  It must be called after a
1722  * successful pci_alloc_msix() but before any of the associated
1723  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1724  *
1725  * The 'vectors' array contains 'count' message vectors.  The array
1726  * maps directly to the MSI-X table in that index 0 in the array
1727  * specifies the vector for the first message in the MSI-X table, etc.
1728  * The vector value in each array index can either be 0 to indicate
1729  * that no vector should be assigned to a message slot, or it can be a
1730  * number from 1 to N (where N is the count returned from a
1731  * succcessful call to pci_alloc_msix()) to indicate which message
1732  * vector (IRQ) to be used for the corresponding message.
1733  *
1734  * On successful return, each message with a non-zero vector will have
1735  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1736  * 1.  Additionally, if any of the IRQs allocated via the previous
1737  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1738  * will be freed back to the system automatically.
1739  *
1740  * For example, suppose a driver has a MSI-X table with 6 messages and
1741  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1742  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1743  * C.  After the call to pci_alloc_msix(), the device will be setup to
1744  * have an MSI-X table of ABC--- (where - means no vector assigned).
1745  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1746  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1747  * be freed back to the system.  This device will also have valid
1748  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1749  *
1750  * In any case, the SYS_RES_IRQ rid X will always map to the message
1751  * at MSI-X table index X - 1 and will only be valid if a vector is
1752  * assigned to that table entry.
1753  */
1754 int
1755 pci_remap_msix_method(device_t dev, device_t child, int count,
1756     const u_int *vectors)
1757 {
1758 	struct pci_devinfo *dinfo = device_get_ivars(child);
1759 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1760 	struct resource_list_entry *rle;
1761 	int i, irq, j, *used;
1762 
1763 	/*
1764 	 * Have to have at least one message in the table but the
1765 	 * table can't be bigger than the actual MSI-X table in the
1766 	 * device.
1767 	 */
1768 	if (count == 0 || count > msix->msix_msgnum)
1769 		return (EINVAL);
1770 
1771 	/* Sanity check the vectors. */
1772 	for (i = 0; i < count; i++)
1773 		if (vectors[i] > msix->msix_alloc)
1774 			return (EINVAL);
1775 
1776 	/*
1777 	 * Make sure there aren't any holes in the vectors to be used.
1778 	 * It's a big pain to support it, and it doesn't really make
1779 	 * sense anyway.  Also, at least one vector must be used.
1780 	 */
1781 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1782 	    M_ZERO);
1783 	for (i = 0; i < count; i++)
1784 		if (vectors[i] != 0)
1785 			used[vectors[i] - 1] = 1;
1786 	for (i = 0; i < msix->msix_alloc - 1; i++)
1787 		if (used[i] == 0 && used[i + 1] == 1) {
1788 			free(used, M_DEVBUF);
1789 			return (EINVAL);
1790 		}
1791 	if (used[0] != 1) {
1792 		free(used, M_DEVBUF);
1793 		return (EINVAL);
1794 	}
1795 
1796 	/* Make sure none of the resources are allocated. */
1797 	for (i = 0; i < msix->msix_table_len; i++) {
1798 		if (msix->msix_table[i].mte_vector == 0)
1799 			continue;
1800 		if (msix->msix_table[i].mte_handlers > 0) {
1801 			free(used, M_DEVBUF);
1802 			return (EBUSY);
1803 		}
1804 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1805 		KASSERT(rle != NULL, ("missing resource"));
1806 		if (rle->res != NULL) {
1807 			free(used, M_DEVBUF);
1808 			return (EBUSY);
1809 		}
1810 	}
1811 
1812 	/* Free the existing resource list entries. */
1813 	for (i = 0; i < msix->msix_table_len; i++) {
1814 		if (msix->msix_table[i].mte_vector == 0)
1815 			continue;
1816 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1817 	}
1818 
1819 	/*
1820 	 * Build the new virtual table keeping track of which vectors are
1821 	 * used.
1822 	 */
1823 	free(msix->msix_table, M_DEVBUF);
1824 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1825 	    M_DEVBUF, M_WAITOK | M_ZERO);
1826 	for (i = 0; i < count; i++)
1827 		msix->msix_table[i].mte_vector = vectors[i];
1828 	msix->msix_table_len = count;
1829 
1830 	/* Free any unused IRQs and resize the vectors array if necessary. */
1831 	j = msix->msix_alloc - 1;
1832 	if (used[j] == 0) {
1833 		struct msix_vector *vec;
1834 
1835 		while (used[j] == 0) {
1836 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1837 			    msix->msix_vectors[j].mv_irq);
1838 			j--;
1839 		}
1840 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1841 		    M_WAITOK);
1842 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1843 		    (j + 1));
1844 		free(msix->msix_vectors, M_DEVBUF);
1845 		msix->msix_vectors = vec;
1846 		msix->msix_alloc = j + 1;
1847 	}
1848 	free(used, M_DEVBUF);
1849 
1850 	/* Map the IRQs onto the rids. */
1851 	for (i = 0; i < count; i++) {
1852 		if (vectors[i] == 0)
1853 			continue;
1854 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1855 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1856 		    irq, 1);
1857 	}
1858 
1859 	if (bootverbose) {
1860 		device_printf(child, "Remapped MSI-X IRQs as: ");
1861 		for (i = 0; i < count; i++) {
1862 			if (i != 0)
1863 				printf(", ");
1864 			if (vectors[i] == 0)
1865 				printf("---");
1866 			else
1867 				printf("%d",
1868 				    msix->msix_vectors[vectors[i]].mv_irq);
1869 		}
1870 		printf("\n");
1871 	}
1872 
1873 	return (0);
1874 }
1875 
1876 static int
1877 pci_release_msix(device_t dev, device_t child)
1878 {
1879 	struct pci_devinfo *dinfo = device_get_ivars(child);
1880 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1881 	struct resource_list_entry *rle;
1882 	int i;
1883 
1884 	/* Do we have any messages to release? */
1885 	if (msix->msix_alloc == 0)
1886 		return (ENODEV);
1887 
1888 	/* Make sure none of the resources are allocated. */
1889 	for (i = 0; i < msix->msix_table_len; i++) {
1890 		if (msix->msix_table[i].mte_vector == 0)
1891 			continue;
1892 		if (msix->msix_table[i].mte_handlers > 0)
1893 			return (EBUSY);
1894 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1895 		KASSERT(rle != NULL, ("missing resource"));
1896 		if (rle->res != NULL)
1897 			return (EBUSY);
1898 	}
1899 
1900 	/* Update control register to disable MSI-X. */
1901 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1902 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1903 	    msix->msix_ctrl, 2);
1904 
1905 	/* Free the resource list entries. */
1906 	for (i = 0; i < msix->msix_table_len; i++) {
1907 		if (msix->msix_table[i].mte_vector == 0)
1908 			continue;
1909 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1910 	}
1911 	free(msix->msix_table, M_DEVBUF);
1912 	msix->msix_table_len = 0;
1913 
1914 	/* Release the IRQs. */
1915 	for (i = 0; i < msix->msix_alloc; i++)
1916 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1917 		    msix->msix_vectors[i].mv_irq);
1918 	free(msix->msix_vectors, M_DEVBUF);
1919 	msix->msix_alloc = 0;
1920 	return (0);
1921 }
1922 
1923 /*
1924  * Return the max supported MSI-X messages this device supports.
1925  * Basically, assuming the MD code can alloc messages, this function
1926  * should return the maximum value that pci_alloc_msix() can return.
1927  * Thus, it is subject to the tunables, etc.
1928  */
1929 int
1930 pci_msix_count_method(device_t dev, device_t child)
1931 {
1932 	struct pci_devinfo *dinfo = device_get_ivars(child);
1933 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1934 
1935 	if (pci_do_msix && msix->msix_location != 0)
1936 		return (msix->msix_msgnum);
1937 	return (0);
1938 }
1939 
1940 int
1941 pci_msix_pba_bar_method(device_t dev, device_t child)
1942 {
1943 	struct pci_devinfo *dinfo = device_get_ivars(child);
1944 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1945 
1946 	if (pci_do_msix && msix->msix_location != 0)
1947 		return (msix->msix_pba_bar);
1948 	return (-1);
1949 }
1950 
1951 int
1952 pci_msix_table_bar_method(device_t dev, device_t child)
1953 {
1954 	struct pci_devinfo *dinfo = device_get_ivars(child);
1955 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1956 
1957 	if (pci_do_msix && msix->msix_location != 0)
1958 		return (msix->msix_table_bar);
1959 	return (-1);
1960 }
1961 
1962 /*
1963  * HyperTransport MSI mapping control
1964  */
1965 void
1966 pci_ht_map_msi(device_t dev, uint64_t addr)
1967 {
1968 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1969 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1970 
1971 	if (!ht->ht_msimap)
1972 		return;
1973 
1974 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1975 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1976 		/* Enable MSI -> HT mapping. */
1977 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1978 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1979 		    ht->ht_msictrl, 2);
1980 	}
1981 
1982 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1983 		/* Disable MSI -> HT mapping. */
1984 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1985 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1986 		    ht->ht_msictrl, 2);
1987 	}
1988 }
1989 
1990 int
1991 pci_get_max_read_req(device_t dev)
1992 {
1993 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1994 	int cap;
1995 	uint16_t val;
1996 
1997 	cap = dinfo->cfg.pcie.pcie_location;
1998 	if (cap == 0)
1999 		return (0);
2000 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2001 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2002 	val >>= 12;
2003 	return (1 << (val + 7));
2004 }
2005 
2006 int
2007 pci_set_max_read_req(device_t dev, int size)
2008 {
2009 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2010 	int cap;
2011 	uint16_t val;
2012 
2013 	cap = dinfo->cfg.pcie.pcie_location;
2014 	if (cap == 0)
2015 		return (0);
2016 	if (size < 128)
2017 		size = 128;
2018 	if (size > 4096)
2019 		size = 4096;
2020 	size = (1 << (fls(size) - 1));
2021 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2022 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2023 	val |= (fls(size) - 8) << 12;
2024 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2025 	return (size);
2026 }
2027 
2028 uint32_t
2029 pcie_read_config(device_t dev, int reg, int width)
2030 {
2031 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2032 	int cap;
2033 
2034 	cap = dinfo->cfg.pcie.pcie_location;
2035 	if (cap == 0) {
2036 		if (width == 2)
2037 			return (0xffff);
2038 		return (0xffffffff);
2039 	}
2040 
2041 	return (pci_read_config(dev, cap + reg, width));
2042 }
2043 
2044 void
2045 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2046 {
2047 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2048 	int cap;
2049 
2050 	cap = dinfo->cfg.pcie.pcie_location;
2051 	if (cap == 0)
2052 		return;
2053 	pci_write_config(dev, cap + reg, value, width);
2054 }
2055 
2056 /*
2057  * Adjusts a PCI-e capability register by clearing the bits in mask
2058  * and setting the bits in (value & mask).  Bits not set in mask are
2059  * not adjusted.
2060  *
2061  * Returns the old value on success or all ones on failure.
2062  */
2063 uint32_t
2064 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2065     int width)
2066 {
2067 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2068 	uint32_t old, new;
2069 	int cap;
2070 
2071 	cap = dinfo->cfg.pcie.pcie_location;
2072 	if (cap == 0) {
2073 		if (width == 2)
2074 			return (0xffff);
2075 		return (0xffffffff);
2076 	}
2077 
2078 	old = pci_read_config(dev, cap + reg, width);
2079 	new = old & ~mask;
2080 	new |= (value & mask);
2081 	pci_write_config(dev, cap + reg, new, width);
2082 	return (old);
2083 }
2084 
2085 /*
2086  * Support for MSI message signalled interrupts.
2087  */
2088 void
2089 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2090     uint16_t data)
2091 {
2092 	struct pci_devinfo *dinfo = device_get_ivars(child);
2093 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2094 
2095 	/* Write data and address values. */
2096 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2097 	    address & 0xffffffff, 4);
2098 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2099 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2100 		    address >> 32, 4);
2101 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2102 		    data, 2);
2103 	} else
2104 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2105 		    2);
2106 
2107 	/* Enable MSI in the control register. */
2108 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2109 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2110 	    msi->msi_ctrl, 2);
2111 
2112 	/* Enable MSI -> HT mapping. */
2113 	pci_ht_map_msi(child, address);
2114 }
2115 
2116 void
2117 pci_disable_msi_method(device_t dev, device_t child)
2118 {
2119 	struct pci_devinfo *dinfo = device_get_ivars(child);
2120 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2121 
2122 	/* Disable MSI -> HT mapping. */
2123 	pci_ht_map_msi(child, 0);
2124 
2125 	/* Disable MSI in the control register. */
2126 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2127 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2128 	    msi->msi_ctrl, 2);
2129 }
2130 
2131 /*
2132  * Restore MSI registers during resume.  If MSI is enabled then
2133  * restore the data and address registers in addition to the control
2134  * register.
2135  */
2136 static void
2137 pci_resume_msi(device_t dev)
2138 {
2139 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2140 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2141 	uint64_t address;
2142 	uint16_t data;
2143 
2144 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2145 		address = msi->msi_addr;
2146 		data = msi->msi_data;
2147 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2148 		    address & 0xffffffff, 4);
2149 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2150 			pci_write_config(dev, msi->msi_location +
2151 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2152 			pci_write_config(dev, msi->msi_location +
2153 			    PCIR_MSI_DATA_64BIT, data, 2);
2154 		} else
2155 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2156 			    data, 2);
2157 	}
2158 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2159 	    2);
2160 }
2161 
2162 static int
2163 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2164 {
2165 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2166 	pcicfgregs *cfg = &dinfo->cfg;
2167 	struct resource_list_entry *rle;
2168 	struct msix_table_entry *mte;
2169 	struct msix_vector *mv;
2170 	uint64_t addr;
2171 	uint32_t data;
2172 	int error, i, j;
2173 
2174 	/*
2175 	 * Handle MSI first.  We try to find this IRQ among our list
2176 	 * of MSI IRQs.  If we find it, we request updated address and
2177 	 * data registers and apply the results.
2178 	 */
2179 	if (cfg->msi.msi_alloc > 0) {
2180 
2181 		/* If we don't have any active handlers, nothing to do. */
2182 		if (cfg->msi.msi_handlers == 0)
2183 			return (0);
2184 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2185 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2186 			    i + 1);
2187 			if (rle->start == irq) {
2188 				error = PCIB_MAP_MSI(device_get_parent(bus),
2189 				    dev, irq, &addr, &data);
2190 				if (error)
2191 					return (error);
2192 				pci_disable_msi(dev);
2193 				dinfo->cfg.msi.msi_addr = addr;
2194 				dinfo->cfg.msi.msi_data = data;
2195 				pci_enable_msi(dev, addr, data);
2196 				return (0);
2197 			}
2198 		}
2199 		return (ENOENT);
2200 	}
2201 
2202 	/*
2203 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2204 	 * we request the updated mapping info.  If that works, we go
2205 	 * through all the slots that use this IRQ and update them.
2206 	 */
2207 	if (cfg->msix.msix_alloc > 0) {
2208 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2209 			mv = &cfg->msix.msix_vectors[i];
2210 			if (mv->mv_irq == irq) {
2211 				error = PCIB_MAP_MSI(device_get_parent(bus),
2212 				    dev, irq, &addr, &data);
2213 				if (error)
2214 					return (error);
2215 				mv->mv_address = addr;
2216 				mv->mv_data = data;
2217 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2218 					mte = &cfg->msix.msix_table[j];
2219 					if (mte->mte_vector != i + 1)
2220 						continue;
2221 					if (mte->mte_handlers == 0)
2222 						continue;
2223 					pci_mask_msix(dev, j);
2224 					pci_enable_msix(dev, j, addr, data);
2225 					pci_unmask_msix(dev, j);
2226 				}
2227 			}
2228 		}
2229 		return (ENOENT);
2230 	}
2231 
2232 	return (ENOENT);
2233 }
2234 
2235 /*
2236  * Returns true if the specified device is blacklisted because MSI
2237  * doesn't work.
2238  */
2239 int
2240 pci_msi_device_blacklisted(device_t dev)
2241 {
2242 
2243 	if (!pci_honor_msi_blacklist)
2244 		return (0);
2245 
2246 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2247 }
2248 
2249 /*
2250  * Determine if MSI is blacklisted globally on this system.  Currently,
2251  * we just check for blacklisted chipsets as represented by the
2252  * host-PCI bridge at device 0:0:0.  In the future, it may become
2253  * necessary to check other system attributes, such as the kenv values
2254  * that give the motherboard manufacturer and model number.
2255  */
2256 static int
2257 pci_msi_blacklisted(void)
2258 {
2259 	device_t dev;
2260 
2261 	if (!pci_honor_msi_blacklist)
2262 		return (0);
2263 
2264 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2265 	if (!(pcie_chipset || pcix_chipset)) {
2266 		if (vm_guest != VM_GUEST_NO) {
2267 			/*
2268 			 * Whitelist older chipsets in virtual
2269 			 * machines known to support MSI.
2270 			 */
2271 			dev = pci_find_bsf(0, 0, 0);
2272 			if (dev != NULL)
2273 				return (!pci_has_quirk(pci_get_devid(dev),
2274 					PCI_QUIRK_ENABLE_MSI_VM));
2275 		}
2276 		return (1);
2277 	}
2278 
2279 	dev = pci_find_bsf(0, 0, 0);
2280 	if (dev != NULL)
2281 		return (pci_msi_device_blacklisted(dev));
2282 	return (0);
2283 }
2284 
2285 /*
2286  * Returns true if the specified device is blacklisted because MSI-X
2287  * doesn't work.  Note that this assumes that if MSI doesn't work,
2288  * MSI-X doesn't either.
2289  */
2290 int
2291 pci_msix_device_blacklisted(device_t dev)
2292 {
2293 
2294 	if (!pci_honor_msi_blacklist)
2295 		return (0);
2296 
2297 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2298 		return (1);
2299 
2300 	return (pci_msi_device_blacklisted(dev));
2301 }
2302 
2303 /*
2304  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2305  * is blacklisted, assume that MSI-X is as well.  Check for additional
2306  * chipsets where MSI works but MSI-X does not.
2307  */
2308 static int
2309 pci_msix_blacklisted(void)
2310 {
2311 	device_t dev;
2312 
2313 	if (!pci_honor_msi_blacklist)
2314 		return (0);
2315 
2316 	dev = pci_find_bsf(0, 0, 0);
2317 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2318 	    PCI_QUIRK_DISABLE_MSIX))
2319 		return (1);
2320 
2321 	return (pci_msi_blacklisted());
2322 }
2323 
2324 /*
2325  * Attempt to allocate *count MSI messages.  The actual number allocated is
2326  * returned in *count.  After this function returns, each message will be
2327  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2328  */
2329 int
2330 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2331 {
2332 	struct pci_devinfo *dinfo = device_get_ivars(child);
2333 	pcicfgregs *cfg = &dinfo->cfg;
2334 	struct resource_list_entry *rle;
2335 	int actual, error, i, irqs[32];
2336 	uint16_t ctrl;
2337 
2338 	/* Don't let count == 0 get us into trouble. */
2339 	if (*count == 0)
2340 		return (EINVAL);
2341 
2342 	/* If rid 0 is allocated, then fail. */
2343 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2344 	if (rle != NULL && rle->res != NULL)
2345 		return (ENXIO);
2346 
2347 	/* Already have allocated messages? */
2348 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2349 		return (ENXIO);
2350 
2351 	/* If MSI is blacklisted for this system, fail. */
2352 	if (pci_msi_blacklisted())
2353 		return (ENXIO);
2354 
2355 	/* MSI capability present? */
2356 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2357 		return (ENODEV);
2358 
2359 	if (bootverbose)
2360 		device_printf(child,
2361 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2362 		    *count, cfg->msi.msi_msgnum);
2363 
2364 	/* Don't ask for more than the device supports. */
2365 	actual = min(*count, cfg->msi.msi_msgnum);
2366 
2367 	/* Don't ask for more than 32 messages. */
2368 	actual = min(actual, 32);
2369 
2370 	/* MSI requires power of 2 number of messages. */
2371 	if (!powerof2(actual))
2372 		return (EINVAL);
2373 
2374 	for (;;) {
2375 		/* Try to allocate N messages. */
2376 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2377 		    actual, irqs);
2378 		if (error == 0)
2379 			break;
2380 		if (actual == 1)
2381 			return (error);
2382 
2383 		/* Try N / 2. */
2384 		actual >>= 1;
2385 	}
2386 
2387 	/*
2388 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2389 	 * resources in the irqs[] array, so add new resources
2390 	 * starting at rid 1.
2391 	 */
2392 	for (i = 0; i < actual; i++)
2393 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2394 		    irqs[i], irqs[i], 1);
2395 
2396 	if (bootverbose) {
2397 		if (actual == 1)
2398 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2399 		else {
2400 			int run;
2401 
2402 			/*
2403 			 * Be fancy and try to print contiguous runs
2404 			 * of IRQ values as ranges.  'run' is true if
2405 			 * we are in a range.
2406 			 */
2407 			device_printf(child, "using IRQs %d", irqs[0]);
2408 			run = 0;
2409 			for (i = 1; i < actual; i++) {
2410 
2411 				/* Still in a run? */
2412 				if (irqs[i] == irqs[i - 1] + 1) {
2413 					run = 1;
2414 					continue;
2415 				}
2416 
2417 				/* Finish previous range. */
2418 				if (run) {
2419 					printf("-%d", irqs[i - 1]);
2420 					run = 0;
2421 				}
2422 
2423 				/* Start new range. */
2424 				printf(",%d", irqs[i]);
2425 			}
2426 
2427 			/* Unfinished range? */
2428 			if (run)
2429 				printf("-%d", irqs[actual - 1]);
2430 			printf(" for MSI\n");
2431 		}
2432 	}
2433 
2434 	/* Update control register with actual count. */
2435 	ctrl = cfg->msi.msi_ctrl;
2436 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2437 	ctrl |= (ffs(actual) - 1) << 4;
2438 	cfg->msi.msi_ctrl = ctrl;
2439 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2440 
2441 	/* Update counts of alloc'd messages. */
2442 	cfg->msi.msi_alloc = actual;
2443 	cfg->msi.msi_handlers = 0;
2444 	*count = actual;
2445 	return (0);
2446 }
2447 
2448 /* Release the MSI messages associated with this device. */
2449 int
2450 pci_release_msi_method(device_t dev, device_t child)
2451 {
2452 	struct pci_devinfo *dinfo = device_get_ivars(child);
2453 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2454 	struct resource_list_entry *rle;
2455 	int error, i, irqs[32];
2456 
2457 	/* Try MSI-X first. */
2458 	error = pci_release_msix(dev, child);
2459 	if (error != ENODEV)
2460 		return (error);
2461 
2462 	/* Do we have any messages to release? */
2463 	if (msi->msi_alloc == 0)
2464 		return (ENODEV);
2465 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2466 
2467 	/* Make sure none of the resources are allocated. */
2468 	if (msi->msi_handlers > 0)
2469 		return (EBUSY);
2470 	for (i = 0; i < msi->msi_alloc; i++) {
2471 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2472 		KASSERT(rle != NULL, ("missing MSI resource"));
2473 		if (rle->res != NULL)
2474 			return (EBUSY);
2475 		irqs[i] = rle->start;
2476 	}
2477 
2478 	/* Update control register with 0 count. */
2479 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2480 	    ("%s: MSI still enabled", __func__));
2481 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2482 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2483 	    msi->msi_ctrl, 2);
2484 
2485 	/* Release the messages. */
2486 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2487 	for (i = 0; i < msi->msi_alloc; i++)
2488 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2489 
2490 	/* Update alloc count. */
2491 	msi->msi_alloc = 0;
2492 	msi->msi_addr = 0;
2493 	msi->msi_data = 0;
2494 	return (0);
2495 }
2496 
2497 /*
2498  * Return the max supported MSI messages this device supports.
2499  * Basically, assuming the MD code can alloc messages, this function
2500  * should return the maximum value that pci_alloc_msi() can return.
2501  * Thus, it is subject to the tunables, etc.
2502  */
2503 int
2504 pci_msi_count_method(device_t dev, device_t child)
2505 {
2506 	struct pci_devinfo *dinfo = device_get_ivars(child);
2507 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2508 
2509 	if (pci_do_msi && msi->msi_location != 0)
2510 		return (msi->msi_msgnum);
2511 	return (0);
2512 }
2513 
2514 /* free pcicfgregs structure and all depending data structures */
2515 
2516 int
2517 pci_freecfg(struct pci_devinfo *dinfo)
2518 {
2519 	struct devlist *devlist_head;
2520 	struct pci_map *pm, *next;
2521 	int i;
2522 
2523 	devlist_head = &pci_devq;
2524 
2525 	if (dinfo->cfg.vpd.vpd_reg) {
2526 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2527 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2528 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2529 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2530 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2531 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2532 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2533 	}
2534 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2535 		free(pm, M_DEVBUF);
2536 	}
2537 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2538 	free(dinfo, M_DEVBUF);
2539 
2540 	/* increment the generation count */
2541 	pci_generation++;
2542 
2543 	/* we're losing one device */
2544 	pci_numdevs--;
2545 	return (0);
2546 }
2547 
2548 /*
2549  * PCI power manangement
2550  */
2551 int
2552 pci_set_powerstate_method(device_t dev, device_t child, int state)
2553 {
2554 	struct pci_devinfo *dinfo = device_get_ivars(child);
2555 	pcicfgregs *cfg = &dinfo->cfg;
2556 	uint16_t status;
2557 	int oldstate, highest, delay;
2558 
2559 	if (cfg->pp.pp_cap == 0)
2560 		return (EOPNOTSUPP);
2561 
2562 	/*
2563 	 * Optimize a no state change request away.  While it would be OK to
2564 	 * write to the hardware in theory, some devices have shown odd
2565 	 * behavior when going from D3 -> D3.
2566 	 */
2567 	oldstate = pci_get_powerstate(child);
2568 	if (oldstate == state)
2569 		return (0);
2570 
2571 	/*
2572 	 * The PCI power management specification states that after a state
2573 	 * transition between PCI power states, system software must
2574 	 * guarantee a minimal delay before the function accesses the device.
2575 	 * Compute the worst case delay that we need to guarantee before we
2576 	 * access the device.  Many devices will be responsive much more
2577 	 * quickly than this delay, but there are some that don't respond
2578 	 * instantly to state changes.  Transitions to/from D3 state require
2579 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2580 	 * is done below with DELAY rather than a sleeper function because
2581 	 * this function can be called from contexts where we cannot sleep.
2582 	 */
2583 	highest = (oldstate > state) ? oldstate : state;
2584 	if (highest == PCI_POWERSTATE_D3)
2585 	    delay = 10000;
2586 	else if (highest == PCI_POWERSTATE_D2)
2587 	    delay = 200;
2588 	else
2589 	    delay = 0;
2590 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2591 	    & ~PCIM_PSTAT_DMASK;
2592 	switch (state) {
2593 	case PCI_POWERSTATE_D0:
2594 		status |= PCIM_PSTAT_D0;
2595 		break;
2596 	case PCI_POWERSTATE_D1:
2597 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2598 			return (EOPNOTSUPP);
2599 		status |= PCIM_PSTAT_D1;
2600 		break;
2601 	case PCI_POWERSTATE_D2:
2602 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2603 			return (EOPNOTSUPP);
2604 		status |= PCIM_PSTAT_D2;
2605 		break;
2606 	case PCI_POWERSTATE_D3:
2607 		status |= PCIM_PSTAT_D3;
2608 		break;
2609 	default:
2610 		return (EINVAL);
2611 	}
2612 
2613 	if (bootverbose)
2614 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2615 		    state);
2616 
2617 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2618 	if (delay)
2619 		DELAY(delay);
2620 	return (0);
2621 }
2622 
2623 int
2624 pci_get_powerstate_method(device_t dev, device_t child)
2625 {
2626 	struct pci_devinfo *dinfo = device_get_ivars(child);
2627 	pcicfgregs *cfg = &dinfo->cfg;
2628 	uint16_t status;
2629 	int result;
2630 
2631 	if (cfg->pp.pp_cap != 0) {
2632 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2633 		switch (status & PCIM_PSTAT_DMASK) {
2634 		case PCIM_PSTAT_D0:
2635 			result = PCI_POWERSTATE_D0;
2636 			break;
2637 		case PCIM_PSTAT_D1:
2638 			result = PCI_POWERSTATE_D1;
2639 			break;
2640 		case PCIM_PSTAT_D2:
2641 			result = PCI_POWERSTATE_D2;
2642 			break;
2643 		case PCIM_PSTAT_D3:
2644 			result = PCI_POWERSTATE_D3;
2645 			break;
2646 		default:
2647 			result = PCI_POWERSTATE_UNKNOWN;
2648 			break;
2649 		}
2650 	} else {
2651 		/* No support, device is always at D0 */
2652 		result = PCI_POWERSTATE_D0;
2653 	}
2654 	return (result);
2655 }
2656 
2657 /*
2658  * Some convenience functions for PCI device drivers.
2659  */
2660 
2661 static __inline void
2662 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2663 {
2664 	uint16_t	command;
2665 
2666 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2667 	command |= bit;
2668 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2669 }
2670 
2671 static __inline void
2672 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2673 {
2674 	uint16_t	command;
2675 
2676 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2677 	command &= ~bit;
2678 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2679 }
2680 
2681 int
2682 pci_enable_busmaster_method(device_t dev, device_t child)
2683 {
2684 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2685 	return (0);
2686 }
2687 
2688 int
2689 pci_disable_busmaster_method(device_t dev, device_t child)
2690 {
2691 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2692 	return (0);
2693 }
2694 
2695 int
2696 pci_enable_io_method(device_t dev, device_t child, int space)
2697 {
2698 	uint16_t bit;
2699 
2700 	switch(space) {
2701 	case SYS_RES_IOPORT:
2702 		bit = PCIM_CMD_PORTEN;
2703 		break;
2704 	case SYS_RES_MEMORY:
2705 		bit = PCIM_CMD_MEMEN;
2706 		break;
2707 	default:
2708 		return (EINVAL);
2709 	}
2710 	pci_set_command_bit(dev, child, bit);
2711 	return (0);
2712 }
2713 
2714 int
2715 pci_disable_io_method(device_t dev, device_t child, int space)
2716 {
2717 	uint16_t bit;
2718 
2719 	switch(space) {
2720 	case SYS_RES_IOPORT:
2721 		bit = PCIM_CMD_PORTEN;
2722 		break;
2723 	case SYS_RES_MEMORY:
2724 		bit = PCIM_CMD_MEMEN;
2725 		break;
2726 	default:
2727 		return (EINVAL);
2728 	}
2729 	pci_clear_command_bit(dev, child, bit);
2730 	return (0);
2731 }
2732 
2733 /*
2734  * New style pci driver.  Parent device is either a pci-host-bridge or a
2735  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2736  */
2737 
2738 void
2739 pci_print_verbose(struct pci_devinfo *dinfo)
2740 {
2741 
2742 	if (bootverbose) {
2743 		pcicfgregs *cfg = &dinfo->cfg;
2744 
2745 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2746 		    cfg->vendor, cfg->device, cfg->revid);
2747 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2748 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2749 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2750 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2751 		    cfg->mfdev);
2752 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2753 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2754 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2755 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2756 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2757 		if (cfg->intpin > 0)
2758 			printf("\tintpin=%c, irq=%d\n",
2759 			    cfg->intpin +'a' -1, cfg->intline);
2760 		if (cfg->pp.pp_cap) {
2761 			uint16_t status;
2762 
2763 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2764 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2765 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2766 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2767 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2768 			    status & PCIM_PSTAT_DMASK);
2769 		}
2770 		if (cfg->msi.msi_location) {
2771 			int ctrl;
2772 
2773 			ctrl = cfg->msi.msi_ctrl;
2774 			printf("\tMSI supports %d message%s%s%s\n",
2775 			    cfg->msi.msi_msgnum,
2776 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2777 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2778 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2779 		}
2780 		if (cfg->msix.msix_location) {
2781 			printf("\tMSI-X supports %d message%s ",
2782 			    cfg->msix.msix_msgnum,
2783 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2784 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2785 				printf("in map 0x%x\n",
2786 				    cfg->msix.msix_table_bar);
2787 			else
2788 				printf("in maps 0x%x and 0x%x\n",
2789 				    cfg->msix.msix_table_bar,
2790 				    cfg->msix.msix_pba_bar);
2791 		}
2792 	}
2793 }
2794 
2795 static int
2796 pci_porten(device_t dev)
2797 {
2798 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2799 }
2800 
2801 static int
2802 pci_memen(device_t dev)
2803 {
2804 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2805 }
2806 
2807 void
2808 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2809     int *bar64)
2810 {
2811 	struct pci_devinfo *dinfo;
2812 	pci_addr_t map, testval;
2813 	int ln2range;
2814 	uint16_t cmd;
2815 
2816 	/*
2817 	 * The device ROM BAR is special.  It is always a 32-bit
2818 	 * memory BAR.  Bit 0 is special and should not be set when
2819 	 * sizing the BAR.
2820 	 */
2821 	dinfo = device_get_ivars(dev);
2822 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2823 		map = pci_read_config(dev, reg, 4);
2824 		pci_write_config(dev, reg, 0xfffffffe, 4);
2825 		testval = pci_read_config(dev, reg, 4);
2826 		pci_write_config(dev, reg, map, 4);
2827 		*mapp = map;
2828 		*testvalp = testval;
2829 		if (bar64 != NULL)
2830 			*bar64 = 0;
2831 		return;
2832 	}
2833 
2834 	map = pci_read_config(dev, reg, 4);
2835 	ln2range = pci_maprange(map);
2836 	if (ln2range == 64)
2837 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2838 
2839 	/*
2840 	 * Disable decoding via the command register before
2841 	 * determining the BAR's length since we will be placing it in
2842 	 * a weird state.
2843 	 */
2844 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2845 	pci_write_config(dev, PCIR_COMMAND,
2846 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2847 
2848 	/*
2849 	 * Determine the BAR's length by writing all 1's.  The bottom
2850 	 * log_2(size) bits of the BAR will stick as 0 when we read
2851 	 * the value back.
2852 	 */
2853 	pci_write_config(dev, reg, 0xffffffff, 4);
2854 	testval = pci_read_config(dev, reg, 4);
2855 	if (ln2range == 64) {
2856 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2857 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2858 	}
2859 
2860 	/*
2861 	 * Restore the original value of the BAR.  We may have reprogrammed
2862 	 * the BAR of the low-level console device and when booting verbose,
2863 	 * we need the console device addressable.
2864 	 */
2865 	pci_write_config(dev, reg, map, 4);
2866 	if (ln2range == 64)
2867 		pci_write_config(dev, reg + 4, map >> 32, 4);
2868 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2869 
2870 	*mapp = map;
2871 	*testvalp = testval;
2872 	if (bar64 != NULL)
2873 		*bar64 = (ln2range == 64);
2874 }
2875 
2876 static void
2877 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2878 {
2879 	struct pci_devinfo *dinfo;
2880 	int ln2range;
2881 
2882 	/* The device ROM BAR is always a 32-bit memory BAR. */
2883 	dinfo = device_get_ivars(dev);
2884 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2885 		ln2range = 32;
2886 	else
2887 		ln2range = pci_maprange(pm->pm_value);
2888 	pci_write_config(dev, pm->pm_reg, base, 4);
2889 	if (ln2range == 64)
2890 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2891 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2892 	if (ln2range == 64)
2893 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2894 		    pm->pm_reg + 4, 4) << 32;
2895 }
2896 
2897 struct pci_map *
2898 pci_find_bar(device_t dev, int reg)
2899 {
2900 	struct pci_devinfo *dinfo;
2901 	struct pci_map *pm;
2902 
2903 	dinfo = device_get_ivars(dev);
2904 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2905 		if (pm->pm_reg == reg)
2906 			return (pm);
2907 	}
2908 	return (NULL);
2909 }
2910 
2911 int
2912 pci_bar_enabled(device_t dev, struct pci_map *pm)
2913 {
2914 	struct pci_devinfo *dinfo;
2915 	uint16_t cmd;
2916 
2917 	dinfo = device_get_ivars(dev);
2918 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2919 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2920 		return (0);
2921 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2922 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2923 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2924 	else
2925 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2926 }
2927 
2928 struct pci_map *
2929 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2930 {
2931 	struct pci_devinfo *dinfo;
2932 	struct pci_map *pm, *prev;
2933 
2934 	dinfo = device_get_ivars(dev);
2935 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2936 	pm->pm_reg = reg;
2937 	pm->pm_value = value;
2938 	pm->pm_size = size;
2939 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2940 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2941 		    reg));
2942 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2943 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2944 			break;
2945 	}
2946 	if (prev != NULL)
2947 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2948 	else
2949 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2950 	return (pm);
2951 }
2952 
2953 static void
2954 pci_restore_bars(device_t dev)
2955 {
2956 	struct pci_devinfo *dinfo;
2957 	struct pci_map *pm;
2958 	int ln2range;
2959 
2960 	dinfo = device_get_ivars(dev);
2961 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2962 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2963 			ln2range = 32;
2964 		else
2965 			ln2range = pci_maprange(pm->pm_value);
2966 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2967 		if (ln2range == 64)
2968 			pci_write_config(dev, pm->pm_reg + 4,
2969 			    pm->pm_value >> 32, 4);
2970 	}
2971 }
2972 
2973 /*
2974  * Add a resource based on a pci map register. Return 1 if the map
2975  * register is a 32bit map register or 2 if it is a 64bit register.
2976  */
2977 static int
2978 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2979     int force, int prefetch)
2980 {
2981 	struct pci_map *pm;
2982 	pci_addr_t base, map, testval;
2983 	pci_addr_t start, end, count;
2984 	int barlen, basezero, flags, maprange, mapsize, type;
2985 	uint16_t cmd;
2986 	struct resource *res;
2987 
2988 	/*
2989 	 * The BAR may already exist if the device is a CardBus card
2990 	 * whose CIS is stored in this BAR.
2991 	 */
2992 	pm = pci_find_bar(dev, reg);
2993 	if (pm != NULL) {
2994 		maprange = pci_maprange(pm->pm_value);
2995 		barlen = maprange == 64 ? 2 : 1;
2996 		return (barlen);
2997 	}
2998 
2999 	pci_read_bar(dev, reg, &map, &testval, NULL);
3000 	if (PCI_BAR_MEM(map)) {
3001 		type = SYS_RES_MEMORY;
3002 		if (map & PCIM_BAR_MEM_PREFETCH)
3003 			prefetch = 1;
3004 	} else
3005 		type = SYS_RES_IOPORT;
3006 	mapsize = pci_mapsize(testval);
3007 	base = pci_mapbase(map);
3008 #ifdef __PCI_BAR_ZERO_VALID
3009 	basezero = 0;
3010 #else
3011 	basezero = base == 0;
3012 #endif
3013 	maprange = pci_maprange(map);
3014 	barlen = maprange == 64 ? 2 : 1;
3015 
3016 	/*
3017 	 * For I/O registers, if bottom bit is set, and the next bit up
3018 	 * isn't clear, we know we have a BAR that doesn't conform to the
3019 	 * spec, so ignore it.  Also, sanity check the size of the data
3020 	 * areas to the type of memory involved.  Memory must be at least
3021 	 * 16 bytes in size, while I/O ranges must be at least 4.
3022 	 */
3023 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3024 		return (barlen);
3025 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3026 	    (type == SYS_RES_IOPORT && mapsize < 2))
3027 		return (barlen);
3028 
3029 	/* Save a record of this BAR. */
3030 	pm = pci_add_bar(dev, reg, map, mapsize);
3031 	if (bootverbose) {
3032 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3033 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3034 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3035 			printf(", port disabled\n");
3036 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3037 			printf(", memory disabled\n");
3038 		else
3039 			printf(", enabled\n");
3040 	}
3041 
3042 	/*
3043 	 * If base is 0, then we have problems if this architecture does
3044 	 * not allow that.  It is best to ignore such entries for the
3045 	 * moment.  These will be allocated later if the driver specifically
3046 	 * requests them.  However, some removable busses look better when
3047 	 * all resources are allocated, so allow '0' to be overriden.
3048 	 *
3049 	 * Similarly treat maps whose values is the same as the test value
3050 	 * read back.  These maps have had all f's written to them by the
3051 	 * BIOS in an attempt to disable the resources.
3052 	 */
3053 	if (!force && (basezero || map == testval))
3054 		return (barlen);
3055 	if ((u_long)base != base) {
3056 		device_printf(bus,
3057 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3058 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3059 		    pci_get_function(dev), reg);
3060 		return (barlen);
3061 	}
3062 
3063 	/*
3064 	 * This code theoretically does the right thing, but has
3065 	 * undesirable side effects in some cases where peripherals
3066 	 * respond oddly to having these bits enabled.  Let the user
3067 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3068 	 * default).
3069 	 */
3070 	if (pci_enable_io_modes) {
3071 		/* Turn on resources that have been left off by a lazy BIOS */
3072 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3073 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3074 			cmd |= PCIM_CMD_PORTEN;
3075 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3076 		}
3077 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3078 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3079 			cmd |= PCIM_CMD_MEMEN;
3080 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3081 		}
3082 	} else {
3083 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3084 			return (barlen);
3085 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3086 			return (barlen);
3087 	}
3088 
3089 	count = (pci_addr_t)1 << mapsize;
3090 	flags = RF_ALIGNMENT_LOG2(mapsize);
3091 	if (prefetch)
3092 		flags |= RF_PREFETCHABLE;
3093 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3094 		start = 0;	/* Let the parent decide. */
3095 		end = ~0;
3096 	} else {
3097 		start = base;
3098 		end = base + count - 1;
3099 	}
3100 	resource_list_add(rl, type, reg, start, end, count);
3101 
3102 	/*
3103 	 * Try to allocate the resource for this BAR from our parent
3104 	 * so that this resource range is already reserved.  The
3105 	 * driver for this device will later inherit this resource in
3106 	 * pci_alloc_resource().
3107 	 */
3108 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3109 	    flags);
3110 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3111 		/*
3112 		 * If the allocation fails, try to allocate a resource for
3113 		 * this BAR using any available range.  The firmware felt
3114 		 * it was important enough to assign a resource, so don't
3115 		 * disable decoding if we can help it.
3116 		 */
3117 		resource_list_delete(rl, type, reg);
3118 		resource_list_add(rl, type, reg, 0, ~0, count);
3119 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3120 		    count, flags);
3121 	}
3122 	if (res == NULL) {
3123 		/*
3124 		 * If the allocation fails, delete the resource list entry
3125 		 * and disable decoding for this device.
3126 		 *
3127 		 * If the driver requests this resource in the future,
3128 		 * pci_reserve_map() will try to allocate a fresh
3129 		 * resource range.
3130 		 */
3131 		resource_list_delete(rl, type, reg);
3132 		pci_disable_io(dev, type);
3133 		if (bootverbose)
3134 			device_printf(bus,
3135 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3136 			    pci_get_domain(dev), pci_get_bus(dev),
3137 			    pci_get_slot(dev), pci_get_function(dev), reg);
3138 	} else {
3139 		start = rman_get_start(res);
3140 		pci_write_bar(dev, pm, start);
3141 	}
3142 	return (barlen);
3143 }
3144 
3145 /*
3146  * For ATA devices we need to decide early what addressing mode to use.
3147  * Legacy demands that the primary and secondary ATA ports sits on the
3148  * same addresses that old ISA hardware did. This dictates that we use
3149  * those addresses and ignore the BAR's if we cannot set PCI native
3150  * addressing mode.
3151  */
3152 static void
3153 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3154     uint32_t prefetchmask)
3155 {
3156 	int rid, type, progif;
3157 #if 0
3158 	/* if this device supports PCI native addressing use it */
3159 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3160 	if ((progif & 0x8a) == 0x8a) {
3161 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3162 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3163 			printf("Trying ATA native PCI addressing mode\n");
3164 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3165 		}
3166 	}
3167 #endif
3168 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3169 	type = SYS_RES_IOPORT;
3170 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3171 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3172 		    prefetchmask & (1 << 0));
3173 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3174 		    prefetchmask & (1 << 1));
3175 	} else {
3176 		rid = PCIR_BAR(0);
3177 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3178 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3179 		    0x1f7, 8, 0);
3180 		rid = PCIR_BAR(1);
3181 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3182 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3183 		    0x3f6, 1, 0);
3184 	}
3185 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3186 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3187 		    prefetchmask & (1 << 2));
3188 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3189 		    prefetchmask & (1 << 3));
3190 	} else {
3191 		rid = PCIR_BAR(2);
3192 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3193 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3194 		    0x177, 8, 0);
3195 		rid = PCIR_BAR(3);
3196 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3197 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3198 		    0x376, 1, 0);
3199 	}
3200 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3201 	    prefetchmask & (1 << 4));
3202 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3203 	    prefetchmask & (1 << 5));
3204 }
3205 
3206 static void
3207 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3208 {
3209 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3210 	pcicfgregs *cfg = &dinfo->cfg;
3211 	char tunable_name[64];
3212 	int irq;
3213 
3214 	/* Has to have an intpin to have an interrupt. */
3215 	if (cfg->intpin == 0)
3216 		return;
3217 
3218 	/* Let the user override the IRQ with a tunable. */
3219 	irq = PCI_INVALID_IRQ;
3220 	snprintf(tunable_name, sizeof(tunable_name),
3221 	    "hw.pci%d.%d.%d.INT%c.irq",
3222 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3223 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3224 		irq = PCI_INVALID_IRQ;
3225 
3226 	/*
3227 	 * If we didn't get an IRQ via the tunable, then we either use the
3228 	 * IRQ value in the intline register or we ask the bus to route an
3229 	 * interrupt for us.  If force_route is true, then we only use the
3230 	 * value in the intline register if the bus was unable to assign an
3231 	 * IRQ.
3232 	 */
3233 	if (!PCI_INTERRUPT_VALID(irq)) {
3234 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3235 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3236 		if (!PCI_INTERRUPT_VALID(irq))
3237 			irq = cfg->intline;
3238 	}
3239 
3240 	/* If after all that we don't have an IRQ, just bail. */
3241 	if (!PCI_INTERRUPT_VALID(irq))
3242 		return;
3243 
3244 	/* Update the config register if it changed. */
3245 	if (irq != cfg->intline) {
3246 		cfg->intline = irq;
3247 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3248 	}
3249 
3250 	/* Add this IRQ as rid 0 interrupt resource. */
3251 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3252 }
3253 
3254 /* Perform early OHCI takeover from SMM. */
3255 static void
3256 ohci_early_takeover(device_t self)
3257 {
3258 	struct resource *res;
3259 	uint32_t ctl;
3260 	int rid;
3261 	int i;
3262 
3263 	rid = PCIR_BAR(0);
3264 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3265 	if (res == NULL)
3266 		return;
3267 
3268 	ctl = bus_read_4(res, OHCI_CONTROL);
3269 	if (ctl & OHCI_IR) {
3270 		if (bootverbose)
3271 			printf("ohci early: "
3272 			    "SMM active, request owner change\n");
3273 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3274 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3275 			DELAY(1000);
3276 			ctl = bus_read_4(res, OHCI_CONTROL);
3277 		}
3278 		if (ctl & OHCI_IR) {
3279 			if (bootverbose)
3280 				printf("ohci early: "
3281 				    "SMM does not respond, resetting\n");
3282 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3283 		}
3284 		/* Disable interrupts */
3285 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3286 	}
3287 
3288 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3289 }
3290 
3291 /* Perform early UHCI takeover from SMM. */
3292 static void
3293 uhci_early_takeover(device_t self)
3294 {
3295 	struct resource *res;
3296 	int rid;
3297 
3298 	/*
3299 	 * Set the PIRQD enable bit and switch off all the others. We don't
3300 	 * want legacy support to interfere with us XXX Does this also mean
3301 	 * that the BIOS won't touch the keyboard anymore if it is connected
3302 	 * to the ports of the root hub?
3303 	 */
3304 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3305 
3306 	/* Disable interrupts */
3307 	rid = PCI_UHCI_BASE_REG;
3308 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3309 	if (res != NULL) {
3310 		bus_write_2(res, UHCI_INTR, 0);
3311 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3312 	}
3313 }
3314 
3315 /* Perform early EHCI takeover from SMM. */
3316 static void
3317 ehci_early_takeover(device_t self)
3318 {
3319 	struct resource *res;
3320 	uint32_t cparams;
3321 	uint32_t eec;
3322 	uint8_t eecp;
3323 	uint8_t bios_sem;
3324 	uint8_t offs;
3325 	int rid;
3326 	int i;
3327 
3328 	rid = PCIR_BAR(0);
3329 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3330 	if (res == NULL)
3331 		return;
3332 
3333 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3334 
3335 	/* Synchronise with the BIOS if it owns the controller. */
3336 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3337 	    eecp = EHCI_EECP_NEXT(eec)) {
3338 		eec = pci_read_config(self, eecp, 4);
3339 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3340 			continue;
3341 		}
3342 		bios_sem = pci_read_config(self, eecp +
3343 		    EHCI_LEGSUP_BIOS_SEM, 1);
3344 		if (bios_sem == 0) {
3345 			continue;
3346 		}
3347 		if (bootverbose)
3348 			printf("ehci early: "
3349 			    "SMM active, request owner change\n");
3350 
3351 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3352 
3353 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3354 			DELAY(1000);
3355 			bios_sem = pci_read_config(self, eecp +
3356 			    EHCI_LEGSUP_BIOS_SEM, 1);
3357 		}
3358 
3359 		if (bios_sem != 0) {
3360 			if (bootverbose)
3361 				printf("ehci early: "
3362 				    "SMM does not respond\n");
3363 		}
3364 		/* Disable interrupts */
3365 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3366 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3367 	}
3368 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3369 }
3370 
3371 /* Perform early XHCI takeover from SMM. */
3372 static void
3373 xhci_early_takeover(device_t self)
3374 {
3375 	struct resource *res;
3376 	uint32_t cparams;
3377 	uint32_t eec;
3378 	uint8_t eecp;
3379 	uint8_t bios_sem;
3380 	uint8_t offs;
3381 	int rid;
3382 	int i;
3383 
3384 	rid = PCIR_BAR(0);
3385 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3386 	if (res == NULL)
3387 		return;
3388 
3389 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3390 
3391 	eec = -1;
3392 
3393 	/* Synchronise with the BIOS if it owns the controller. */
3394 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3395 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3396 		eec = bus_read_4(res, eecp);
3397 
3398 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3399 			continue;
3400 
3401 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3402 		if (bios_sem == 0)
3403 			continue;
3404 
3405 		if (bootverbose)
3406 			printf("xhci early: "
3407 			    "SMM active, request owner change\n");
3408 
3409 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3410 
3411 		/* wait a maximum of 5 second */
3412 
3413 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3414 			DELAY(1000);
3415 			bios_sem = bus_read_1(res, eecp +
3416 			    XHCI_XECP_BIOS_SEM);
3417 		}
3418 
3419 		if (bios_sem != 0) {
3420 			if (bootverbose)
3421 				printf("xhci early: "
3422 				    "SMM does not respond\n");
3423 		}
3424 
3425 		/* Disable interrupts */
3426 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3427 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3428 		bus_read_4(res, offs + XHCI_USBSTS);
3429 	}
3430 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3431 }
3432 
3433 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3434 static void
3435 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3436     struct resource_list *rl)
3437 {
3438 	struct resource *res;
3439 	char *cp;
3440 	rman_res_t start, end, count;
3441 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3442 
3443 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3444 	case PCIM_HDRTYPE_BRIDGE:
3445 		sec_reg = PCIR_SECBUS_1;
3446 		sub_reg = PCIR_SUBBUS_1;
3447 		break;
3448 	case PCIM_HDRTYPE_CARDBUS:
3449 		sec_reg = PCIR_SECBUS_2;
3450 		sub_reg = PCIR_SUBBUS_2;
3451 		break;
3452 	default:
3453 		return;
3454 	}
3455 
3456 	/*
3457 	 * If the existing bus range is valid, attempt to reserve it
3458 	 * from our parent.  If this fails for any reason, clear the
3459 	 * secbus and subbus registers.
3460 	 *
3461 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3462 	 * This would at least preserve the existing sec_bus if it is
3463 	 * valid.
3464 	 */
3465 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3466 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3467 
3468 	/* Quirk handling. */
3469 	switch (pci_get_devid(dev)) {
3470 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3471 		sup_bus = pci_read_config(dev, 0x41, 1);
3472 		if (sup_bus != 0xff) {
3473 			sec_bus = sup_bus + 1;
3474 			sub_bus = sup_bus + 1;
3475 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3476 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3477 		}
3478 		break;
3479 
3480 	case 0x00dd10de:
3481 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3482 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3483 			break;
3484 		if (strncmp(cp, "Compal", 6) != 0) {
3485 			freeenv(cp);
3486 			break;
3487 		}
3488 		freeenv(cp);
3489 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3490 			break;
3491 		if (strncmp(cp, "08A0", 4) != 0) {
3492 			freeenv(cp);
3493 			break;
3494 		}
3495 		freeenv(cp);
3496 		if (sub_bus < 0xa) {
3497 			sub_bus = 0xa;
3498 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3499 		}
3500 		break;
3501 	}
3502 
3503 	if (bootverbose)
3504 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3505 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3506 		start = sec_bus;
3507 		end = sub_bus;
3508 		count = end - start + 1;
3509 
3510 		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
3511 
3512 		/*
3513 		 * If requested, clear secondary bus registers in
3514 		 * bridge devices to force a complete renumbering
3515 		 * rather than reserving the existing range.  However,
3516 		 * preserve the existing size.
3517 		 */
3518 		if (pci_clear_buses)
3519 			goto clear;
3520 
3521 		rid = 0;
3522 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3523 		    start, end, count, 0);
3524 		if (res != NULL)
3525 			return;
3526 
3527 		if (bootverbose)
3528 			device_printf(bus,
3529 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3530 			    pci_get_domain(dev), pci_get_bus(dev),
3531 			    pci_get_slot(dev), pci_get_function(dev));
3532 	}
3533 
3534 clear:
3535 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3536 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3537 }
3538 
3539 static struct resource *
3540 pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
3541     rman_res_t end, rman_res_t count, u_int flags)
3542 {
3543 	struct pci_devinfo *dinfo;
3544 	pcicfgregs *cfg;
3545 	struct resource_list *rl;
3546 	struct resource *res;
3547 	int sec_reg, sub_reg;
3548 
3549 	dinfo = device_get_ivars(child);
3550 	cfg = &dinfo->cfg;
3551 	rl = &dinfo->resources;
3552 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3553 	case PCIM_HDRTYPE_BRIDGE:
3554 		sec_reg = PCIR_SECBUS_1;
3555 		sub_reg = PCIR_SUBBUS_1;
3556 		break;
3557 	case PCIM_HDRTYPE_CARDBUS:
3558 		sec_reg = PCIR_SECBUS_2;
3559 		sub_reg = PCIR_SUBBUS_2;
3560 		break;
3561 	default:
3562 		return (NULL);
3563 	}
3564 
3565 	if (*rid != 0)
3566 		return (NULL);
3567 
3568 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3569 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3570 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3571 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3572 		    start, end, count, flags & ~RF_ACTIVE);
3573 		if (res == NULL) {
3574 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3575 			device_printf(child, "allocating %lu bus%s failed\n",
3576 			    count, count == 1 ? "" : "es");
3577 			return (NULL);
3578 		}
3579 		if (bootverbose)
3580 			device_printf(child,
3581 			    "Lazy allocation of %lu bus%s at %lu\n", count,
3582 			    count == 1 ? "" : "es", rman_get_start(res));
3583 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3584 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3585 	}
3586 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3587 	    end, count, flags));
3588 }
3589 #endif
3590 
3591 static int
3592 pci_ea_bei_to_rid(device_t dev, int bei)
3593 {
3594 #ifdef PCI_IOV
3595 	struct pci_devinfo *dinfo;
3596 	int iov_pos;
3597 	struct pcicfg_iov *iov;
3598 
3599 	dinfo = device_get_ivars(dev);
3600 	iov = dinfo->cfg.iov;
3601 	if (iov != NULL)
3602 		iov_pos = iov->iov_pos;
3603 	else
3604 		iov_pos = 0;
3605 #endif
3606 
3607 	/* Check if matches BAR */
3608 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3609 	    (bei <= PCIM_EA_BEI_BAR_5))
3610 		return (PCIR_BAR(bei));
3611 
3612 	/* Check ROM */
3613 	if (bei == PCIM_EA_BEI_ROM)
3614 		return (PCIR_BIOS);
3615 
3616 #ifdef PCI_IOV
3617 	/* Check if matches VF_BAR */
3618 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3619 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3620 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3621 		    iov_pos);
3622 #endif
3623 
3624 	return (-1);
3625 }
3626 
3627 int
3628 pci_ea_is_enabled(device_t dev, int rid)
3629 {
3630 	struct pci_ea_entry *ea;
3631 	struct pci_devinfo *dinfo;
3632 
3633 	dinfo = device_get_ivars(dev);
3634 
3635 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3636 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3637 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3638 	}
3639 
3640 	return (0);
3641 }
3642 
3643 void
3644 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3645 {
3646 	struct pci_ea_entry *ea;
3647 	struct pci_devinfo *dinfo;
3648 	pci_addr_t start, end, count;
3649 	struct resource_list *rl;
3650 	int type, flags, rid;
3651 	struct resource *res;
3652 	uint32_t tmp;
3653 #ifdef PCI_IOV
3654 	struct pcicfg_iov *iov;
3655 #endif
3656 
3657 	dinfo = device_get_ivars(dev);
3658 	rl = &dinfo->resources;
3659 	flags = 0;
3660 
3661 #ifdef PCI_IOV
3662 	iov = dinfo->cfg.iov;
3663 #endif
3664 
3665 	if (dinfo->cfg.ea.ea_location == 0)
3666 		return;
3667 
3668 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3669 
3670 		/*
3671 		 * TODO: Ignore EA-BAR if is not enabled.
3672 		 *   Currently the EA implementation supports
3673 		 *   only situation, where EA structure contains
3674 		 *   predefined entries. In case they are not enabled
3675 		 *   leave them unallocated and proceed with
3676 		 *   a legacy-BAR mechanism.
3677 		 */
3678 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3679 			continue;
3680 
3681 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3682 		case PCIM_EA_P_MEM_PREFETCH:
3683 		case PCIM_EA_P_VF_MEM_PREFETCH:
3684 			flags = RF_PREFETCHABLE;
3685 		case PCIM_EA_P_VF_MEM:
3686 		case PCIM_EA_P_MEM:
3687 			type = SYS_RES_MEMORY;
3688 			break;
3689 		case PCIM_EA_P_IO:
3690 			type = SYS_RES_IOPORT;
3691 			break;
3692 		default:
3693 			continue;
3694 		}
3695 
3696 		if (alloc_iov != 0) {
3697 #ifdef PCI_IOV
3698 			/* Allocating IOV, confirm BEI matches */
3699 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3700 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3701 				continue;
3702 #else
3703 			continue;
3704 #endif
3705 		} else {
3706 			/* Allocating BAR, confirm BEI matches */
3707 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3708 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3709 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3710 				continue;
3711 		}
3712 
3713 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3714 		if (rid < 0)
3715 			continue;
3716 
3717 		/* Skip resources already allocated by EA */
3718 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3719 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3720 			continue;
3721 
3722 		start = ea->eae_base;
3723 		count = ea->eae_max_offset + 1;
3724 #ifdef PCI_IOV
3725 		if (iov != NULL)
3726 			count = count * iov->iov_num_vfs;
3727 #endif
3728 		end = start + count - 1;
3729 		if (count == 0)
3730 			continue;
3731 
3732 		resource_list_add(rl, type, rid, start, end, count);
3733 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3734 		    flags);
3735 		if (res == NULL) {
3736 			resource_list_delete(rl, type, rid);
3737 
3738 			/*
3739 			 * Failed to allocate using EA, disable entry.
3740 			 * Another attempt to allocation will be performed
3741 			 * further, but this time using legacy BAR registers
3742 			 */
3743 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3744 			tmp &= ~PCIM_EA_ENABLE;
3745 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3746 
3747 			/*
3748 			 * Disabling entry might fail in case it is hardwired.
3749 			 * Read flags again to match current status.
3750 			 */
3751 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3752 
3753 			continue;
3754 		}
3755 
3756 		/* As per specification, fill BAR with zeros */
3757 		pci_write_config(dev, rid, 0, 4);
3758 	}
3759 }
3760 
3761 void
3762 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3763 {
3764 	struct pci_devinfo *dinfo;
3765 	pcicfgregs *cfg;
3766 	struct resource_list *rl;
3767 	const struct pci_quirk *q;
3768 	uint32_t devid;
3769 	int i;
3770 
3771 	dinfo = device_get_ivars(dev);
3772 	cfg = &dinfo->cfg;
3773 	rl = &dinfo->resources;
3774 	devid = (cfg->device << 16) | cfg->vendor;
3775 
3776 	/* Allocate resources using Enhanced Allocation */
3777 	pci_add_resources_ea(bus, dev, 0);
3778 
3779 	/* ATA devices needs special map treatment */
3780 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3781 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3782 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3783 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3784 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3785 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3786 	else
3787 		for (i = 0; i < cfg->nummaps;) {
3788 			/* Skip resources already managed by EA */
3789 			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
3790 			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
3791 			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
3792 				i++;
3793 				continue;
3794 			}
3795 
3796 			/*
3797 			 * Skip quirked resources.
3798 			 */
3799 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3800 				if (q->devid == devid &&
3801 				    q->type == PCI_QUIRK_UNMAP_REG &&
3802 				    q->arg1 == PCIR_BAR(i))
3803 					break;
3804 			if (q->devid != 0) {
3805 				i++;
3806 				continue;
3807 			}
3808 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3809 			    prefetchmask & (1 << i));
3810 		}
3811 
3812 	/*
3813 	 * Add additional, quirked resources.
3814 	 */
3815 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3816 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3817 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3818 
3819 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3820 #ifdef __PCI_REROUTE_INTERRUPT
3821 		/*
3822 		 * Try to re-route interrupts. Sometimes the BIOS or
3823 		 * firmware may leave bogus values in these registers.
3824 		 * If the re-route fails, then just stick with what we
3825 		 * have.
3826 		 */
3827 		pci_assign_interrupt(bus, dev, 1);
3828 #else
3829 		pci_assign_interrupt(bus, dev, 0);
3830 #endif
3831 	}
3832 
3833 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3834 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3835 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3836 			xhci_early_takeover(dev);
3837 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3838 			ehci_early_takeover(dev);
3839 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3840 			ohci_early_takeover(dev);
3841 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3842 			uhci_early_takeover(dev);
3843 	}
3844 
3845 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3846 	/*
3847 	 * Reserve resources for secondary bus ranges behind bridge
3848 	 * devices.
3849 	 */
3850 	pci_reserve_secbus(bus, dev, cfg, rl);
3851 #endif
3852 }
3853 
3854 static struct pci_devinfo *
3855 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3856     int slot, int func, size_t dinfo_size)
3857 {
3858 	struct pci_devinfo *dinfo;
3859 
3860 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3861 	if (dinfo != NULL)
3862 		pci_add_child(dev, dinfo);
3863 
3864 	return (dinfo);
3865 }
3866 
3867 void
3868 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3869 {
3870 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3871 	device_t pcib = device_get_parent(dev);
3872 	struct pci_devinfo *dinfo;
3873 	int maxslots;
3874 	int s, f, pcifunchigh;
3875 	uint8_t hdrtype;
3876 	int first_func;
3877 
3878 	/*
3879 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3880 	 * enable ARI.  We must enable ARI before detecting the rest of the
3881 	 * functions on this bus as ARI changes the set of slots and functions
3882 	 * that are legal on this bus.
3883 	 */
3884 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
3885 	    dinfo_size);
3886 	if (dinfo != NULL && pci_enable_ari)
3887 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3888 
3889 	/*
3890 	 * Start looking for new devices on slot 0 at function 1 because we
3891 	 * just identified the device at slot 0, function 0.
3892 	 */
3893 	first_func = 1;
3894 
3895 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3896 	    ("dinfo_size too small"));
3897 	maxslots = PCIB_MAXSLOTS(pcib);
3898 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3899 		pcifunchigh = 0;
3900 		f = 0;
3901 		DELAY(1);
3902 		hdrtype = REG(PCIR_HDRTYPE, 1);
3903 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3904 			continue;
3905 		if (hdrtype & PCIM_MFDEV)
3906 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3907 		for (f = first_func; f <= pcifunchigh; f++)
3908 			pci_identify_function(pcib, dev, domain, busno, s, f,
3909 			    dinfo_size);
3910 	}
3911 #undef REG
3912 }
3913 
3914 #ifdef PCI_IOV
3915 device_t
3916 pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
3917     uint16_t vid, uint16_t did)
3918 {
3919 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3920 	device_t pcib;
3921 	int busno, slot, func;
3922 
3923 	pf_dinfo = device_get_ivars(pf);
3924 
3925 	/*
3926 	 * Do a sanity check that we have been passed the correct size.  If this
3927 	 * test fails then likely the pci subclass hasn't implemented the
3928 	 * pci_create_iov_child method like it's supposed it.
3929 	 */
3930 	if (size != pf_dinfo->cfg.devinfo_size) {
3931 		device_printf(pf,
3932 		    "PCI subclass does not properly implement PCI_IOV\n");
3933 		return (NULL);
3934 	}
3935 
3936 	pcib = device_get_parent(bus);
3937 
3938 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3939 
3940 	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
3941 	    vid, did, size);
3942 
3943 	vf_dinfo->cfg.flags |= PCICFG_VF;
3944 	pci_add_child(bus, vf_dinfo);
3945 
3946 	return (vf_dinfo->cfg.dev);
3947 }
3948 
3949 device_t
3950 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
3951     uint16_t vid, uint16_t did)
3952 {
3953 
3954 	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
3955 	    did));
3956 }
3957 #endif
3958 
3959 void
3960 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3961 {
3962 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3963 	device_set_ivars(dinfo->cfg.dev, dinfo);
3964 	resource_list_init(&dinfo->resources);
3965 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3966 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3967 	pci_print_verbose(dinfo);
3968 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3969 	pci_child_added(dinfo->cfg.dev);
3970 }
3971 
3972 void
3973 pci_child_added_method(device_t dev, device_t child)
3974 {
3975 
3976 }
3977 
3978 static int
3979 pci_probe(device_t dev)
3980 {
3981 
3982 	device_set_desc(dev, "PCI bus");
3983 
3984 	/* Allow other subclasses to override this driver. */
3985 	return (BUS_PROBE_GENERIC);
3986 }
3987 
/*
 * Common attach-time setup for a PCI bus instance.
 *
 * When PCI_RES_BUS is defined, the bus number reported by the parent
 * is allocated as a resource and kept in the softc; failure to do so
 * returns ENXIO.  The softc's DMA tag is then set either to a freshly
 * created boundary-restricted tag (PCI_DMA_BOUNDARY builds only) or to
 * the tag returned by bus_get_dma_tag().  Returns 0 on success.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Claim exactly our own bus number (start == end == busno). */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create a new tag when our grandparent is not itself in the
	 * "pci" devclass; otherwise the inherited tag is used below.
	 * A tag creation failure is reported but is not fatal.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/*
	 * Note: this "if" guards the assignment after the #endif; without
	 * PCI_DMA_BOUNDARY the assignment is unconditional.
	 */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4034 
4035 static int
4036 pci_attach(device_t dev)
4037 {
4038 	int busno, domain, error;
4039 
4040 	error = pci_attach_common(dev);
4041 	if (error)
4042 		return (error);
4043 
4044 	/*
4045 	 * Since there can be multiple independantly numbered PCI
4046 	 * busses on systems with multiple PCI domains, we can't use
4047 	 * the unit number to decide which bus we are probing. We ask
4048 	 * the parent pcib what our domain and bus numbers are.
4049 	 */
4050 	domain = pcib_get_domain(dev);
4051 	busno = pcib_get_bus(dev);
4052 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
4053 	return (bus_generic_attach(dev));
4054 }
4055 
4056 #ifdef PCI_RES_BUS
4057 static int
4058 pci_detach(device_t dev)
4059 {
4060 	struct pci_softc *sc;
4061 	int error;
4062 
4063 	error = bus_generic_detach(dev);
4064 	if (error)
4065 		return (error);
4066 	sc = device_get_softc(dev);
4067 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
4068 }
4069 #endif
4070 
4071 static void
4072 pci_set_power_child(device_t dev, device_t child, int state)
4073 {
4074 	device_t pcib;
4075 	int dstate;
4076 
4077 	/*
4078 	 * Set the device to the given state.  If the firmware suggests
4079 	 * a different power state, use it instead.  If power management
4080 	 * is not present, the firmware is responsible for managing
4081 	 * device power.  Skip children who aren't attached since they
4082 	 * are handled separately.
4083 	 */
4084 	pcib = device_get_parent(dev);
4085 	dstate = state;
4086 	if (device_is_attached(child) &&
4087 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4088 		pci_set_powerstate(child, dstate);
4089 }
4090 
4091 int
4092 pci_suspend_child(device_t dev, device_t child)
4093 {
4094 	struct pci_devinfo *dinfo;
4095 	int error;
4096 
4097 	dinfo = device_get_ivars(child);
4098 
4099 	/*
4100 	 * Save the PCI configuration space for the child and set the
4101 	 * device in the appropriate power state for this sleep state.
4102 	 */
4103 	pci_cfg_save(child, dinfo, 0);
4104 
4105 	/* Suspend devices before potentially powering them down. */
4106 	error = bus_generic_suspend_child(dev, child);
4107 
4108 	if (error)
4109 		return (error);
4110 
4111 	if (pci_do_power_suspend)
4112 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4113 
4114 	return (0);
4115 }
4116 
4117 int
4118 pci_resume_child(device_t dev, device_t child)
4119 {
4120 	struct pci_devinfo *dinfo;
4121 
4122 	if (pci_do_power_resume)
4123 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4124 
4125 	dinfo = device_get_ivars(child);
4126 	pci_cfg_restore(child, dinfo);
4127 	if (!device_is_attached(child))
4128 		pci_cfg_save(child, dinfo, 1);
4129 
4130 	bus_generic_resume_child(dev, child);
4131 
4132 	return (0);
4133 }
4134 
4135 int
4136 pci_resume(device_t dev)
4137 {
4138 	device_t child, *devlist;
4139 	int error, i, numdevs;
4140 
4141 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4142 		return (error);
4143 
4144 	/*
4145 	 * Resume critical devices first, then everything else later.
4146 	 */
4147 	for (i = 0; i < numdevs; i++) {
4148 		child = devlist[i];
4149 		switch (pci_get_class(child)) {
4150 		case PCIC_DISPLAY:
4151 		case PCIC_MEMORY:
4152 		case PCIC_BRIDGE:
4153 		case PCIC_BASEPERIPH:
4154 			BUS_RESUME_CHILD(dev, child);
4155 			break;
4156 		}
4157 	}
4158 	for (i = 0; i < numdevs; i++) {
4159 		child = devlist[i];
4160 		switch (pci_get_class(child)) {
4161 		case PCIC_DISPLAY:
4162 		case PCIC_MEMORY:
4163 		case PCIC_BRIDGE:
4164 		case PCIC_BASEPERIPH:
4165 			break;
4166 		default:
4167 			BUS_RESUME_CHILD(dev, child);
4168 		}
4169 	}
4170 	free(devlist, M_TEMP);
4171 	return (0);
4172 }
4173 
4174 static void
4175 pci_load_vendor_data(void)
4176 {
4177 	caddr_t data;
4178 	void *ptr;
4179 	size_t sz;
4180 
4181 	data = preload_search_by_type("pci_vendor_data");
4182 	if (data != NULL) {
4183 		ptr = preload_fetch_addr(data);
4184 		sz = preload_fetch_size(data);
4185 		if (ptr != NULL && sz != 0) {
4186 			pci_vendordata = ptr;
4187 			pci_vendordata_size = sz;
4188 			/* terminate the database */
4189 			pci_vendordata[pci_vendordata_size] = '\n';
4190 		}
4191 	}
4192 }
4193 
4194 void
4195 pci_driver_added(device_t dev, driver_t *driver)
4196 {
4197 	int numdevs;
4198 	device_t *devlist;
4199 	device_t child;
4200 	struct pci_devinfo *dinfo;
4201 	int i;
4202 
4203 	if (bootverbose)
4204 		device_printf(dev, "driver added\n");
4205 	DEVICE_IDENTIFY(driver, dev);
4206 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4207 		return;
4208 	for (i = 0; i < numdevs; i++) {
4209 		child = devlist[i];
4210 		if (device_get_state(child) != DS_NOTPRESENT)
4211 			continue;
4212 		dinfo = device_get_ivars(child);
4213 		pci_print_verbose(dinfo);
4214 		if (bootverbose)
4215 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4216 		pci_cfg_restore(child, dinfo);
4217 		if (device_probe_and_attach(child) != 0)
4218 			pci_child_detached(dev, child);
4219 	}
4220 	free(devlist, M_TEMP);
4221 }
4222 
/*
 * Establish an interrupt handler for a child.
 *
 * The handler is first installed through bus_generic_setup_intr().  For
 * direct children the interrupt hardware is then programmed according
 * to the resource's rid:
 *   - rid 0: the INTx-disable command bit is cleared.
 *   - rid > 0: treated as MSI when MSI messages are allocated,
 *     otherwise as MSI-X (table entry rid - 1).  The message is mapped
 *     via PCIB_MAP_MSI() on first use, enabled when the first handler
 *     is added, and the per-message handler count is incremented.
 * If mapping fails the generic handler is torn down again and the
 * error is returned.  On success *cookiep receives the handler cookie
 * and 0 is returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map once, while the address is still zero. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI only when the first handler is added. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rids are 1-based; table and vector arrays 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * Reached both by falling through (error == 0) and via the
		 * "goto bad" jumps above (error != 0).
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4322 
4323 int
4324 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4325     void *cookie)
4326 {
4327 	struct msix_table_entry *mte;
4328 	struct resource_list_entry *rle;
4329 	struct pci_devinfo *dinfo;
4330 	int error, rid;
4331 
4332 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4333 		return (EINVAL);
4334 
4335 	/* If this isn't a direct child, just bail out */
4336 	if (device_get_parent(child) != dev)
4337 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4338 
4339 	rid = rman_get_rid(irq);
4340 	if (rid == 0) {
4341 		/* Mask INTx */
4342 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4343 	} else {
4344 		/*
4345 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4346 		 * decrement the appropriate handlers count and mask the
4347 		 * MSI-X message, or disable MSI messages if the count
4348 		 * drops to 0.
4349 		 */
4350 		dinfo = device_get_ivars(child);
4351 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4352 		if (rle->res != irq)
4353 			return (EINVAL);
4354 		if (dinfo->cfg.msi.msi_alloc > 0) {
4355 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4356 			    ("MSI-X index too high"));
4357 			if (dinfo->cfg.msi.msi_handlers == 0)
4358 				return (EINVAL);
4359 			dinfo->cfg.msi.msi_handlers--;
4360 			if (dinfo->cfg.msi.msi_handlers == 0)
4361 				pci_disable_msi(child);
4362 		} else {
4363 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4364 			    ("No MSI or MSI-X interrupts allocated"));
4365 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4366 			    ("MSI-X index too high"));
4367 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4368 			if (mte->mte_handlers == 0)
4369 				return (EINVAL);
4370 			mte->mte_handlers--;
4371 			if (mte->mte_handlers == 0)
4372 				pci_mask_msix(child, rid - 1);
4373 		}
4374 	}
4375 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4376 	if (rid > 0)
4377 		KASSERT(error == 0,
4378 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4379 	return (error);
4380 }
4381 
4382 int
4383 pci_print_child(device_t dev, device_t child)
4384 {
4385 	struct pci_devinfo *dinfo;
4386 	struct resource_list *rl;
4387 	int retval = 0;
4388 
4389 	dinfo = device_get_ivars(child);
4390 	rl = &dinfo->resources;
4391 
4392 	retval += bus_print_child_header(dev, child);
4393 
4394 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4395 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4396 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4397 	if (device_get_flags(dev))
4398 		retval += printf(" flags %#x", device_get_flags(dev));
4399 
4400 	retval += printf(" at device %d.%d", pci_get_slot(child),
4401 	    pci_get_function(child));
4402 
4403 	retval += bus_print_child_domain(dev, child);
4404 	retval += bus_print_child_footer(dev, child);
4405 
4406 	return (retval);
4407 }
4408 
/*
 * Class/subclass description table consulted by pci_probe_nomatch()
 * for devices that no driver claimed.
 *
 * An entry with subclass -1 supplies the description of the base class
 * itself; other entries describe one specific subclass of that class.
 * The table is terminated by an entry whose desc is NULL.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	/* Terminator: desc == NULL ends the scan. */
	{0, 0, 0,		NULL}
};
4503 
4504 void
4505 pci_probe_nomatch(device_t dev, device_t child)
4506 {
4507 	int i, report;
4508 	const char *cp, *scp;
4509 	char *device;
4510 
4511 	/*
4512 	 * Look for a listing for this device in a loaded device database.
4513 	 */
4514 	report = 1;
4515 	if ((device = pci_describe_device(child)) != NULL) {
4516 		device_printf(dev, "<%s>", device);
4517 		free(device, M_DEVBUF);
4518 	} else {
4519 		/*
4520 		 * Scan the class/subclass descriptions for a general
4521 		 * description.
4522 		 */
4523 		cp = "unknown";
4524 		scp = NULL;
4525 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4526 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4527 				if (pci_nomatch_tab[i].subclass == -1) {
4528 					cp = pci_nomatch_tab[i].desc;
4529 					report = pci_nomatch_tab[i].report;
4530 				} else if (pci_nomatch_tab[i].subclass ==
4531 				    pci_get_subclass(child)) {
4532 					scp = pci_nomatch_tab[i].desc;
4533 					report = pci_nomatch_tab[i].report;
4534 				}
4535 			}
4536 		}
4537 		if (report || bootverbose) {
4538 			device_printf(dev, "<%s%s%s>",
4539 			    cp ? cp : "",
4540 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4541 			    scp ? scp : "");
4542 		}
4543 	}
4544 	if (report || bootverbose) {
4545 		printf(" at device %d.%d (no driver attached)\n",
4546 		    pci_get_slot(child), pci_get_function(child));
4547 	}
4548 	pci_cfg_save(child, device_get_ivars(child), 1);
4549 }
4550 
4551 void
4552 pci_child_detached(device_t dev, device_t child)
4553 {
4554 	struct pci_devinfo *dinfo;
4555 	struct resource_list *rl;
4556 
4557 	dinfo = device_get_ivars(child);
4558 	rl = &dinfo->resources;
4559 
4560 	/*
4561 	 * Have to deallocate IRQs before releasing any MSI messages and
4562 	 * have to release MSI messages before deallocating any memory
4563 	 * BARs.
4564 	 */
4565 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4566 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4567 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4568 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4569 		(void)pci_release_msi(child);
4570 	}
4571 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4572 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4573 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4574 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4575 #ifdef PCI_RES_BUS
4576 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4577 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4578 #endif
4579 
4580 	pci_cfg_save(child, dinfo, 1);
4581 }
4582 
4583 /*
4584  * Parse the PCI device database, if loaded, and return a pointer to a
4585  * description of the device.
4586  *
4587  * The database is flat text formatted as follows:
4588  *
4589  * Any line not in a valid format is ignored.
4590  * Lines are terminated with newline '\n' characters.
4591  *
4592  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4593  * the vendor name.
4594  *
4595  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4596  * - devices cannot be listed without a corresponding VENDOR line.
4597  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4598  * another TAB, then the device name.
4599  */
4600 
4601 /*
4602  * Assuming (ptr) points to the beginning of a line in the database,
4603  * return the vendor or device and description of the next entry.
4604  * The value of (vendor) or (device) inappropriate for the entry type
4605  * is set to -1.  Returns nonzero at the end of the database.
4606  *
4607  * Note that this is slightly unrobust in the face of corrupt data;
4608  * we attempt to safeguard against this by spamming the end of the
4609  * database with a newline when we initialise.
4610  */
4611 static int
4612 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4613 {
4614 	char	*cp = *ptr;
4615 	int	left;
4616 
4617 	*device = -1;
4618 	*vendor = -1;
4619 	**desc = '\0';
4620 	for (;;) {
4621 		left = pci_vendordata_size - (cp - pci_vendordata);
4622 		if (left <= 0) {
4623 			*ptr = cp;
4624 			return(1);
4625 		}
4626 
4627 		/* vendor entry? */
4628 		if (*cp != '\t' &&
4629 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4630 			break;
4631 		/* device entry? */
4632 		if (*cp == '\t' &&
4633 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4634 			break;
4635 
4636 		/* skip to next line */
4637 		while (*cp != '\n' && left > 0) {
4638 			cp++;
4639 			left--;
4640 		}
4641 		if (*cp == '\n') {
4642 			cp++;
4643 			left--;
4644 		}
4645 	}
4646 	/* skip to next line */
4647 	while (*cp != '\n' && left > 0) {
4648 		cp++;
4649 		left--;
4650 	}
4651 	if (*cp == '\n' && left > 0)
4652 		cp++;
4653 	*ptr = cp;
4654 	return(0);
4655 }
4656 
4657 static char *
4658 pci_describe_device(device_t dev)
4659 {
4660 	int	vendor, device;
4661 	char	*desc, *vp, *dp, *line;
4662 
4663 	desc = vp = dp = NULL;
4664 
4665 	/*
4666 	 * If we have no vendor data, we can't do anything.
4667 	 */
4668 	if (pci_vendordata == NULL)
4669 		goto out;
4670 
4671 	/*
4672 	 * Scan the vendor data looking for this device
4673 	 */
4674 	line = pci_vendordata;
4675 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4676 		goto out;
4677 	for (;;) {
4678 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4679 			goto out;
4680 		if (vendor == pci_get_vendor(dev))
4681 			break;
4682 	}
4683 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4684 		goto out;
4685 	for (;;) {
4686 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4687 			*dp = 0;
4688 			break;
4689 		}
4690 		if (vendor != -1) {
4691 			*dp = 0;
4692 			break;
4693 		}
4694 		if (device == pci_get_device(dev))
4695 			break;
4696 	}
4697 	if (dp[0] == '\0')
4698 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4699 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4700 	    NULL)
4701 		sprintf(desc, "%s, %s", vp, dp);
4702 out:
4703 	if (vp != NULL)
4704 		free(vp, M_DEVBUF);
4705 	if (dp != NULL)
4706 		free(dp, M_DEVBUF);
4707 	return(desc);
4708 }
4709 
4710 int
4711 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4712 {
4713 	struct pci_devinfo *dinfo;
4714 	pcicfgregs *cfg;
4715 
4716 	dinfo = device_get_ivars(child);
4717 	cfg = &dinfo->cfg;
4718 
4719 	switch (which) {
4720 	case PCI_IVAR_ETHADDR:
4721 		/*
4722 		 * The generic accessor doesn't deal with failure, so
4723 		 * we set the return value, then return an error.
4724 		 */
4725 		*((uint8_t **) result) = NULL;
4726 		return (EINVAL);
4727 	case PCI_IVAR_SUBVENDOR:
4728 		*result = cfg->subvendor;
4729 		break;
4730 	case PCI_IVAR_SUBDEVICE:
4731 		*result = cfg->subdevice;
4732 		break;
4733 	case PCI_IVAR_VENDOR:
4734 		*result = cfg->vendor;
4735 		break;
4736 	case PCI_IVAR_DEVICE:
4737 		*result = cfg->device;
4738 		break;
4739 	case PCI_IVAR_DEVID:
4740 		*result = (cfg->device << 16) | cfg->vendor;
4741 		break;
4742 	case PCI_IVAR_CLASS:
4743 		*result = cfg->baseclass;
4744 		break;
4745 	case PCI_IVAR_SUBCLASS:
4746 		*result = cfg->subclass;
4747 		break;
4748 	case PCI_IVAR_PROGIF:
4749 		*result = cfg->progif;
4750 		break;
4751 	case PCI_IVAR_REVID:
4752 		*result = cfg->revid;
4753 		break;
4754 	case PCI_IVAR_INTPIN:
4755 		*result = cfg->intpin;
4756 		break;
4757 	case PCI_IVAR_IRQ:
4758 		*result = cfg->intline;
4759 		break;
4760 	case PCI_IVAR_DOMAIN:
4761 		*result = cfg->domain;
4762 		break;
4763 	case PCI_IVAR_BUS:
4764 		*result = cfg->bus;
4765 		break;
4766 	case PCI_IVAR_SLOT:
4767 		*result = cfg->slot;
4768 		break;
4769 	case PCI_IVAR_FUNCTION:
4770 		*result = cfg->func;
4771 		break;
4772 	case PCI_IVAR_CMDREG:
4773 		*result = cfg->cmdreg;
4774 		break;
4775 	case PCI_IVAR_CACHELNSZ:
4776 		*result = cfg->cachelnsz;
4777 		break;
4778 	case PCI_IVAR_MINGNT:
4779 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4780 			*result = -1;
4781 			return (EINVAL);
4782 		}
4783 		*result = cfg->mingnt;
4784 		break;
4785 	case PCI_IVAR_MAXLAT:
4786 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4787 			*result = -1;
4788 			return (EINVAL);
4789 		}
4790 		*result = cfg->maxlat;
4791 		break;
4792 	case PCI_IVAR_LATTIMER:
4793 		*result = cfg->lattimer;
4794 		break;
4795 	default:
4796 		return (ENOENT);
4797 	}
4798 	return (0);
4799 }
4800 
4801 int
4802 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4803 {
4804 	struct pci_devinfo *dinfo;
4805 
4806 	dinfo = device_get_ivars(child);
4807 
4808 	switch (which) {
4809 	case PCI_IVAR_INTPIN:
4810 		dinfo->cfg.intpin = value;
4811 		return (0);
4812 	case PCI_IVAR_ETHADDR:
4813 	case PCI_IVAR_SUBVENDOR:
4814 	case PCI_IVAR_SUBDEVICE:
4815 	case PCI_IVAR_VENDOR:
4816 	case PCI_IVAR_DEVICE:
4817 	case PCI_IVAR_DEVID:
4818 	case PCI_IVAR_CLASS:
4819 	case PCI_IVAR_SUBCLASS:
4820 	case PCI_IVAR_PROGIF:
4821 	case PCI_IVAR_REVID:
4822 	case PCI_IVAR_IRQ:
4823 	case PCI_IVAR_DOMAIN:
4824 	case PCI_IVAR_BUS:
4825 	case PCI_IVAR_SLOT:
4826 	case PCI_IVAR_FUNCTION:
4827 		return (EINVAL);	/* disallow for now */
4828 
4829 	default:
4830 		return (ENOENT);
4831 	}
4832 }
4833 
4834 #include "opt_ddb.h"
4835 #ifdef DDB
4836 #include <ddb/ddb.h>
4837 #include <sys/cons.h>
4838 
4839 /*
4840  * List resources based on pci map registers, used for within ddb
4841  */
4842 
4843 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4844 {
4845 	struct pci_devinfo *dinfo;
4846 	struct devlist *devlist_head;
4847 	struct pci_conf *p;
4848 	const char *name;
4849 	int i, error, none_count;
4850 
4851 	none_count = 0;
4852 	/* get the head of the device queue */
4853 	devlist_head = &pci_devq;
4854 
4855 	/*
4856 	 * Go through the list of devices and print out devices
4857 	 */
4858 	for (error = 0, i = 0,
4859 	     dinfo = STAILQ_FIRST(devlist_head);
4860 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4861 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4862 
4863 		/* Populate pd_name and pd_unit */
4864 		name = NULL;
4865 		if (dinfo->cfg.dev)
4866 			name = device_get_name(dinfo->cfg.dev);
4867 
4868 		p = &dinfo->conf;
4869 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4870 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4871 			(name && *name) ? name : "none",
4872 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4873 			none_count++,
4874 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4875 			p->pc_sel.pc_func, (p->pc_class << 16) |
4876 			(p->pc_subclass << 8) | p->pc_progif,
4877 			(p->pc_subdevice << 16) | p->pc_subvendor,
4878 			(p->pc_device << 16) | p->pc_vendor,
4879 			p->pc_revid, p->pc_hdr);
4880 	}
4881 }
4882 #endif /* DDB */
4883 
/*
 * Lazily reserve the resource backing the BAR identified by *rid.
 *
 * The BAR is looked up (or probed and recorded if not yet known), the
 * requested type is checked against what the BAR decodes, and a range
 * sized from the BAR itself (times num) is added to the child's
 * resource list and reserved.  On success the BAR is written with the
 * start of the reserved range and the resource is returned.  Returns
 * NULL if the rid is managed by EA, the BAR has size 0, the type does
 * not match, or the reservation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must agree with the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	/* mapsize is a log2 value: the range is 2^mapsize bytes per unit. */
	count = ((pci_addr_t)1 << mapsize) * num;
	/* Raise the requested alignment to at least the BAR size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	/* Reserve only; RF_ACTIVE is stripped from the flags here. */
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Undo the list entry added above. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4987 
/*
 * Allocate a resource for a child, performing lazy setup first.
 *
 * Depending on the type this may hand the request to
 * pci_alloc_secbus(), refuse a legacy IRQ once MSI/MSI-X messages are
 * allocated, try to assign an interrupt, pass bridge window requests
 * up the tree, or reserve a not-yet-reserved BAR through
 * pci_reserve_map().  Unless one of those paths returns early, the
 * request is completed by resource_list_alloc().  Returns the
 * resource, or NULL on failure.
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
		/* No break needed: this is the last case of the switch. */
	}
	/* Types not handled above go straight to the resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5063 
5064 struct resource *
5065 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
5066     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
5067 {
5068 #ifdef PCI_IOV
5069 	struct pci_devinfo *dinfo;
5070 #endif
5071 
5072 	if (device_get_parent(child) != dev)
5073 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
5074 		    type, rid, start, end, count, flags));
5075 
5076 #ifdef PCI_IOV
5077 	dinfo = device_get_ivars(child);
5078 	if (dinfo->cfg.flags & PCICFG_VF) {
5079 		switch (type) {
5080 		/* VFs can't have I/O BARs. */
5081 		case SYS_RES_IOPORT:
5082 			return (NULL);
5083 		case SYS_RES_MEMORY:
5084 			return (pci_vf_alloc_mem_resource(dev, child, rid,
5085 			    start, end, count, flags));
5086 		}
5087 
5088 		/* Fall through for other types of resource allocations. */
5089 	}
5090 #endif
5091 
5092 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
5093 	    count, 1, flags));
5094 }
5095 
5096 int
5097 pci_release_resource(device_t dev, device_t child, int type, int rid,
5098     struct resource *r)
5099 {
5100 	struct pci_devinfo *dinfo;
5101 	struct resource_list *rl;
5102 	pcicfgregs *cfg;
5103 
5104 	if (device_get_parent(child) != dev)
5105 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
5106 		    type, rid, r));
5107 
5108 	dinfo = device_get_ivars(child);
5109 	cfg = &dinfo->cfg;
5110 
5111 #ifdef PCI_IOV
5112 	if (dinfo->cfg.flags & PCICFG_VF) {
5113 		switch (type) {
5114 		/* VFs can't have I/O BARs. */
5115 		case SYS_RES_IOPORT:
5116 			return (EDOOFUS);
5117 		case SYS_RES_MEMORY:
5118 			return (pci_vf_release_mem_resource(dev, child, rid,
5119 			    r));
5120 		}
5121 
5122 		/* Fall through for other types of resource allocations. */
5123 	}
5124 #endif
5125 
5126 #ifdef NEW_PCIB
5127 	/*
5128 	 * PCI-PCI bridge I/O window resources are not BARs.  For
5129 	 * those allocations just pass the request up the tree.
5130 	 */
5131 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
5132 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
5133 		switch (rid) {
5134 		case PCIR_IOBASEL_1:
5135 		case PCIR_MEMBASE_1:
5136 		case PCIR_PMBASEL_1:
5137 			return (bus_generic_release_resource(dev, child, type,
5138 			    rid, r));
5139 		}
5140 	}
5141 #endif
5142 
5143 	rl = &dinfo->resources;
5144 	return (resource_list_release(rl, dev, child, type, rid, r));
5145 }
5146 
5147 int
5148 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5149     struct resource *r)
5150 {
5151 	struct pci_devinfo *dinfo;
5152 	int error;
5153 
5154 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5155 	if (error)
5156 		return (error);
5157 
5158 	/* Enable decoding in the command register when activating BARs. */
5159 	if (device_get_parent(child) == dev) {
5160 		/* Device ROMs need their decoding explicitly enabled. */
5161 		dinfo = device_get_ivars(child);
5162 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5163 			pci_write_bar(child, pci_find_bar(child, rid),
5164 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5165 		switch (type) {
5166 		case SYS_RES_IOPORT:
5167 		case SYS_RES_MEMORY:
5168 			error = PCI_ENABLE_IO(dev, child, type);
5169 			break;
5170 		}
5171 	}
5172 	return (error);
5173 }
5174 
5175 int
5176 pci_deactivate_resource(device_t dev, device_t child, int type,
5177     int rid, struct resource *r)
5178 {
5179 	struct pci_devinfo *dinfo;
5180 	int error;
5181 
5182 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5183 	if (error)
5184 		return (error);
5185 
5186 	/* Disable decoding for device ROMs. */
5187 	if (device_get_parent(child) == dev) {
5188 		dinfo = device_get_ivars(child);
5189 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5190 			pci_write_bar(child, pci_find_bar(child, rid),
5191 			    rman_get_start(r));
5192 	}
5193 	return (0);
5194 }
5195 
5196 void
5197 pci_delete_child(device_t dev, device_t child)
5198 {
5199 	struct resource_list_entry *rle;
5200 	struct resource_list *rl;
5201 	struct pci_devinfo *dinfo;
5202 
5203 	dinfo = device_get_ivars(child);
5204 	rl = &dinfo->resources;
5205 
5206 	if (device_is_attached(child))
5207 		device_detach(child);
5208 
5209 	/* Turn off access to resources we're about to free */
5210 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5211 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5212 
5213 	/* Free all allocated resources */
5214 	STAILQ_FOREACH(rle, rl, link) {
5215 		if (rle->res) {
5216 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5217 			    resource_list_busy(rl, rle->type, rle->rid)) {
5218 				pci_printf(&dinfo->cfg,
5219 				    "Resource still owned, oops. "
5220 				    "(type=%d, rid=%d, addr=%lx)\n",
5221 				    rle->type, rle->rid,
5222 				    rman_get_start(rle->res));
5223 				bus_release_resource(child, rle->type, rle->rid,
5224 				    rle->res);
5225 			}
5226 			resource_list_unreserve(rl, dev, child, rle->type,
5227 			    rle->rid);
5228 		}
5229 	}
5230 	resource_list_free(rl);
5231 
5232 	device_delete_child(dev, child);
5233 	pci_freecfg(dinfo);
5234 }
5235 
5236 void
5237 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5238 {
5239 	struct pci_devinfo *dinfo;
5240 	struct resource_list *rl;
5241 	struct resource_list_entry *rle;
5242 
5243 	if (device_get_parent(child) != dev)
5244 		return;
5245 
5246 	dinfo = device_get_ivars(child);
5247 	rl = &dinfo->resources;
5248 	rle = resource_list_find(rl, type, rid);
5249 	if (rle == NULL)
5250 		return;
5251 
5252 	if (rle->res) {
5253 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5254 		    resource_list_busy(rl, type, rid)) {
5255 			device_printf(dev, "delete_resource: "
5256 			    "Resource still owned by child, oops. "
5257 			    "(type=%d, rid=%d, addr=%lx)\n",
5258 			    type, rid, rman_get_start(rle->res));
5259 			return;
5260 		}
5261 		resource_list_unreserve(rl, dev, child, type, rid);
5262 	}
5263 	resource_list_delete(rl, type, rid);
5264 }
5265 
5266 struct resource_list *
5267 pci_get_resource_list (device_t dev, device_t child)
5268 {
5269 	struct pci_devinfo *dinfo = device_get_ivars(child);
5270 
5271 	return (&dinfo->resources);
5272 }
5273 
5274 bus_dma_tag_t
5275 pci_get_dma_tag(device_t bus, device_t dev)
5276 {
5277 	struct pci_softc *sc = device_get_softc(bus);
5278 
5279 	return (sc->sc_dma_tag);
5280 }
5281 
5282 uint32_t
5283 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5284 {
5285 	struct pci_devinfo *dinfo = device_get_ivars(child);
5286 	pcicfgregs *cfg = &dinfo->cfg;
5287 
5288 #ifdef PCI_IOV
5289 	/*
5290 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5291 	 * emulate them here.
5292 	 */
5293 	if (cfg->flags & PCICFG_VF) {
5294 		if (reg == PCIR_VENDOR) {
5295 			switch (width) {
5296 			case 4:
5297 				return (cfg->device << 16 | cfg->vendor);
5298 			case 2:
5299 				return (cfg->vendor);
5300 			case 1:
5301 				return (cfg->vendor & 0xff);
5302 			default:
5303 				return (0xffffffff);
5304 			}
5305 		} else if (reg == PCIR_DEVICE) {
5306 			switch (width) {
5307 			/* Note that an unaligned 4-byte read is an error. */
5308 			case 2:
5309 				return (cfg->device);
5310 			case 1:
5311 				return (cfg->device & 0xff);
5312 			default:
5313 				return (0xffffffff);
5314 			}
5315 		}
5316 	}
5317 #endif
5318 
5319 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5320 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5321 }
5322 
5323 void
5324 pci_write_config_method(device_t dev, device_t child, int reg,
5325     uint32_t val, int width)
5326 {
5327 	struct pci_devinfo *dinfo = device_get_ivars(child);
5328 	pcicfgregs *cfg = &dinfo->cfg;
5329 
5330 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5331 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5332 }
5333 
5334 int
5335 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5336     size_t buflen)
5337 {
5338 
5339 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5340 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5341 	return (0);
5342 }
5343 
5344 int
5345 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5346     size_t buflen)
5347 {
5348 	struct pci_devinfo *dinfo;
5349 	pcicfgregs *cfg;
5350 
5351 	dinfo = device_get_ivars(child);
5352 	cfg = &dinfo->cfg;
5353 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5354 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5355 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5356 	    cfg->progif);
5357 	return (0);
5358 }
5359 
5360 int
5361 pci_assign_interrupt_method(device_t dev, device_t child)
5362 {
5363 	struct pci_devinfo *dinfo = device_get_ivars(child);
5364 	pcicfgregs *cfg = &dinfo->cfg;
5365 
5366 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5367 	    cfg->intpin));
5368 }
5369 
5370 static void
5371 pci_lookup(void *arg, const char *name, device_t *dev)
5372 {
5373 	long val;
5374 	char *end;
5375 	int domain, bus, slot, func;
5376 
5377 	if (*dev != NULL)
5378 		return;
5379 
5380 	/*
5381 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5382 	 * pciB:S:F.  In the latter case, the domain is assumed to
5383 	 * be zero.
5384 	 */
5385 	if (strncmp(name, "pci", 3) != 0)
5386 		return;
5387 	val = strtol(name + 3, &end, 10);
5388 	if (val < 0 || val > INT_MAX || *end != ':')
5389 		return;
5390 	domain = val;
5391 	val = strtol(end + 1, &end, 10);
5392 	if (val < 0 || val > INT_MAX || *end != ':')
5393 		return;
5394 	bus = val;
5395 	val = strtol(end + 1, &end, 10);
5396 	if (val < 0 || val > INT_MAX)
5397 		return;
5398 	slot = val;
5399 	if (*end == ':') {
5400 		val = strtol(end + 1, &end, 10);
5401 		if (val < 0 || val > INT_MAX || *end != '\0')
5402 			return;
5403 		func = val;
5404 	} else if (*end == '\0') {
5405 		func = slot;
5406 		slot = bus;
5407 		bus = domain;
5408 		domain = 0;
5409 	} else
5410 		return;
5411 
5412 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5413 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5414 		return;
5415 
5416 	*dev = pci_find_dbsf(domain, bus, slot, func);
5417 }
5418 
5419 static int
5420 pci_modevent(module_t mod, int what, void *arg)
5421 {
5422 	static struct cdev *pci_cdev;
5423 	static eventhandler_tag tag;
5424 
5425 	switch (what) {
5426 	case MOD_LOAD:
5427 		STAILQ_INIT(&pci_devq);
5428 		pci_generation = 0;
5429 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5430 		    "pci");
5431 		pci_load_vendor_data();
5432 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5433 		    1000);
5434 		break;
5435 
5436 	case MOD_UNLOAD:
5437 		if (tag != NULL)
5438 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5439 		destroy_dev(pci_cdev);
5440 		break;
5441 	}
5442 
5443 	return (0);
5444 }
5445 
5446 static void
5447 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5448 {
5449 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5450 	struct pcicfg_pcie *cfg;
5451 	int version, pos;
5452 
5453 	cfg = &dinfo->cfg.pcie;
5454 	pos = cfg->pcie_location;
5455 
5456 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5457 
5458 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5459 
5460 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5461 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5462 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5463 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5464 
5465 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5466 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5467 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5468 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5469 
5470 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5471 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5472 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5473 
5474 	if (version > 1) {
5475 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5476 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5477 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5478 	}
5479 #undef WREG
5480 }
5481 
5482 static void
5483 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5484 {
5485 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5486 	    dinfo->cfg.pcix.pcix_command,  2);
5487 }
5488 
5489 void
5490 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5491 {
5492 
5493 	/*
5494 	 * Restore the device to full power mode.  We must do this
5495 	 * before we restore the registers because moving from D3 to
5496 	 * D0 will cause the chip's BARs and some other registers to
5497 	 * be reset to some unknown power on reset values.  Cut down
5498 	 * the noise on boot by doing nothing if we are already in
5499 	 * state D0.
5500 	 */
5501 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5502 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5503 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5504 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5505 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5506 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5507 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5508 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5509 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5510 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5511 	case PCIM_HDRTYPE_NORMAL:
5512 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5513 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5514 		break;
5515 	case PCIM_HDRTYPE_BRIDGE:
5516 		pci_write_config(dev, PCIR_SECLAT_1,
5517 		    dinfo->cfg.bridge.br_seclat, 1);
5518 		pci_write_config(dev, PCIR_SUBBUS_1,
5519 		    dinfo->cfg.bridge.br_subbus, 1);
5520 		pci_write_config(dev, PCIR_SECBUS_1,
5521 		    dinfo->cfg.bridge.br_secbus, 1);
5522 		pci_write_config(dev, PCIR_PRIBUS_1,
5523 		    dinfo->cfg.bridge.br_pribus, 1);
5524 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5525 		    dinfo->cfg.bridge.br_control, 2);
5526 		break;
5527 	case PCIM_HDRTYPE_CARDBUS:
5528 		pci_write_config(dev, PCIR_SECLAT_2,
5529 		    dinfo->cfg.bridge.br_seclat, 1);
5530 		pci_write_config(dev, PCIR_SUBBUS_2,
5531 		    dinfo->cfg.bridge.br_subbus, 1);
5532 		pci_write_config(dev, PCIR_SECBUS_2,
5533 		    dinfo->cfg.bridge.br_secbus, 1);
5534 		pci_write_config(dev, PCIR_PRIBUS_2,
5535 		    dinfo->cfg.bridge.br_pribus, 1);
5536 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5537 		    dinfo->cfg.bridge.br_control, 2);
5538 		break;
5539 	}
5540 	pci_restore_bars(dev);
5541 
5542 	/*
5543 	 * Restore extended capabilities for PCI-Express and PCI-X
5544 	 */
5545 	if (dinfo->cfg.pcie.pcie_location != 0)
5546 		pci_cfg_restore_pcie(dev, dinfo);
5547 	if (dinfo->cfg.pcix.pcix_location != 0)
5548 		pci_cfg_restore_pcix(dev, dinfo);
5549 
5550 	/* Restore MSI and MSI-X configurations if they are present. */
5551 	if (dinfo->cfg.msi.msi_location != 0)
5552 		pci_resume_msi(dev);
5553 	if (dinfo->cfg.msix.msix_location != 0)
5554 		pci_resume_msix(dev);
5555 }
5556 
5557 static void
5558 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5559 {
5560 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5561 	struct pcicfg_pcie *cfg;
5562 	int version, pos;
5563 
5564 	cfg = &dinfo->cfg.pcie;
5565 	pos = cfg->pcie_location;
5566 
5567 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5568 
5569 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5570 
5571 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5572 
5573 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5574 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5575 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5576 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5577 
5578 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5579 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5580 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5581 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5582 
5583 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5584 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5585 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5586 
5587 	if (version > 1) {
5588 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5589 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5590 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5591 	}
5592 #undef RREG
5593 }
5594 
5595 static void
5596 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5597 {
5598 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5599 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5600 }
5601 
5602 void
5603 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5604 {
5605 	uint32_t cls;
5606 	int ps;
5607 
5608 	/*
5609 	 * Some drivers apparently write to these registers w/o updating our
5610 	 * cached copy.  No harm happens if we update the copy, so do so here
5611 	 * so we can restore them.  The COMMAND register is modified by the
5612 	 * bus w/o updating the cache.  This should represent the normally
5613 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5614 	 */
5615 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5616 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5617 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5618 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5619 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5620 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5621 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5622 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5623 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5624 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5625 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5626 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5627 	case PCIM_HDRTYPE_NORMAL:
5628 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5629 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5630 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5631 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5632 		break;
5633 	case PCIM_HDRTYPE_BRIDGE:
5634 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5635 		    PCIR_SECLAT_1, 1);
5636 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5637 		    PCIR_SUBBUS_1, 1);
5638 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5639 		    PCIR_SECBUS_1, 1);
5640 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5641 		    PCIR_PRIBUS_1, 1);
5642 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5643 		    PCIR_BRIDGECTL_1, 2);
5644 		break;
5645 	case PCIM_HDRTYPE_CARDBUS:
5646 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5647 		    PCIR_SECLAT_2, 1);
5648 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5649 		    PCIR_SUBBUS_2, 1);
5650 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5651 		    PCIR_SECBUS_2, 1);
5652 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5653 		    PCIR_PRIBUS_2, 1);
5654 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5655 		    PCIR_BRIDGECTL_2, 2);
5656 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5657 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5658 		break;
5659 	}
5660 
5661 	if (dinfo->cfg.pcie.pcie_location != 0)
5662 		pci_cfg_save_pcie(dev, dinfo);
5663 
5664 	if (dinfo->cfg.pcix.pcix_location != 0)
5665 		pci_cfg_save_pcix(dev, dinfo);
5666 
5667 	/*
5668 	 * don't set the state for display devices, base peripherals and
5669 	 * memory devices since bad things happen when they are powered down.
5670 	 * We should (a) have drivers that can easily detach and (b) use
5671 	 * generic drivers for these devices so that some device actually
5672 	 * attaches.  We need to make sure that when we implement (a) we don't
5673 	 * power the device down on a reattach.
5674 	 */
5675 	cls = pci_get_class(dev);
5676 	if (!setstate)
5677 		return;
5678 	switch (pci_do_power_nodriver)
5679 	{
5680 		case 0:		/* NO powerdown at all */
5681 			return;
5682 		case 1:		/* Conservative about what to power down */
5683 			if (cls == PCIC_STORAGE)
5684 				return;
5685 			/*FALLTHROUGH*/
5686 		case 2:		/* Agressive about what to power down */
5687 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5688 			    cls == PCIC_BASEPERIPH)
5689 				return;
5690 			/*FALLTHROUGH*/
5691 		case 3:		/* Power down everything */
5692 			break;
5693 	}
5694 	/*
5695 	 * PCI spec says we can only go into D3 state from D0 state.
5696 	 * Transition from D[12] into D0 before going to D3 state.
5697 	 */
5698 	ps = pci_get_powerstate(dev);
5699 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5700 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5701 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5702 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5703 }
5704 
5705 /* Wrapper APIs suitable for device driver use. */
5706 void
5707 pci_save_state(device_t dev)
5708 {
5709 	struct pci_devinfo *dinfo;
5710 
5711 	dinfo = device_get_ivars(dev);
5712 	pci_cfg_save(dev, dinfo, 0);
5713 }
5714 
5715 void
5716 pci_restore_state(device_t dev)
5717 {
5718 	struct pci_devinfo *dinfo;
5719 
5720 	dinfo = device_get_ivars(dev);
5721 	pci_cfg_restore(dev, dinfo);
5722 }
5723 
5724 static uint16_t
5725 pci_get_rid_method(device_t dev, device_t child)
5726 {
5727 
5728 	return (PCIB_GET_RID(device_get_parent(dev), child));
5729 }
5730 
5731 /* Find the upstream port of a given PCI device in a root complex. */
5732 device_t
5733 pci_find_pcie_root_port(device_t dev)
5734 {
5735 	struct pci_devinfo *dinfo;
5736 	devclass_t pci_class;
5737 	device_t pcib, bus;
5738 
5739 	pci_class = devclass_find("pci");
5740 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5741 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5742 
5743 	/*
5744 	 * Walk the bridge hierarchy until we find a PCI-e root
5745 	 * port or a non-PCI device.
5746 	 */
5747 	for (;;) {
5748 		bus = device_get_parent(dev);
5749 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5750 		    device_get_nameunit(dev)));
5751 
5752 		pcib = device_get_parent(bus);
5753 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5754 		    device_get_nameunit(bus)));
5755 
5756 		/*
5757 		 * pcib's parent must be a PCI bus for this to be a
5758 		 * PCI-PCI bridge.
5759 		 */
5760 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5761 			return (NULL);
5762 
5763 		dinfo = device_get_ivars(pcib);
5764 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5765 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5766 			return (pcib);
5767 
5768 		dev = pcib;
5769 	}
5770 }
5771