xref: /freebsd/sys/dev/pci/pci.c (revision d93a896ef95946b0bf1219866fcb324b78543444)
/*-
 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
 * Copyright (c) 2000, BSDi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bus.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/endian.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

#include <sys/bus.h>
#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
#include <machine/intr_machdep.h>
#endif

#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#ifdef PCI_IOV
#include <sys/nv.h>
#include <dev/pci/pci_iov_private.h>
#endif

#include <dev/usb/controller/xhcireg.h>
#include <dev/usb/controller/ehcireg.h>
#include <dev/usb/controller/ohcireg.h>
#include <dev/usb/controller/uhcireg.h>

#include "pcib_if.h"
#include "pci_if.h"

#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))

static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static int		pci_detach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static int		pci_get_id_method(device_t dev, device_t child,
			    enum pci_id_type type, uintptr_t *rid);

static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
    int b, int s, int f, uint16_t vid, uint16_t did);

static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

static char	*pci_vendordata;
static size_t	pci_vendordata_size;

struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;
	int	arg2;
};

static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
	 * a bug whereby the MSI interrupt does not assert if the
	 * PCIM_CMD_INTxDIS bit of the command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }
};

/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_msix_rewrite_table = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, msix_rewrite_table, CTLFLAG_RWTUN,
    &pci_msix_rewrite_table, 0,
    "Rewrite entire MSI-X table when updating MSI-X entries");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is,\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative Routing-ID Interpretation (ARI)");

static int
pci_has_quirk(uint32_t devid, int quirk)
{
	const struct pci_quirk *q;

	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == devid && q->type == quirk)
			return (1);
	}
	return (0);
}

/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}

/* Find a device_t by domain/bus/slot/function */

device_t
pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
{
	struct pci_devinfo *dinfo;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		if ((dinfo->cfg.domain == domain) &&
		    (dinfo->cfg.bus == bus) &&
		    (dinfo->cfg.slot == slot) &&
		    (dinfo->cfg.func == func)) {
			return (dinfo->cfg.dev);
		}
	}

	return (NULL);
}

/* Find a device_t by vendor/device ID */

device_t
pci_find_device(uint16_t vendor, uint16_t device)
{
	struct pci_devinfo *dinfo;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		if ((dinfo->cfg.vendor == vendor) &&
		    (dinfo->cfg.device == device)) {
			return (dinfo->cfg.dev);
		}
	}

	return (NULL);
}

device_t
pci_find_class(uint8_t class, uint8_t subclass)
{
	struct pci_devinfo *dinfo;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		if (dinfo->cfg.baseclass == class &&
		    dinfo->cfg.subclass == subclass) {
			return (dinfo->cfg.dev);
		}
	}

	return (NULL);
}

static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}

/* return base address of memory or port map */

static pci_addr_t
pci_mapbase(uint64_t mapreg)
{

	if (PCI_BAR_MEM(mapreg))
		return (mapreg & PCIM_BAR_MEM_BASE);
	else
		return (mapreg & PCIM_BAR_IO_BASE);
}

/* return map type of memory or port map */

static const char *
pci_maptype(uint64_t mapreg)
{

	if (PCI_BAR_IO(mapreg))
		return ("I/O Port");
	if (mapreg & PCIM_BAR_MEM_PREFETCH)
		return ("Prefetchable Memory");
	return ("Memory");
}

/* return log2 of map size decoded for memory or port map */

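/*
 * The caller is expected to pass the value read back from a BAR after
 * writing all-1s to it: the device wires the low bits of its decoded
 * range to zero, so the lowest set bit gives the size.  For example, a
 * memory BAR that reads back 0xfffff000 yields ln2size 12, i.e. a 4KB
 * window.
 */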
int
pci_mapsize(uint64_t testval)
{
	int ln2size;

	testval = pci_mapbase(testval);
	ln2size = 0;
	if (testval != 0) {
		while ((testval & 1) == 0) {
			ln2size++;
			testval >>= 1;
		}
	}
	return (ln2size);
}

/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}

/* return log2 of map size decoded for device ROM */

static int
pci_romsize(uint64_t testval)
{
	int ln2size;

	testval = pci_rombase(testval);
	ln2size = 0;
	if (testval != 0) {
		while ((testval & 1) == 0) {
			ln2size++;
			testval >>= 1;
		}
	}
	return (ln2size);
}

/* return log2 of address range supported by map register */

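/*
 * Note that a 64-bit memory BAR (PCIM_BAR_MEM_64) occupies two
 * consecutive 32-bit registers, so callers use this range to decide
 * whether to read the BAR as a single 64-bit register pair.
 */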
static int
pci_maprange(uint64_t mapreg)
{
	int ln2range = 0;

	if (PCI_BAR_IO(mapreg))
		ln2range = 32;
	else
		switch (mapreg & PCIM_BAR_MEM_TYPE) {
		case PCIM_BAR_MEM_32:
			ln2range = 32;
			break;
		case PCIM_BAR_MEM_1MB:
			ln2range = 20;
			break;
		case PCIM_BAR_MEM_64:
			ln2range = 64;
			break;
		}
	return (ln2range);
}

/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}

/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}

/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}

struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
	    M_WAITOK | M_ZERO));
}

static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG

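/*
 * Parse the Enhanced Allocation (EA) capability.  Each EA entry opens
 * with a header dword whose low bits (PCIM_EA_ES) give the number of
 * dwords that follow; the first two of those carry the base address
 * and the maximum offset (size - 1), each optionally widened by an
 * upper-32-bit dword when PCIM_EA_IS_64 is set.
 */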
static void
pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
    cfg->ea.ea_location + (n), w)
	int num_ent;
	int ptr;
	int a, b;
	uint32_t val;
	int ent_size;
	uint32_t dw[4];
	uint64_t base, max_offset;
	struct pci_ea_entry *eae;

	if (cfg->ea.ea_location == 0)
		return;

	STAILQ_INIT(&cfg->ea.ea_entries);

	/* Determine the number of entries */
	num_ent = REG(PCIR_EA_NUM_ENT, 2);
	num_ent &= PCIM_EA_NUM_ENT_MASK;

	/* Find the first entry to take care of */
	ptr = PCIR_EA_FIRST_ENT;

	/* Skip DWORD 2 for type 1 functions */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
		ptr += 4;

	for (a = 0; a < num_ent; a++) {
		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;

		/* Read a number of dwords in the entry */
		val = REG(ptr, 4);
		ptr += 4;
		ent_size = (val & PCIM_EA_ES);

		for (b = 0; b < ent_size; b++) {
			dw[b] = REG(ptr, 4);
			ptr += 4;
		}

		eae->eae_flags = val;
		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;

		base = dw[0] & PCIM_EA_FIELD_MASK;
		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
		b = 2;
		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
			base |= (uint64_t)dw[b] << 32UL;
			b++;
		}
		if (((dw[1] & PCIM_EA_IS_64) != 0)
		    && (b < ent_size)) {
			max_offset |= (uint64_t)dw[b] << 32UL;
			b++;
		}

		eae->eae_base = base;
		eae->eae_max_offset = max_offset;

		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);

		if (bootverbose) {
			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
		}
	}
}
#undef REG

static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
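		/*
		 * The MSI Multiple Message Capable field is a
		 * power-of-two exponent (000 = 1 message ... 101 = 32
		 * messages), hence the 1 << (bits 3:1 of the control
		 * register) decode below.
		 */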
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		    2);
	}
#endif
/* REG and WREG remain defined for use by the next functions */
}

/*
 * PCI Vital Product Data
 */

#define	PCI_VPD_TIMEOUT		1000000

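/*
 * VPD access uses a doorbell protocol: a read writes the dword-aligned
 * VPD address with bit 15 (the flag bit) clear and then polls until the
 * device sets the bit to indicate that the data register is valid; a
 * write (disabled below) uses the inverse handshake.
 */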
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}

#if 0
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT

struct vpd_readstate {
	device_t	pcib;
	pcicfgregs	*cfg;
	uint32_t	val;
	int		bytesinval;
	int		off;
	uint8_t		cksum;
};

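/*
 * VPD data is fetched one 32-bit word at a time; vpd_nextbyte() buffers
 * the current word in vrs->val and hands it out a byte at a time while
 * accumulating the running checksum.
 */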
static int
vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
{
	uint32_t reg;
	uint8_t byte;

	if (vrs->bytesinval == 0) {
		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
			return (ENXIO);
		vrs->val = le32toh(reg);
		vrs->off += 4;
		byte = vrs->val & 0xff;
		vrs->bytesinval = 3;
	} else {
		vrs->val = vrs->val >> 8;
		byte = vrs->val & 0xff;
		vrs->bytesinval--;
	}

	vrs->cksum += byte;
	*data = byte;
	return (0);
}

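/*
 * Parse the VPD resource list.  A small resource tag packs the type in
 * bits 6-3 and the length in bits 2-0 of a single byte; a large tag
 * (bit 7 set) keeps the type in bits 6-0 and is followed by a 16-bit
 * little-endian length.  The state machine below walks the Identifier
 * String, VPD-R, and VPD-W resources built from these tags.
 */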
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}

int
pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(dev), cfg);

	*identptr = cfg->vpd.vpd_ident;

	if (*identptr == NULL)
		return (ENXIO);

	return (0);
}

int
pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
	const char **vptr)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	int i;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(dev), cfg);

	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
			*vptr = cfg->vpd.vpd_ros[i].value;
			return (0);
		}

	*vptr = NULL;
	return (ENXIO);
}

struct pcicfg_vpd *
pci_fetch_vpd_list(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
	return (&cfg->vpd);
}

/*
 * Find the requested HyperTransport capability and return the offset
 * in configuration space via the pointer provided.  The function
 * returns 0 on success and an error code otherwise.
 */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}

/*
 * Find the requested capability and return the offset in
 * configuration space via the pointer provided.  The function returns
 * 0 on success and an error code otherwise.
 */
int
pci_find_cap_method(device_t dev, device_t child, int capability,
    int *capreg)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	u_int32_t status;
	u_int8_t ptr;

	/*
	 * Check the CAP_LIST bit of the PCI status register first.
	 */
	status = pci_read_config(child, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (ENXIO);

	/*
	 * Determine the start pointer of the capabilities list.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		/* XXX: panic? */
		return (ENXIO);		/* no extended capabilities support */
	}
	ptr = pci_read_config(child, ptr, 1);

	/*
	 * Traverse the capabilities list.
	 */
	while (ptr != 0) {
		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}
		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
	}

	return (ENOENT);
}

/*
 * Find the requested extended capability and return the offset in
 * configuration space via the pointer provided.  The function returns
 * 0 on success and an error code otherwise.
 */
int
pci_find_extcap_method(device_t dev, device_t child, int capability,
    int *capreg)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t ecap;
	uint16_t ptr;

	/* Only supported for PCI-express devices. */
	if (cfg->pcie.pcie_location == 0)
		return (ENXIO);

	ptr = PCIR_EXTCAP;
	ecap = pci_read_config(child, ptr, 4);
	if (ecap == 0xffffffff || ecap == 0)
		return (ENOENT);
	for (;;) {
		if (PCI_EXTCAP_ID(ecap) == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}
		ptr = PCI_EXTCAP_NEXTPTR(ecap);
		if (ptr == 0)
			break;
		ecap = pci_read_config(child, ptr, 4);
	}

	return (ENOENT);
}

/*
 * Support for MSI-X message interrupts.
 */
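/*
 * Each MSI-X table entry is 16 bytes: message address low and high
 * dwords, message data, and vector control.  The low bit of vector
 * control masks the vector (see pci_mask_msix() below).
 */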
static void
pci_write_msix_entry(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);
}

void
pci_enable_msix_method(device_t dev, device_t child, u_int index,
    uint64_t address, uint32_t data)
{

	if (pci_msix_rewrite_table) {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		struct pcicfg_msix *msix = &dinfo->cfg.msix;

		/*
		 * Some VM hosts require MSI-X to be disabled in the
		 * control register before updates to the MSI-X table
		 * entries are allowed.  It is not enough to only
		 * disable MSI-X while updating a single entry; MSI-X
		 * must be disabled while updating all entries in the
		 * table.
		 */
		pci_write_config(child,
		    msix->msix_location + PCIR_MSIX_CTRL,
		    msix->msix_ctrl & ~PCIM_MSIXCTRL_MSIX_ENABLE, 2);
		pci_resume_msix(child);
	} else
		pci_write_msix_entry(child, index, address, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}

void
pci_mask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	KASSERT(msix->msix_msgnum > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
		val |= PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}

void
pci_unmask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (val & PCIM_MSIX_VCTRL_MASK) {
		val &= ~PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}

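/*
 * The Pending Bit Array holds one bit per vector packed into dwords,
 * so vector 'index' is found at dword (index / 32), bit (index % 32).
 */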
int
pci_pending_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, bit;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_pba_offset + (index / 32) * 4;
	bit = 1 << index % 32;
	return (bus_read_4(msix->msix_pba_res, offset) & bit);
}

/*
 * Restore MSI-X registers and table during resume.  If MSI-X is
 * enabled then walk the virtual table to restore the actual MSI-X
 * table.
 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_write_msix_entry(dev, i, mv->mv_address,
			    mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}

/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}

/*
 * By default, pci_alloc_msix() will assign the allocated IRQ
 * resources consecutively to the first N messages in the MSI-X table.
 * However, device drivers may want to use different layouts if they
 * either receive fewer messages than they asked for, or they wish to
 * populate the MSI-X table sparsely.  This method allows the driver
 * to specify what layout it wants.  It must be called after a
 * successful pci_alloc_msix() but before any of the associated
 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
 *
 * The 'vectors' array contains 'count' message vectors.  The array
 * maps directly to the MSI-X table in that index 0 in the array
 * specifies the vector for the first message in the MSI-X table, etc.
 * The vector value in each array index can either be 0 to indicate
 * that no vector should be assigned to a message slot, or it can be a
 * number from 1 to N (where N is the count returned from a
1768  * successful call to pci_alloc_msix()) to indicate which message
1769  * vector (IRQ) to use for the corresponding message.
1770  *
1771  * On successful return, each message with a non-zero vector will have
1772  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1773  * 1.  Additionally, if any of the IRQs allocated via the previous
1774  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1775  * will be freed back to the system automatically.
1776  *
1777  * For example, suppose a driver has a MSI-X table with 6 messages and
1778  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1779  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1780  * C.  After the call to pci_alloc_msix(), the device will be setup to
1781  * have an MSI-X table of ABC--- (where - means no vector assigned).
1782  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1783  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1784  * be freed back to the system.  This device will also have valid
1785  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1786  *
1787  * In any case, the SYS_RES_IRQ rid X will always map to the message
1788  * at MSI-X table index X - 1 and will only be valid if a vector is
1789  * assigned to that table entry.
1790  */
1791 int
1792 pci_remap_msix_method(device_t dev, device_t child, int count,
1793     const u_int *vectors)
1794 {
1795 	struct pci_devinfo *dinfo = device_get_ivars(child);
1796 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1797 	struct resource_list_entry *rle;
1798 	int i, irq, j, *used;
1799 
1800 	/*
1801 	 * Have to have at least one message in the table but the
1802 	 * table can't be bigger than the actual MSI-X table in the
1803 	 * device.
1804 	 */
1805 	if (count == 0 || count > msix->msix_msgnum)
1806 		return (EINVAL);
1807 
1808 	/* Sanity check the vectors. */
1809 	for (i = 0; i < count; i++)
1810 		if (vectors[i] > msix->msix_alloc)
1811 			return (EINVAL);
1812 
1813 	/*
1814 	 * Make sure there aren't any holes in the vectors to be used.
1815 	 * It's a big pain to support it, and it doesn't really make
1816 	 * sense anyway.  Also, at least one vector must be used.
1817 	 */
1818 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1819 	    M_ZERO);
1820 	for (i = 0; i < count; i++)
1821 		if (vectors[i] != 0)
1822 			used[vectors[i] - 1] = 1;
1823 	for (i = 0; i < msix->msix_alloc - 1; i++)
1824 		if (used[i] == 0 && used[i + 1] == 1) {
1825 			free(used, M_DEVBUF);
1826 			return (EINVAL);
1827 		}
1828 	if (used[0] != 1) {
1829 		free(used, M_DEVBUF);
1830 		return (EINVAL);
1831 	}
1832 
1833 	/* Make sure none of the resources are allocated. */
1834 	for (i = 0; i < msix->msix_table_len; i++) {
1835 		if (msix->msix_table[i].mte_vector == 0)
1836 			continue;
1837 		if (msix->msix_table[i].mte_handlers > 0) {
1838 			free(used, M_DEVBUF);
1839 			return (EBUSY);
1840 		}
1841 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1842 		KASSERT(rle != NULL, ("missing resource"));
1843 		if (rle->res != NULL) {
1844 			free(used, M_DEVBUF);
1845 			return (EBUSY);
1846 		}
1847 	}
1848 
1849 	/* Free the existing resource list entries. */
1850 	for (i = 0; i < msix->msix_table_len; i++) {
1851 		if (msix->msix_table[i].mte_vector == 0)
1852 			continue;
1853 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1854 	}
1855 
1856 	/*
1857 	 * Build the new virtual table keeping track of which vectors are
1858 	 * used.
1859 	 */
1860 	free(msix->msix_table, M_DEVBUF);
1861 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1862 	    M_DEVBUF, M_WAITOK | M_ZERO);
1863 	for (i = 0; i < count; i++)
1864 		msix->msix_table[i].mte_vector = vectors[i];
1865 	msix->msix_table_len = count;
1866 
1867 	/* Free any unused IRQs and resize the vectors array if necessary. */
1868 	j = msix->msix_alloc - 1;
1869 	if (used[j] == 0) {
1870 		struct msix_vector *vec;
1871 
1872 		while (used[j] == 0) {
1873 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1874 			    msix->msix_vectors[j].mv_irq);
1875 			j--;
1876 		}
1877 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1878 		    M_WAITOK);
1879 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1880 		    (j + 1));
1881 		free(msix->msix_vectors, M_DEVBUF);
1882 		msix->msix_vectors = vec;
1883 		msix->msix_alloc = j + 1;
1884 	}
1885 	free(used, M_DEVBUF);
1886 
1887 	/* Map the IRQs onto the rids. */
1888 	for (i = 0; i < count; i++) {
1889 		if (vectors[i] == 0)
1890 			continue;
1891 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1892 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1893 		    irq, 1);
1894 	}
1895 
1896 	if (bootverbose) {
1897 		device_printf(child, "Remapped MSI-X IRQs as: ");
1898 		for (i = 0; i < count; i++) {
1899 			if (i != 0)
1900 				printf(", ");
1901 			if (vectors[i] == 0)
1902 				printf("---");
1903 			else
1904 				printf("%d",
1905 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1906 		}
1907 		printf("\n");
1908 	}
1909 
1910 	return (0);
1911 }
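/*
 * Illustrative sketch (hypothetical driver code): the A-AB-B layout
 * described in the comment above would be requested as follows after
 * pci_alloc_msix() returned a count of 3 for a 6-entry table:
 *
 *	const u_int vectors[6] = { 1, 0, 1, 2, 0, 2 };
 *
 *	error = pci_remap_msix(dev, 6, vectors);
 *
 * On success, SYS_RES_IRQ rids 1, 3, 4, and 6 are valid and the third
 * IRQ ('C') has been released back to the system.
 */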
1912 
1913 static int
1914 pci_release_msix(device_t dev, device_t child)
1915 {
1916 	struct pci_devinfo *dinfo = device_get_ivars(child);
1917 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1918 	struct resource_list_entry *rle;
1919 	int i;
1920 
1921 	/* Do we have any messages to release? */
1922 	if (msix->msix_alloc == 0)
1923 		return (ENODEV);
1924 
1925 	/* Make sure none of the resources are allocated. */
1926 	for (i = 0; i < msix->msix_table_len; i++) {
1927 		if (msix->msix_table[i].mte_vector == 0)
1928 			continue;
1929 		if (msix->msix_table[i].mte_handlers > 0)
1930 			return (EBUSY);
1931 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1932 		KASSERT(rle != NULL, ("missing resource"));
1933 		if (rle->res != NULL)
1934 			return (EBUSY);
1935 	}
1936 
1937 	/* Update control register to disable MSI-X. */
1938 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1939 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1940 	    msix->msix_ctrl, 2);
1941 
1942 	/* Free the resource list entries. */
1943 	for (i = 0; i < msix->msix_table_len; i++) {
1944 		if (msix->msix_table[i].mte_vector == 0)
1945 			continue;
1946 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1947 	}
1948 	free(msix->msix_table, M_DEVBUF);
1949 	msix->msix_table_len = 0;
1950 
1951 	/* Release the IRQs. */
1952 	for (i = 0; i < msix->msix_alloc; i++)
1953 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1954 		    msix->msix_vectors[i].mv_irq);
1955 	free(msix->msix_vectors, M_DEVBUF);
1956 	msix->msix_alloc = 0;
1957 	return (0);
1958 }
1959 
1960 /*
1961  * Return the maximum number of MSI-X messages this device supports.
1962  * Basically, assuming the MD code can alloc messages, this function
1963  * should return the maximum value that pci_alloc_msix() can return.
1964  * Thus, it is subject to the tunables, etc.
1965  */
1966 int
1967 pci_msix_count_method(device_t dev, device_t child)
1968 {
1969 	struct pci_devinfo *dinfo = device_get_ivars(child);
1970 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1971 
1972 	if (pci_do_msix && msix->msix_location != 0)
1973 		return (msix->msix_msgnum);
1974 	return (0);
1975 }
1976 
1977 int
1978 pci_msix_pba_bar_method(device_t dev, device_t child)
1979 {
1980 	struct pci_devinfo *dinfo = device_get_ivars(child);
1981 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1982 
1983 	if (pci_do_msix && msix->msix_location != 0)
1984 		return (msix->msix_pba_bar);
1985 	return (-1);
1986 }
1987 
1988 int
1989 pci_msix_table_bar_method(device_t dev, device_t child)
1990 {
1991 	struct pci_devinfo *dinfo = device_get_ivars(child);
1992 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1993 
1994 	if (pci_do_msix && msix->msix_location != 0)
1995 		return (msix->msix_table_bar);
1996 	return (-1);
1997 }
1998 
1999 /*
2000  * HyperTransport MSI mapping control
2001  */
2002 void
2003 pci_ht_map_msi(device_t dev, uint64_t addr)
2004 {
2005 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2006 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
2007 
2008 	if (!ht->ht_msimap)
2009 		return;
2010 
2011 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
2012 	    ht->ht_msiaddr >> 20 == addr >> 20) {
2013 		/* Enable MSI -> HT mapping. */
2014 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
2015 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2016 		    ht->ht_msictrl, 2);
2017 	}
2018 
2019 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
2020 		/* Disable MSI -> HT mapping. */
2021 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
2022 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2023 		    ht->ht_msictrl, 2);
2024 	}
2025 }
2026 
2027 int
2028 pci_get_max_payload(device_t dev)
2029 {
2030 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2031 	int cap;
2032 	uint16_t val;
2033 
2034 	cap = dinfo->cfg.pcie.pcie_location;
2035 	if (cap == 0)
2036 		return (0);
2037 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2038 	val &= PCIEM_CTL_MAX_PAYLOAD;
2039 	val >>= 5;
2040 	return (1 << (val + 7));
2041 }
2042 
2043 int
2044 pci_get_max_read_req(device_t dev)
2045 {
2046 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2047 	int cap;
2048 	uint16_t val;
2049 
2050 	cap = dinfo->cfg.pcie.pcie_location;
2051 	if (cap == 0)
2052 		return (0);
2053 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2054 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2055 	val >>= 12;
2056 	return (1 << (val + 7));
2057 }
2058 
2059 int
2060 pci_set_max_read_req(device_t dev, int size)
2061 {
2062 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2063 	int cap;
2064 	uint16_t val;
2065 
2066 	cap = dinfo->cfg.pcie.pcie_location;
2067 	if (cap == 0)
2068 		return (0);
2069 	if (size < 128)
2070 		size = 128;
2071 	if (size > 4096)
2072 		size = 4096;
2073 	size = (1 << (fls(size) - 1));
2074 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2075 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2076 	val |= (fls(size) - 8) << 12;
2077 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2078 	return (size);
2079 }
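/*
 * Encoding note (illustrative): the three-bit field stores
 * log2(size) - 7, so e.g. pci_set_max_read_req(dev, 1000) rounds the
 * request down to 512, writes encoding fls(512) - 8 = 2, and returns
 * the 512 that was actually programmed.
 */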
2080 
2081 uint32_t
2082 pcie_read_config(device_t dev, int reg, int width)
2083 {
2084 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2085 	int cap;
2086 
2087 	cap = dinfo->cfg.pcie.pcie_location;
2088 	if (cap == 0) {
2089 		if (width == 2)
2090 			return (0xffff);
2091 		return (0xffffffff);
2092 	}
2093 
2094 	return (pci_read_config(dev, cap + reg, width));
2095 }
2096 
2097 void
2098 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2099 {
2100 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2101 	int cap;
2102 
2103 	cap = dinfo->cfg.pcie.pcie_location;
2104 	if (cap == 0)
2105 		return;
2106 	pci_write_config(dev, cap + reg, value, width);
2107 }
2108 
2109 /*
2110  * Adjusts a PCI-e capability register by clearing the bits in mask
2111  * and setting the bits in (value & mask).  Bits not set in mask are
2112  * not adjusted.
2113  *
2114  * Returns the old value on success or all ones on failure.
2115  */
2116 uint32_t
2117 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2118     int width)
2119 {
2120 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2121 	uint32_t old, new;
2122 	int cap;
2123 
2124 	cap = dinfo->cfg.pcie.pcie_location;
2125 	if (cap == 0) {
2126 		if (width == 2)
2127 			return (0xffff);
2128 		return (0xffffffff);
2129 	}
2130 
2131 	old = pci_read_config(dev, cap + reg, width);
2132 	new = old & ~mask;
2133 	new |= (value & mask);
2134 	pci_write_config(dev, cap + reg, new, width);
2135 	return (old);
2136 }
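/*
 * Usage sketch (hypothetical): replace only the Max_Read_Request_Size
 * field, leaving the rest of the device control register untouched:
 *
 *	old = pcie_adjust_config(dev, PCIER_DEVICE_CTL,
 *	    PCIEM_CTL_MAX_READ_REQUEST, 2 << 12, 2);
 *
 * This sets the field to encoding 2 (512 bytes) and returns the
 * previous register value.
 */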
2137 
2138 /*
2139  * Support for Message Signalled Interrupts (MSI).
2140  */
2141 void
2142 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2143     uint16_t data)
2144 {
2145 	struct pci_devinfo *dinfo = device_get_ivars(child);
2146 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2147 
2148 	/* Write data and address values. */
2149 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2150 	    address & 0xffffffff, 4);
2151 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2152 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2153 		    address >> 32, 4);
2154 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2155 		    data, 2);
2156 	} else
2157 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2158 		    2);
2159 
2160 	/* Enable MSI in the control register. */
2161 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2162 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2163 	    msi->msi_ctrl, 2);
2164 
2165 	/* Enable MSI -> HT mapping. */
2166 	pci_ht_map_msi(child, address);
2167 }
2168 
2169 void
2170 pci_disable_msi_method(device_t dev, device_t child)
2171 {
2172 	struct pci_devinfo *dinfo = device_get_ivars(child);
2173 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2174 
2175 	/* Disable MSI -> HT mapping. */
2176 	pci_ht_map_msi(child, 0);
2177 
2178 	/* Disable MSI in the control register. */
2179 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2180 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2181 	    msi->msi_ctrl, 2);
2182 }
2183 
2184 /*
2185  * Restore MSI registers during resume.  If MSI is enabled then
2186  * restore the data and address registers in addition to the control
2187  * register.
2188  */
2189 static void
2190 pci_resume_msi(device_t dev)
2191 {
2192 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2193 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2194 	uint64_t address;
2195 	uint16_t data;
2196 
2197 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2198 		address = msi->msi_addr;
2199 		data = msi->msi_data;
2200 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2201 		    address & 0xffffffff, 4);
2202 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2203 			pci_write_config(dev, msi->msi_location +
2204 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2205 			pci_write_config(dev, msi->msi_location +
2206 			    PCIR_MSI_DATA_64BIT, data, 2);
2207 		} else
2208 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2209 			    data, 2);
2210 	}
2211 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2212 	    2);
2213 }
2214 
2215 static int
2216 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2217 {
2218 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2219 	pcicfgregs *cfg = &dinfo->cfg;
2220 	struct resource_list_entry *rle;
2221 	struct msix_table_entry *mte;
2222 	struct msix_vector *mv;
2223 	uint64_t addr;
2224 	uint32_t data;
2225 	int error, i, j;
2226 
2227 	/*
2228 	 * Handle MSI first.  We try to find this IRQ among our list
2229 	 * of MSI IRQs.  If we find it, we request updated address and
2230 	 * data registers and apply the results.
2231 	 */
2232 	if (cfg->msi.msi_alloc > 0) {
2233 
2234 		/* If we don't have any active handlers, nothing to do. */
2235 		if (cfg->msi.msi_handlers == 0)
2236 			return (0);
2237 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2238 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2239 			    i + 1);
2240 			if (rle->start == irq) {
2241 				error = PCIB_MAP_MSI(device_get_parent(bus),
2242 				    dev, irq, &addr, &data);
2243 				if (error)
2244 					return (error);
2245 				pci_disable_msi(dev);
2246 				dinfo->cfg.msi.msi_addr = addr;
2247 				dinfo->cfg.msi.msi_data = data;
2248 				pci_enable_msi(dev, addr, data);
2249 				return (0);
2250 			}
2251 		}
2252 		return (ENOENT);
2253 	}
2254 
2255 	/*
2256 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2257 	 * we request the updated mapping info.  If that works, we go
2258 	 * through all the slots that use this IRQ and update them.
2259 	 */
2260 	if (cfg->msix.msix_alloc > 0) {
2261 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2262 			mv = &cfg->msix.msix_vectors[i];
2263 			if (mv->mv_irq == irq) {
2264 				error = PCIB_MAP_MSI(device_get_parent(bus),
2265 				    dev, irq, &addr, &data);
2266 				if (error)
2267 					return (error);
2268 				mv->mv_address = addr;
2269 				mv->mv_data = data;
2270 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2271 					mte = &cfg->msix.msix_table[j];
2272 					if (mte->mte_vector != i + 1)
2273 						continue;
2274 					if (mte->mte_handlers == 0)
2275 						continue;
2276 					pci_mask_msix(dev, j);
2277 					pci_enable_msix(dev, j, addr, data);
2278 					pci_unmask_msix(dev, j);
2279 				}
2280 			}
2281 		}
2282 		return (ENOENT);
2283 	}
2284 
2285 	return (ENOENT);
2286 }
2287 
2288 /*
2289  * Returns true if the specified device is blacklisted because MSI
2290  * doesn't work.
2291  */
2292 int
2293 pci_msi_device_blacklisted(device_t dev)
2294 {
2295 
2296 	if (!pci_honor_msi_blacklist)
2297 		return (0);
2298 
2299 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2300 }
2301 
2302 /*
2303  * Determine if MSI is blacklisted globally on this system.  Currently,
2304  * we just check for blacklisted chipsets as represented by the
2305  * host-PCI bridge at device 0:0:0.  In the future, it may become
2306  * necessary to check other system attributes, such as the kenv values
2307  * that give the motherboard manufacturer and model number.
2308  */
2309 static int
2310 pci_msi_blacklisted(void)
2311 {
2312 	device_t dev;
2313 
2314 	if (!pci_honor_msi_blacklist)
2315 		return (0);
2316 
2317 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2318 	if (!(pcie_chipset || pcix_chipset)) {
2319 		if (vm_guest != VM_GUEST_NO) {
2320 			/*
2321 			 * Whitelist older chipsets in virtual
2322 			 * machines known to support MSI.
2323 			 */
2324 			dev = pci_find_bsf(0, 0, 0);
2325 			if (dev != NULL)
2326 				return (!pci_has_quirk(pci_get_devid(dev),
2327 					PCI_QUIRK_ENABLE_MSI_VM));
2328 		}
2329 		return (1);
2330 	}
2331 
2332 	dev = pci_find_bsf(0, 0, 0);
2333 	if (dev != NULL)
2334 		return (pci_msi_device_blacklisted(dev));
2335 	return (0);
2336 }
2337 
2338 /*
2339  * Returns true if the specified device is blacklisted because MSI-X
2340  * doesn't work.  Note that this assumes that if MSI doesn't work,
2341  * MSI-X doesn't either.
2342  */
2343 int
2344 pci_msix_device_blacklisted(device_t dev)
2345 {
2346 
2347 	if (!pci_honor_msi_blacklist)
2348 		return (0);
2349 
2350 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2351 		return (1);
2352 
2353 	return (pci_msi_device_blacklisted(dev));
2354 }
2355 
2356 /*
2357  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2358  * is blacklisted, assume that MSI-X is as well.  Check for additional
2359  * chipsets where MSI works but MSI-X does not.
2360  */
2361 static int
2362 pci_msix_blacklisted(void)
2363 {
2364 	device_t dev;
2365 
2366 	if (!pci_honor_msi_blacklist)
2367 		return (0);
2368 
2369 	dev = pci_find_bsf(0, 0, 0);
2370 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2371 	    PCI_QUIRK_DISABLE_MSIX))
2372 		return (1);
2373 
2374 	return (pci_msi_blacklisted());
2375 }
2376 
2377 /*
2378  * Attempt to allocate *count MSI messages.  The actual number allocated is
2379  * returned in *count.  After this function returns, each message will be
2380  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2381  */
2382 int
2383 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2384 {
2385 	struct pci_devinfo *dinfo = device_get_ivars(child);
2386 	pcicfgregs *cfg = &dinfo->cfg;
2387 	struct resource_list_entry *rle;
2388 	int actual, error, i, irqs[32];
2389 	uint16_t ctrl;
2390 
2391 	/* Don't let count == 0 get us into trouble. */
2392 	if (*count == 0)
2393 		return (EINVAL);
2394 
2395 	/* If rid 0 is allocated, then fail. */
2396 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2397 	if (rle != NULL && rle->res != NULL)
2398 		return (ENXIO);
2399 
2400 	/* Already have allocated messages? */
2401 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2402 		return (ENXIO);
2403 
2404 	/* If MSI is blacklisted for this system, fail. */
2405 	if (pci_msi_blacklisted())
2406 		return (ENXIO);
2407 
2408 	/* MSI capability present? */
2409 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2410 		return (ENODEV);
2411 
2412 	if (bootverbose)
2413 		device_printf(child,
2414 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2415 		    *count, cfg->msi.msi_msgnum);
2416 
2417 	/* Don't ask for more than the device supports. */
2418 	actual = min(*count, cfg->msi.msi_msgnum);
2419 
2420 	/* Don't ask for more than 32 messages. */
2421 	actual = min(actual, 32);
2422 
2423 	/* MSI requires power of 2 number of messages. */
2424 	if (!powerof2(actual))
2425 		return (EINVAL);
2426 
2427 	for (;;) {
2428 		/* Try to allocate N messages. */
2429 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2430 		    actual, irqs);
2431 		if (error == 0)
2432 			break;
2433 		if (actual == 1)
2434 			return (error);
2435 
2436 		/* Try N / 2. */
2437 		actual >>= 1;
2438 	}
2439 
2440 	/*
2441 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2442 	 * resources in the irqs[] array, so add new resources
2443 	 * starting at rid 1.
2444 	 */
2445 	for (i = 0; i < actual; i++)
2446 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2447 		    irqs[i], irqs[i], 1);
2448 
2449 	if (bootverbose) {
2450 		if (actual == 1)
2451 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2452 		else {
2453 			int run;
2454 
2455 			/*
2456 			 * Be fancy and try to print contiguous runs
2457 			 * of IRQ values as ranges.  'run' is true if
2458 			 * we are in a range.
2459 			 */
2460 			device_printf(child, "using IRQs %d", irqs[0]);
2461 			run = 0;
2462 			for (i = 1; i < actual; i++) {
2463 
2464 				/* Still in a run? */
2465 				if (irqs[i] == irqs[i - 1] + 1) {
2466 					run = 1;
2467 					continue;
2468 				}
2469 
2470 				/* Finish previous range. */
2471 				if (run) {
2472 					printf("-%d", irqs[i - 1]);
2473 					run = 0;
2474 				}
2475 
2476 				/* Start new range. */
2477 				printf(",%d", irqs[i]);
2478 			}
2479 
2480 			/* Unfinished range? */
2481 			if (run)
2482 				printf("-%d", irqs[actual - 1]);
2483 			printf(" for MSI\n");
2484 		}
2485 	}
2486 
2487 	/* Update control register with actual count. */
2488 	ctrl = cfg->msi.msi_ctrl;
2489 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2490 	ctrl |= (ffs(actual) - 1) << 4;
2491 	cfg->msi.msi_ctrl = ctrl;
2492 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2493 
2494 	/* Update counts of alloc'd messages. */
2495 	cfg->msi.msi_alloc = actual;
2496 	cfg->msi.msi_handlers = 0;
2497 	*count = actual;
2498 	return (0);
2499 }
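/*
 * Illustrative sketch (hypothetical driver code): since a failed MSI
 * allocation should fall back to the legacy INTx interrupt at rid 0,
 * a typical attach routine looks like:
 *
 *	int count = 1;
 *	int rid = 0;
 *
 *	if (pci_alloc_msi(dev, &count) == 0)
 *		rid = 1;
 *	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 *	    RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE));
 *
 * The 'sc' field is hypothetical; MSI vectors must not be shared,
 * while legacy INTx lines may be.
 */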
2500 
2501 /* Release the MSI messages associated with this device. */
2502 int
2503 pci_release_msi_method(device_t dev, device_t child)
2504 {
2505 	struct pci_devinfo *dinfo = device_get_ivars(child);
2506 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2507 	struct resource_list_entry *rle;
2508 	int error, i, irqs[32];
2509 
2510 	/* Try MSI-X first. */
2511 	error = pci_release_msix(dev, child);
2512 	if (error != ENODEV)
2513 		return (error);
2514 
2515 	/* Do we have any messages to release? */
2516 	if (msi->msi_alloc == 0)
2517 		return (ENODEV);
2518 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2519 
2520 	/* Make sure none of the resources are allocated. */
2521 	if (msi->msi_handlers > 0)
2522 		return (EBUSY);
2523 	for (i = 0; i < msi->msi_alloc; i++) {
2524 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2525 		KASSERT(rle != NULL, ("missing MSI resource"));
2526 		if (rle->res != NULL)
2527 			return (EBUSY);
2528 		irqs[i] = rle->start;
2529 	}
2530 
2531 	/* Update control register with 0 count. */
2532 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2533 	    ("%s: MSI still enabled", __func__));
2534 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2535 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2536 	    msi->msi_ctrl, 2);
2537 
2538 	/* Release the messages. */
2539 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2540 	for (i = 0; i < msi->msi_alloc; i++)
2541 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2542 
2543 	/* Update alloc count. */
2544 	msi->msi_alloc = 0;
2545 	msi->msi_addr = 0;
2546 	msi->msi_data = 0;
2547 	return (0);
2548 }
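/*
 * Teardown sketch (hypothetical): the EBUSY checks above mean a driver
 * must tear down its handlers and release its SYS_RES_IRQ resources
 * before calling pci_release_msi():
 *
 *	bus_teardown_intr(dev, sc->irq_res, sc->irq_cookie);
 *	bus_release_resource(dev, SYS_RES_IRQ, 1, sc->irq_res);
 *	pci_release_msi(dev);
 */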
2549 
2550 /*
2551  * Return the max supported MSI messages this device supports.
2552  * Basically, assuming the MD code can alloc messages, this function
2553  * should return the maximum value that pci_alloc_msi() can return.
2554  * Thus, it is subject to the tunables, etc.
2555  */
2556 int
2557 pci_msi_count_method(device_t dev, device_t child)
2558 {
2559 	struct pci_devinfo *dinfo = device_get_ivars(child);
2560 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2561 
2562 	if (pci_do_msi && msi->msi_location != 0)
2563 		return (msi->msi_msgnum);
2564 	return (0);
2565 }
2566 
2567 /* Free the pcicfgregs structure and all dependent data structures. */
2568 
2569 int
2570 pci_freecfg(struct pci_devinfo *dinfo)
2571 {
2572 	struct devlist *devlist_head;
2573 	struct pci_map *pm, *next;
2574 	int i;
2575 
2576 	devlist_head = &pci_devq;
2577 
2578 	if (dinfo->cfg.vpd.vpd_reg) {
2579 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2580 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2581 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2582 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2583 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2584 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2585 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2586 	}
2587 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2588 		free(pm, M_DEVBUF);
2589 	}
2590 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2591 	free(dinfo, M_DEVBUF);
2592 
2593 	/* increment the generation count */
2594 	pci_generation++;
2595 
2596 	/* we're losing one device */
2597 	pci_numdevs--;
2598 	return (0);
2599 }
2600 
2601 /*
2602  * PCI power management
2603  */
2604 int
2605 pci_set_powerstate_method(device_t dev, device_t child, int state)
2606 {
2607 	struct pci_devinfo *dinfo = device_get_ivars(child);
2608 	pcicfgregs *cfg = &dinfo->cfg;
2609 	uint16_t status;
2610 	int oldstate, highest, delay;
2611 
2612 	if (cfg->pp.pp_cap == 0)
2613 		return (EOPNOTSUPP);
2614 
2615 	/*
2616 	 * Optimize away a request for no state change.  While it would be OK to
2617 	 * write to the hardware in theory, some devices have shown odd
2618 	 * behavior when going from D3 -> D3.
2619 	 */
2620 	oldstate = pci_get_powerstate(child);
2621 	if (oldstate == state)
2622 		return (0);
2623 
2624 	/*
2625 	 * The PCI power management specification states that after a state
2626 	 * transition between PCI power states, system software must
2627 	 * guarantee a minimal delay before the function accesses the device.
2628 	 * Compute the worst case delay that we need to guarantee before we
2629 	 * access the device.  Many devices will be responsive much more
2630 	 * quickly than this delay, but there are some that don't respond
2631 	 * instantly to state changes.  Transitions to/from D3 state require
2632 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2633 	 * is done below with DELAY rather than a sleeper function because
2634 	 * this function can be called from contexts where we cannot sleep.
2635 	 */
2636 	highest = (oldstate > state) ? oldstate : state;
2637 	if (highest == PCI_POWERSTATE_D3)
2638 	    delay = 10000;
2639 	else if (highest == PCI_POWERSTATE_D2)
2640 	    delay = 200;
2641 	else
2642 	    delay = 0;
2643 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2644 	    & ~PCIM_PSTAT_DMASK;
2645 	switch (state) {
2646 	case PCI_POWERSTATE_D0:
2647 		status |= PCIM_PSTAT_D0;
2648 		break;
2649 	case PCI_POWERSTATE_D1:
2650 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2651 			return (EOPNOTSUPP);
2652 		status |= PCIM_PSTAT_D1;
2653 		break;
2654 	case PCI_POWERSTATE_D2:
2655 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2656 			return (EOPNOTSUPP);
2657 		status |= PCIM_PSTAT_D2;
2658 		break;
2659 	case PCI_POWERSTATE_D3:
2660 		status |= PCIM_PSTAT_D3;
2661 		break;
2662 	default:
2663 		return (EINVAL);
2664 	}
2665 
2666 	if (bootverbose)
2667 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2668 		    state);
2669 
2670 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2671 	if (delay)
2672 		DELAY(delay);
2673 	return (0);
2674 }
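/*
 * Usage sketch (hypothetical): a driver suspend method typically drops
 * the device to D3 after saving state, and resume restores D0 before
 * touching any registers:
 *
 *	pci_set_powerstate(dev, PCI_POWERSTATE_D3);	(suspend)
 *	pci_set_powerstate(dev, PCI_POWERSTATE_D0);	(resume)
 *
 * Each of these D3 transitions incurs the 10ms settle delay computed
 * above.
 */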
2675 
2676 int
2677 pci_get_powerstate_method(device_t dev, device_t child)
2678 {
2679 	struct pci_devinfo *dinfo = device_get_ivars(child);
2680 	pcicfgregs *cfg = &dinfo->cfg;
2681 	uint16_t status;
2682 	int result;
2683 
2684 	if (cfg->pp.pp_cap != 0) {
2685 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2686 		switch (status & PCIM_PSTAT_DMASK) {
2687 		case PCIM_PSTAT_D0:
2688 			result = PCI_POWERSTATE_D0;
2689 			break;
2690 		case PCIM_PSTAT_D1:
2691 			result = PCI_POWERSTATE_D1;
2692 			break;
2693 		case PCIM_PSTAT_D2:
2694 			result = PCI_POWERSTATE_D2;
2695 			break;
2696 		case PCIM_PSTAT_D3:
2697 			result = PCI_POWERSTATE_D3;
2698 			break;
2699 		default:
2700 			result = PCI_POWERSTATE_UNKNOWN;
2701 			break;
2702 		}
2703 	} else {
2704 		/* No support, device is always at D0 */
2705 		result = PCI_POWERSTATE_D0;
2706 	}
2707 	return (result);
2708 }
2709 
2710 /*
2711  * Some convenience functions for PCI device drivers.
2712  */
2713 
2714 static __inline void
2715 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2716 {
2717 	uint16_t	command;
2718 
2719 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2720 	command |= bit;
2721 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2722 }
2723 
2724 static __inline void
2725 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2726 {
2727 	uint16_t	command;
2728 
2729 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2730 	command &= ~bit;
2731 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2732 }
2733 
2734 int
2735 pci_enable_busmaster_method(device_t dev, device_t child)
2736 {
2737 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2738 	return (0);
2739 }
2740 
2741 int
2742 pci_disable_busmaster_method(device_t dev, device_t child)
2743 {
2744 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2745 	return (0);
2746 }
2747 
2748 int
2749 pci_enable_io_method(device_t dev, device_t child, int space)
2750 {
2751 	uint16_t bit;
2752 
2753 	switch(space) {
2754 	case SYS_RES_IOPORT:
2755 		bit = PCIM_CMD_PORTEN;
2756 		break;
2757 	case SYS_RES_MEMORY:
2758 		bit = PCIM_CMD_MEMEN;
2759 		break;
2760 	default:
2761 		return (EINVAL);
2762 	}
2763 	pci_set_command_bit(dev, child, bit);
2764 	return (0);
2765 }
2766 
2767 int
2768 pci_disable_io_method(device_t dev, device_t child, int space)
2769 {
2770 	uint16_t bit;
2771 
2772 	switch(space) {
2773 	case SYS_RES_IOPORT:
2774 		bit = PCIM_CMD_PORTEN;
2775 		break;
2776 	case SYS_RES_MEMORY:
2777 		bit = PCIM_CMD_MEMEN;
2778 		break;
2779 	default:
2780 		return (EINVAL);
2781 	}
2782 	pci_clear_command_bit(dev, child, bit);
2783 	return (0);
2784 }
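/*
 * Example (hypothetical attach fragment): a bus-mastering device needs
 * both DMA and memory decoding enabled before it is programmed:
 *
 *	pci_enable_busmaster(dev);
 *	pci_enable_io(dev, SYS_RES_MEMORY);
 */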
2785 
2786 /*
2787  * New style pci driver.  Parent device is either a pci-host-bridge or a
2788  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2789  */
2790 
2791 void
2792 pci_print_verbose(struct pci_devinfo *dinfo)
2793 {
2794 
2795 	if (bootverbose) {
2796 		pcicfgregs *cfg = &dinfo->cfg;
2797 
2798 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2799 		    cfg->vendor, cfg->device, cfg->revid);
2800 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2801 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2802 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2803 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2804 		    cfg->mfdev);
2805 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2806 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2807 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2808 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2809 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2810 		if (cfg->intpin > 0)
2811 			printf("\tintpin=%c, irq=%d\n",
2812 			    cfg->intpin + 'a' - 1, cfg->intline);
2813 		if (cfg->pp.pp_cap) {
2814 			uint16_t status;
2815 
2816 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2817 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2818 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2819 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2820 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2821 			    status & PCIM_PSTAT_DMASK);
2822 		}
2823 		if (cfg->msi.msi_location) {
2824 			int ctrl;
2825 
2826 			ctrl = cfg->msi.msi_ctrl;
2827 			printf("\tMSI supports %d message%s%s%s\n",
2828 			    cfg->msi.msi_msgnum,
2829 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2830 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2831 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2832 		}
2833 		if (cfg->msix.msix_location) {
2834 			printf("\tMSI-X supports %d message%s ",
2835 			    cfg->msix.msix_msgnum,
2836 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2837 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2838 				printf("in map 0x%x\n",
2839 				    cfg->msix.msix_table_bar);
2840 			else
2841 				printf("in maps 0x%x and 0x%x\n",
2842 				    cfg->msix.msix_table_bar,
2843 				    cfg->msix.msix_pba_bar);
2844 		}
2845 	}
2846 }
2847 
2848 static int
2849 pci_porten(device_t dev)
2850 {
2851 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2852 }
2853 
2854 static int
2855 pci_memen(device_t dev)
2856 {
2857 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2858 }
2859 
2860 void
2861 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2862     int *bar64)
2863 {
2864 	struct pci_devinfo *dinfo;
2865 	pci_addr_t map, testval;
2866 	int ln2range;
2867 	uint16_t cmd;
2868 
2869 	/*
2870 	 * The device ROM BAR is special.  It is always a 32-bit
2871 	 * memory BAR.  Bit 0 is the ROM enable bit and should not be
2872 	 * set when sizing the BAR.
2873 	 */
2874 	dinfo = device_get_ivars(dev);
2875 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2876 		map = pci_read_config(dev, reg, 4);
2877 		pci_write_config(dev, reg, 0xfffffffe, 4);
2878 		testval = pci_read_config(dev, reg, 4);
2879 		pci_write_config(dev, reg, map, 4);
2880 		*mapp = map;
2881 		*testvalp = testval;
2882 		if (bar64 != NULL)
2883 			*bar64 = 0;
2884 		return;
2885 	}
2886 
2887 	map = pci_read_config(dev, reg, 4);
2888 	ln2range = pci_maprange(map);
2889 	if (ln2range == 64)
2890 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2891 
2892 	/*
2893 	 * Disable decoding via the command register before
2894 	 * determining the BAR's length since we will be placing it in
2895 	 * a weird state.
2896 	 */
2897 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2898 	pci_write_config(dev, PCIR_COMMAND,
2899 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2900 
2901 	/*
2902 	 * Determine the BAR's length by writing all 1's.  The bottom
2903 	 * log_2(size) bits of the BAR will stick as 0 when we read
2904 	 * the value back.
2905 	 *
2906 	 * NB: according to the PCI Local Bus Specification, rev. 3.0:
2907 	 * "Software writes 0FFFFFFFFh to both registers, reads them back,
2908 	 * and combines the result into a 64-bit value." (section 6.2.5.1)
2909 	 *
2910 	 * Writes to both registers must be performed before attempting to
2911 	 * read back the size value.
2912 	 */
2913 	testval = 0;
2914 	pci_write_config(dev, reg, 0xffffffff, 4);
2915 	if (ln2range == 64) {
2916 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2917 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2918 	}
2919 	testval |= pci_read_config(dev, reg, 4);
2920 
2921 	/*
2922 	 * Restore the original value of the BAR.  We may have reprogrammed
2923 	 * the BAR of the low-level console device and when booting verbose,
2924 	 * we need the console device addressable.
2925 	 */
2926 	pci_write_config(dev, reg, map, 4);
2927 	if (ln2range == 64)
2928 		pci_write_config(dev, reg + 4, map >> 32, 4);
2929 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2930 
2931 	*mapp = map;
2932 	*testvalp = testval;
2933 	if (bar64 != NULL)
2934 		*bar64 = (ln2range == 64);
2935 }
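/*
 * Worked example (illustrative): for a non-prefetchable 32-bit memory
 * BAR of 4KB at 0xfebf1000, this returns map = 0xfebf1000 and
 * testval = 0xfffff000; pci_mapsize(testval) then yields 12, giving a
 * size of 1 << 12 = 4096 bytes.
 */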
2936 
2937 static void
2938 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2939 {
2940 	struct pci_devinfo *dinfo;
2941 	int ln2range;
2942 
2943 	/* The device ROM BAR is always a 32-bit memory BAR. */
2944 	dinfo = device_get_ivars(dev);
2945 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2946 		ln2range = 32;
2947 	else
2948 		ln2range = pci_maprange(pm->pm_value);
2949 	pci_write_config(dev, pm->pm_reg, base, 4);
2950 	if (ln2range == 64)
2951 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2952 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2953 	if (ln2range == 64)
2954 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2955 		    pm->pm_reg + 4, 4) << 32;
2956 }
2957 
2958 struct pci_map *
2959 pci_find_bar(device_t dev, int reg)
2960 {
2961 	struct pci_devinfo *dinfo;
2962 	struct pci_map *pm;
2963 
2964 	dinfo = device_get_ivars(dev);
2965 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2966 		if (pm->pm_reg == reg)
2967 			return (pm);
2968 	}
2969 	return (NULL);
2970 }
2971 
2972 int
2973 pci_bar_enabled(device_t dev, struct pci_map *pm)
2974 {
2975 	struct pci_devinfo *dinfo;
2976 	uint16_t cmd;
2977 
2978 	dinfo = device_get_ivars(dev);
2979 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2980 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2981 		return (0);
2982 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2983 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2984 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2985 	else
2986 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2987 }
2988 
2989 struct pci_map *
2990 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2991 {
2992 	struct pci_devinfo *dinfo;
2993 	struct pci_map *pm, *prev;
2994 
2995 	dinfo = device_get_ivars(dev);
2996 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2997 	pm->pm_reg = reg;
2998 	pm->pm_value = value;
2999 	pm->pm_size = size;
3000 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
3001 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
3002 		    reg));
3003 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
3004 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
3005 			break;
3006 	}
3007 	if (prev != NULL)
3008 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
3009 	else
3010 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
3011 	return (pm);
3012 }
3013 
3014 static void
3015 pci_restore_bars(device_t dev)
3016 {
3017 	struct pci_devinfo *dinfo;
3018 	struct pci_map *pm;
3019 	int ln2range;
3020 
3021 	dinfo = device_get_ivars(dev);
3022 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
3023 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
3024 			ln2range = 32;
3025 		else
3026 			ln2range = pci_maprange(pm->pm_value);
3027 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
3028 		if (ln2range == 64)
3029 			pci_write_config(dev, pm->pm_reg + 4,
3030 			    pm->pm_value >> 32, 4);
3031 	}
3032 }
3033 
3034 /*
3035  * Add a resource based on a PCI map register.  Return 1 if the map
3036  * register is a 32-bit map register or 2 if it is a 64-bit register.
3037  */
3038 static int
3039 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3040     int force, int prefetch)
3041 {
3042 	struct pci_map *pm;
3043 	pci_addr_t base, map, testval;
3044 	pci_addr_t start, end, count;
3045 	int barlen, basezero, flags, maprange, mapsize, type;
3046 	uint16_t cmd;
3047 	struct resource *res;
3048 
3049 	/*
3050 	 * The BAR may already exist if the device is a CardBus card
3051 	 * whose CIS is stored in this BAR.
3052 	 */
3053 	pm = pci_find_bar(dev, reg);
3054 	if (pm != NULL) {
3055 		maprange = pci_maprange(pm->pm_value);
3056 		barlen = maprange == 64 ? 2 : 1;
3057 		return (barlen);
3058 	}
3059 
3060 	pci_read_bar(dev, reg, &map, &testval, NULL);
3061 	if (PCI_BAR_MEM(map)) {
3062 		type = SYS_RES_MEMORY;
3063 		if (map & PCIM_BAR_MEM_PREFETCH)
3064 			prefetch = 1;
3065 	} else
3066 		type = SYS_RES_IOPORT;
3067 	mapsize = pci_mapsize(testval);
3068 	base = pci_mapbase(map);
3069 #ifdef __PCI_BAR_ZERO_VALID
3070 	basezero = 0;
3071 #else
3072 	basezero = base == 0;
3073 #endif
3074 	maprange = pci_maprange(map);
3075 	barlen = maprange == 64 ? 2 : 1;
3076 
3077 	/*
3078 	 * For I/O registers, if the bottom bit is set and the next bit
3079 	 * up is also set, we have a BAR that doesn't conform to the
3080 	 * spec, so ignore it.  Also, sanity check the size of the region
3081 	 * against its type: memory ranges must be at least 16 bytes in
3082 	 * size, while I/O ranges must be at least 4.
3083 	 */
3084 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3085 		return (barlen);
3086 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3087 	    (type == SYS_RES_IOPORT && mapsize < 2))
3088 		return (barlen);
3089 
3090 	/* Save a record of this BAR. */
3091 	pm = pci_add_bar(dev, reg, map, mapsize);
3092 	if (bootverbose) {
3093 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3094 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3095 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3096 			printf(", port disabled\n");
3097 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3098 			printf(", memory disabled\n");
3099 		else
3100 			printf(", enabled\n");
3101 	}
3102 
3103 	/*
3104 	 * If base is 0, then we have problems if this architecture does
3105 	 * not allow that.  It is best to ignore such entries for the
3106 	 * moment.  These will be allocated later if the driver specifically
3107 	 * requests them.  However, some removable buses look better when
3108 	 * all resources are allocated, so allow '0' to be overridden.
3109 	 *
3110 	 * Similarly treat maps whose value is the same as the test value
3111 	 * read back.  These maps have had all f's written to them by the
3112 	 * BIOS in an attempt to disable the resources.
3113 	 */
3114 	if (!force && (basezero || map == testval))
3115 		return (barlen);
3116 	if ((u_long)base != base) {
3117 		device_printf(bus,
3118 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
3119 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3120 		    pci_get_function(dev), reg);
3121 		return (barlen);
3122 	}
3123 
3124 	/*
3125 	 * This code theoretically does the right thing, but has
3126 	 * undesirable side effects in some cases where peripherals
3127 	 * respond oddly to having these bits enabled.  Let the user
3128 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3129 	 * default).
3130 	 */
3131 	if (pci_enable_io_modes) {
3132 		/* Turn on resources that have been left off by a lazy BIOS */
3133 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3134 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3135 			cmd |= PCIM_CMD_PORTEN;
3136 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3137 		}
3138 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3139 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3140 			cmd |= PCIM_CMD_MEMEN;
3141 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3142 		}
3143 	} else {
3144 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3145 			return (barlen);
3146 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3147 			return (barlen);
3148 	}
3149 
3150 	count = (pci_addr_t)1 << mapsize;
3151 	flags = RF_ALIGNMENT_LOG2(mapsize);
3152 	if (prefetch)
3153 		flags |= RF_PREFETCHABLE;
3154 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3155 		start = 0;	/* Let the parent decide. */
3156 		end = ~0;
3157 	} else {
3158 		start = base;
3159 		end = base + count - 1;
3160 	}
3161 	resource_list_add(rl, type, reg, start, end, count);
3162 
3163 	/*
3164 	 * Try to allocate the resource for this BAR from our parent
3165 	 * so that this resource range is already reserved.  The
3166 	 * driver for this device will later inherit this resource in
3167 	 * pci_alloc_resource().
3168 	 */
3169 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3170 	    flags);
3171 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3172 		/*
3173 		 * If the allocation fails, try to allocate a resource for
3174 		 * this BAR using any available range.  The firmware felt
3175 		 * it was important enough to assign a resource, so don't
3176 		 * disable decoding if we can help it.
3177 		 */
3178 		resource_list_delete(rl, type, reg);
3179 		resource_list_add(rl, type, reg, 0, ~0, count);
3180 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3181 		    count, flags);
3182 	}
3183 	if (res == NULL) {
3184 		/*
3185 		 * If the allocation fails, delete the resource list entry
3186 		 * and disable decoding for this device.
3187 		 *
3188 		 * If the driver requests this resource in the future,
3189 		 * pci_reserve_map() will try to allocate a fresh
3190 		 * resource range.
3191 		 */
3192 		resource_list_delete(rl, type, reg);
3193 		pci_disable_io(dev, type);
3194 		if (bootverbose)
3195 			device_printf(bus,
3196 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3197 			    pci_get_domain(dev), pci_get_bus(dev),
3198 			    pci_get_slot(dev), pci_get_function(dev), reg);
3199 	} else {
3200 		start = rman_get_start(res);
3201 		pci_write_bar(dev, pm, start);
3202 	}
3203 	return (barlen);
3204 }
3205 
3206 /*
3207  * For ATA devices we need to decide early what addressing mode to use.
3208  * Legacy demands that the primary and secondary ATA ports sit on the
3209  * same addresses that old ISA hardware did.  This dictates that we use
3210  * those addresses and ignore the BARs if we cannot set PCI native
3211  * addressing mode.
3212  */
3213 static void
3214 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3215     uint32_t prefetchmask)
3216 {
3217 	int rid, type, progif;
3218 #if 0
3219 	/* if this device supports PCI native addressing use it */
3220 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3221 	if ((progif & 0x8a) == 0x8a) {
3222 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3223 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3224 			printf("Trying ATA native PCI addressing mode\n");
3225 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3226 		}
3227 	}
3228 #endif
3229 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3230 	type = SYS_RES_IOPORT;
3231 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3232 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3233 		    prefetchmask & (1 << 0));
3234 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3235 		    prefetchmask & (1 << 1));
3236 	} else {
3237 		rid = PCIR_BAR(0);
3238 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3239 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3240 		    0x1f7, 8, 0);
3241 		rid = PCIR_BAR(1);
3242 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3243 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3244 		    0x3f6, 1, 0);
3245 	}
3246 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3247 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3248 		    prefetchmask & (1 << 2));
3249 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3250 		    prefetchmask & (1 << 3));
3251 	} else {
3252 		rid = PCIR_BAR(2);
3253 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3254 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3255 		    0x177, 8, 0);
3256 		rid = PCIR_BAR(3);
3257 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3258 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3259 		    0x376, 1, 0);
3260 	}
3261 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3262 	    prefetchmask & (1 << 4));
3263 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3264 	    prefetchmask & (1 << 5));
3265 }
3266 
3267 static void
3268 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3269 {
3270 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3271 	pcicfgregs *cfg = &dinfo->cfg;
3272 	char tunable_name[64];
3273 	int irq;
3274 
3275 	/* Has to have an intpin to have an interrupt. */
3276 	if (cfg->intpin == 0)
3277 		return;
3278 
3279 	/* Let the user override the IRQ with a tunable. */
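	/*
	 * For example, setting hw.pci0.13.0.INTA.irq=10 in loader.conf(5)
	 * would force INTA of the device in domain 0, bus 13, slot 0 to
	 * IRQ 10 (all values here are purely illustrative).
	 */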
3280 	irq = PCI_INVALID_IRQ;
3281 	snprintf(tunable_name, sizeof(tunable_name),
3282 	    "hw.pci%d.%d.%d.INT%c.irq",
3283 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3284 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3285 		irq = PCI_INVALID_IRQ;
3286 
3287 	/*
3288 	 * If we didn't get an IRQ via the tunable, then we either use the
3289 	 * IRQ value in the intline register or we ask the bus to route an
3290 	 * interrupt for us.  If force_route is true, then we only use the
3291 	 * value in the intline register if the bus was unable to assign an
3292 	 * IRQ.
3293 	 */
3294 	if (!PCI_INTERRUPT_VALID(irq)) {
3295 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3296 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3297 		if (!PCI_INTERRUPT_VALID(irq))
3298 			irq = cfg->intline;
3299 	}
3300 
3301 	/* If after all that we don't have an IRQ, just bail. */
3302 	if (!PCI_INTERRUPT_VALID(irq))
3303 		return;
3304 
3305 	/* Update the config register if it changed. */
3306 	if (irq != cfg->intline) {
3307 		cfg->intline = irq;
3308 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3309 	}
3310 
3311 	/* Add this IRQ as rid 0 interrupt resource. */
3312 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3313 }
3314 
3315 /* Perform early OHCI takeover from SMM. */
3316 static void
3317 ohci_early_takeover(device_t self)
3318 {
3319 	struct resource *res;
3320 	uint32_t ctl;
3321 	int rid;
3322 	int i;
3323 
3324 	rid = PCIR_BAR(0);
3325 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3326 	if (res == NULL)
3327 		return;
3328 
3329 	ctl = bus_read_4(res, OHCI_CONTROL);
3330 	if (ctl & OHCI_IR) {
3331 		if (bootverbose)
3332 			printf("ohci early: "
3333 			    "SMM active, request owner change\n");
3334 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3335 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3336 			DELAY(1000);
3337 			ctl = bus_read_4(res, OHCI_CONTROL);
3338 		}
3339 		if (ctl & OHCI_IR) {
3340 			if (bootverbose)
3341 				printf("ohci early: "
3342 				    "SMM does not respond, resetting\n");
3343 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3344 		}
3345 		/* Disable interrupts */
3346 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3347 	}
3348 
3349 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3350 }
3351 
3352 /* Perform early UHCI takeover from SMM. */
3353 static void
3354 uhci_early_takeover(device_t self)
3355 {
3356 	struct resource *res;
3357 	int rid;
3358 
3359 	/*
3360 	 * Set the PIRQD enable bit and switch off all the others.  We don't
3361 	 * want legacy support to interfere with us.  XXX: Does this also mean
3362 	 * that the BIOS won't touch the keyboard anymore if it is connected
3363 	 * to the ports of the root hub?
3364 	 */
3365 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3366 
3367 	/* Disable interrupts */
3368 	rid = PCI_UHCI_BASE_REG;
3369 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3370 	if (res != NULL) {
3371 		bus_write_2(res, UHCI_INTR, 0);
3372 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3373 	}
3374 }
3375 
3376 /* Perform early EHCI takeover from SMM. */
3377 static void
3378 ehci_early_takeover(device_t self)
3379 {
3380 	struct resource *res;
3381 	uint32_t cparams;
3382 	uint32_t eec;
3383 	uint8_t eecp;
3384 	uint8_t bios_sem;
3385 	uint8_t offs;
3386 	int rid;
3387 	int i;
3388 
3389 	rid = PCIR_BAR(0);
3390 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3391 	if (res == NULL)
3392 		return;
3393 
3394 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3395 
3396 	/* Synchronise with the BIOS if it owns the controller. */
3397 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3398 	    eecp = EHCI_EECP_NEXT(eec)) {
3399 		eec = pci_read_config(self, eecp, 4);
3400 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3401 			continue;
3402 		}
3403 		bios_sem = pci_read_config(self, eecp +
3404 		    EHCI_LEGSUP_BIOS_SEM, 1);
3405 		if (bios_sem == 0) {
3406 			continue;
3407 		}
3408 		if (bootverbose)
3409 			printf("ehci early: "
3410 			    "SMM active, request owner change\n");
3411 
3412 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3413 
3414 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3415 			DELAY(1000);
3416 			bios_sem = pci_read_config(self, eecp +
3417 			    EHCI_LEGSUP_BIOS_SEM, 1);
3418 		}
3419 
3420 		if (bios_sem != 0) {
3421 			if (bootverbose)
3422 				printf("ehci early: "
3423 				    "SMM does not respond\n");
3424 		}
3425 		/* Disable interrupts */
3426 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3427 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3428 	}
3429 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3430 }
3431 
3432 /* Perform early XHCI takeover from SMM. */
3433 static void
3434 xhci_early_takeover(device_t self)
3435 {
3436 	struct resource *res;
3437 	uint32_t cparams;
3438 	uint32_t eec;
3439 	uint32_t eecp;
3440 	uint8_t bios_sem;
3441 	uint8_t offs;
3442 	int rid;
3443 	int i;
3444 
3445 	rid = PCIR_BAR(0);
3446 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3447 	if (res == NULL)
3448 		return;
3449 
3450 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3451 
3452 	eec = -1;
3453 
3454 	/* Synchronise with the BIOS if it owns the controller. */
3455 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3456 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3457 		eec = bus_read_4(res, eecp);
3458 
3459 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3460 			continue;
3461 
3462 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3463 		if (bios_sem == 0)
3464 			continue;
3465 
3466 		if (bootverbose)
3467 			printf("xhci early: "
3468 			    "SMM active, request owner change\n");
3469 
3470 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3471 
3472 		/* Wait a maximum of 5 seconds. */
3473 
3474 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3475 			DELAY(1000);
3476 			bios_sem = bus_read_1(res, eecp +
3477 			    XHCI_XECP_BIOS_SEM);
3478 		}
3479 
3480 		if (bios_sem != 0) {
3481 			if (bootverbose)
3482 				printf("xhci early: "
3483 				    "SMM does not respond\n");
3484 		}
3485 
3486 		/* Disable interrupts */
3487 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3488 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3489 		bus_read_4(res, offs + XHCI_USBSTS);
3490 	}
3491 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3492 }
3493 
3494 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3495 static void
3496 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3497     struct resource_list *rl)
3498 {
3499 	struct resource *res;
3500 	char *cp;
3501 	rman_res_t start, end, count;
3502 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3503 
3504 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3505 	case PCIM_HDRTYPE_BRIDGE:
3506 		sec_reg = PCIR_SECBUS_1;
3507 		sub_reg = PCIR_SUBBUS_1;
3508 		break;
3509 	case PCIM_HDRTYPE_CARDBUS:
3510 		sec_reg = PCIR_SECBUS_2;
3511 		sub_reg = PCIR_SUBBUS_2;
3512 		break;
3513 	default:
3514 		return;
3515 	}
3516 
3517 	/*
3518 	 * If the existing bus range is valid, attempt to reserve it
3519 	 * from our parent.  If this fails for any reason, clear the
3520 	 * secbus and subbus registers.
3521 	 *
3522 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3523 	 * This would at least preserve the existing sec_bus if it is
3524 	 * valid.
3525 	 */
3526 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3527 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3528 
3529 	/* Quirk handling. */
3530 	switch (pci_get_devid(dev)) {
3531 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3532 		sup_bus = pci_read_config(dev, 0x41, 1);
3533 		if (sup_bus != 0xff) {
3534 			sec_bus = sup_bus + 1;
3535 			sub_bus = sup_bus + 1;
3536 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3537 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3538 		}
3539 		break;
3540 
3541 	case 0x00dd10de:
3542 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3543 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3544 			break;
3545 		if (strncmp(cp, "Compal", 6) != 0) {
3546 			freeenv(cp);
3547 			break;
3548 		}
3549 		freeenv(cp);
3550 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3551 			break;
3552 		if (strncmp(cp, "08A0", 4) != 0) {
3553 			freeenv(cp);
3554 			break;
3555 		}
3556 		freeenv(cp);
3557 		if (sub_bus < 0xa) {
3558 			sub_bus = 0xa;
3559 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3560 		}
3561 		break;
3562 	}
3563 
3564 	if (bootverbose)
3565 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3566 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3567 		start = sec_bus;
3568 		end = sub_bus;
3569 		count = end - start + 1;
3570 
3571 		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
3572 
3573 		/*
3574 		 * If requested, clear secondary bus registers in
3575 		 * bridge devices to force a complete renumbering
3576 		 * rather than reserving the existing range.  However,
3577 		 * preserve the existing size.
3578 		 */
3579 		if (pci_clear_buses)
3580 			goto clear;
3581 
3582 		rid = 0;
3583 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3584 		    start, end, count, 0);
3585 		if (res != NULL)
3586 			return;
3587 
3588 		if (bootverbose)
3589 			device_printf(bus,
3590 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3591 			    pci_get_domain(dev), pci_get_bus(dev),
3592 			    pci_get_slot(dev), pci_get_function(dev));
3593 	}
3594 
3595 clear:
3596 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3597 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3598 }
3599 
3600 static struct resource *
3601 pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
3602     rman_res_t end, rman_res_t count, u_int flags)
3603 {
3604 	struct pci_devinfo *dinfo;
3605 	pcicfgregs *cfg;
3606 	struct resource_list *rl;
3607 	struct resource *res;
3608 	int sec_reg, sub_reg;
3609 
3610 	dinfo = device_get_ivars(child);
3611 	cfg = &dinfo->cfg;
3612 	rl = &dinfo->resources;
3613 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3614 	case PCIM_HDRTYPE_BRIDGE:
3615 		sec_reg = PCIR_SECBUS_1;
3616 		sub_reg = PCIR_SUBBUS_1;
3617 		break;
3618 	case PCIM_HDRTYPE_CARDBUS:
3619 		sec_reg = PCIR_SECBUS_2;
3620 		sub_reg = PCIR_SUBBUS_2;
3621 		break;
3622 	default:
3623 		return (NULL);
3624 	}
3625 
3626 	if (*rid != 0)
3627 		return (NULL);
3628 
3629 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3630 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3631 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3632 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3633 		    start, end, count, flags & ~RF_ACTIVE);
3634 		if (res == NULL) {
3635 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3636 			device_printf(child, "allocating %ju bus%s failed\n",
3637 			    count, count == 1 ? "" : "es");
3638 			return (NULL);
3639 		}
3640 		if (bootverbose)
3641 			device_printf(child,
3642 			    "Lazy allocation of %ju bus%s at %ju\n", count,
3643 			    count == 1 ? "" : "es", rman_get_start(res));
3644 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3645 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3646 	}
3647 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3648 	    end, count, flags));
3649 }
3650 #endif
3651 
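/*
 * Map an Enhanced Allocation BEI (BAR Equivalent Indicator) to the
 * config-space rid of the BAR, ROM, or SR-IOV VF BAR it describes.
 * Returns -1 for BEIs that do not correspond to a register we handle.
 */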
3652 static int
3653 pci_ea_bei_to_rid(device_t dev, int bei)
3654 {
3655 #ifdef PCI_IOV
3656 	struct pci_devinfo *dinfo;
3657 	int iov_pos;
3658 	struct pcicfg_iov *iov;
3659 
3660 	dinfo = device_get_ivars(dev);
3661 	iov = dinfo->cfg.iov;
3662 	if (iov != NULL)
3663 		iov_pos = iov->iov_pos;
3664 	else
3665 		iov_pos = 0;
3666 #endif
3667 
3668 	/* Check if matches BAR */
3669 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3670 	    (bei <= PCIM_EA_BEI_BAR_5))
3671 		return (PCIR_BAR(bei));
3672 
3673 	/* Check ROM */
3674 	if (bei == PCIM_EA_BEI_ROM)
3675 		return (PCIR_BIOS);
3676 
3677 #ifdef PCI_IOV
3678 	/* Check if matches VF_BAR */
3679 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3680 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3681 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3682 		    iov_pos);
3683 #endif
3684 
3685 	return (-1);
3686 }
3687 
3688 int
3689 pci_ea_is_enabled(device_t dev, int rid)
3690 {
3691 	struct pci_ea_entry *ea;
3692 	struct pci_devinfo *dinfo;
3693 
3694 	dinfo = device_get_ivars(dev);
3695 
3696 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3697 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3698 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3699 	}
3700 
3701 	return (0);
3702 }
3703 
3704 void
3705 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3706 {
3707 	struct pci_ea_entry *ea;
3708 	struct pci_devinfo *dinfo;
3709 	pci_addr_t start, end, count;
3710 	struct resource_list *rl;
3711 	int type, flags, rid;
3712 	struct resource *res;
3713 	uint32_t tmp;
3714 #ifdef PCI_IOV
3715 	struct pcicfg_iov *iov;
3716 #endif
3717 
3718 	dinfo = device_get_ivars(dev);
3719 	rl = &dinfo->resources;
3720 	flags = 0;
3721 
3722 #ifdef PCI_IOV
3723 	iov = dinfo->cfg.iov;
3724 #endif
3725 
3726 	if (dinfo->cfg.ea.ea_location == 0)
3727 		return;
3728 
3729 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3730 
3731 		/*
3732 		 * TODO: Ignore the EA BAR if it is not enabled.
3733 		 *   Currently the EA implementation supports only the
3734 		 *   case where the EA structure contains predefined
3735 		 *   entries.  If an entry is not enabled, leave it
3736 		 *   unallocated and fall back to the legacy BAR
3737 		 *   mechanism.
3738 		 */
3739 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3740 			continue;
3741 
3742 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3743 		case PCIM_EA_P_MEM_PREFETCH:
3744 		case PCIM_EA_P_VF_MEM_PREFETCH:
3745 			flags = RF_PREFETCHABLE;
3746 			/* FALLTHROUGH */
3747 		case PCIM_EA_P_VF_MEM:
3748 		case PCIM_EA_P_MEM:
3749 			type = SYS_RES_MEMORY;
3750 			break;
3751 		case PCIM_EA_P_IO:
3752 			type = SYS_RES_IOPORT;
3753 			break;
3754 		default:
3755 			continue;
3756 		}
3757 
3758 		if (alloc_iov != 0) {
3759 #ifdef PCI_IOV
3760 			/* Allocating IOV, confirm BEI matches */
3761 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3762 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3763 				continue;
3764 #else
3765 			continue;
3766 #endif
3767 		} else {
3768 			/* Allocating BAR, confirm BEI matches */
3769 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3770 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3771 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3772 				continue;
3773 		}
3774 
3775 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3776 		if (rid < 0)
3777 			continue;
3778 
3779 		/* Skip resources already allocated by EA */
3780 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3781 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3782 			continue;
3783 
3784 		start = ea->eae_base;
3785 		count = ea->eae_max_offset + 1;
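		/*
		 * A VF BAR entry describes the window of a single VF,
		 * so the reservation must cover all configured VFs.
		 */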
3786 #ifdef PCI_IOV
3787 		if (iov != NULL)
3788 			count = count * iov->iov_num_vfs;
3789 #endif
3790 		end = start + count - 1;
3791 		if (count == 0)
3792 			continue;
3793 
3794 		resource_list_add(rl, type, rid, start, end, count);
3795 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3796 		    flags);
3797 		if (res == NULL) {
3798 			resource_list_delete(rl, type, rid);
3799 
3800 			/*
3801 			 * Failed to allocate using EA; disable the entry.
3802 			 * Another allocation attempt will be made later,
3803 			 * this time using the legacy BAR registers.
3804 			 */
3805 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3806 			tmp &= ~PCIM_EA_ENABLE;
3807 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3808 
3809 			/*
3810 			 * Disabling the entry might fail if it is hardwired.
3811 			 * Re-read the flags to reflect the current status.
3812 			 */
3813 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3814 
3815 			continue;
3816 		}
3817 
3818 		/* As per specification, fill BAR with zeros */
3819 		pci_write_config(dev, rid, 0, 4);
3820 	}
3821 }
3822 
3823 void
3824 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3825 {
3826 	struct pci_devinfo *dinfo;
3827 	pcicfgregs *cfg;
3828 	struct resource_list *rl;
3829 	const struct pci_quirk *q;
3830 	uint32_t devid;
3831 	int i;
3832 
3833 	dinfo = device_get_ivars(dev);
3834 	cfg = &dinfo->cfg;
3835 	rl = &dinfo->resources;
3836 	devid = (cfg->device << 16) | cfg->vendor;
3837 
3838 	/* Allocate resources using Enhanced Allocation */
3839 	pci_add_resources_ea(bus, dev, 0);
3840 
3841 	/* ATA devices need special map treatment. */
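	/*
	 * Controllers whose progif or unprogrammed BARs suggest that
	 * legacy decoding may be in use get their maps set up by
	 * pci_ata_maps() on a per-channel basis instead.
	 */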
3842 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3843 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3844 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3845 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3846 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3847 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3848 	else
3849 		for (i = 0; i < cfg->nummaps;) {
3850 			/* Skip resources already managed by EA */
3851 			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
3852 			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
3853 			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
3854 				i++;
3855 				continue;
3856 			}
3857 
3858 			/*
3859 			 * Skip quirked resources.
3860 			 */
3861 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3862 				if (q->devid == devid &&
3863 				    q->type == PCI_QUIRK_UNMAP_REG &&
3864 				    q->arg1 == PCIR_BAR(i))
3865 					break;
3866 			if (q->devid != 0) {
3867 				i++;
3868 				continue;
3869 			}
3870 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3871 			    prefetchmask & (1 << i));
3872 		}
3873 
3874 	/*
3875 	 * Add additional, quirked resources.
3876 	 */
3877 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3878 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3879 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3880 
3881 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3882 #ifdef __PCI_REROUTE_INTERRUPT
3883 		/*
3884 		 * Try to re-route interrupts. Sometimes the BIOS or
3885 		 * firmware may leave bogus values in these registers.
3886 		 * If the re-route fails, then just stick with what we
3887 		 * have.
3888 		 */
3889 		pci_assign_interrupt(bus, dev, 1);
3890 #else
3891 		pci_assign_interrupt(bus, dev, 0);
3892 #endif
3893 	}
3894 
3895 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3896 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3897 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3898 			xhci_early_takeover(dev);
3899 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3900 			ehci_early_takeover(dev);
3901 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3902 			ohci_early_takeover(dev);
3903 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3904 			uhci_early_takeover(dev);
3905 	}
3906 
3907 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3908 	/*
3909 	 * Reserve resources for secondary bus ranges behind bridge
3910 	 * devices.
3911 	 */
3912 	pci_reserve_secbus(bus, dev, cfg, rl);
3913 #endif
3914 }
3915 
3916 static struct pci_devinfo *
3917 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3918     int slot, int func)
3919 {
3920 	struct pci_devinfo *dinfo;
3921 
3922 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3923 	if (dinfo != NULL)
3924 		pci_add_child(dev, dinfo);
3925 
3926 	return (dinfo);
3927 }
3928 
3929 void
3930 pci_add_children(device_t dev, int domain, int busno)
3931 {
3932 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3933 	device_t pcib = device_get_parent(dev);
3934 	struct pci_devinfo *dinfo;
3935 	int maxslots;
3936 	int s, f, pcifunchigh;
3937 	uint8_t hdrtype;
3938 	int first_func;
3939 
3940 	/*
3941 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3942 	 * enable ARI.  We must enable ARI before detecting the rest of the
3943 	 * functions on this bus as ARI changes the set of slots and functions
3944 	 * that are legal on this bus.
3945 	 */
3946 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
3947 	if (dinfo != NULL && pci_enable_ari)
3948 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3949 
3950 	/*
3951 	 * Start looking for new devices on slot 0 at function 1 because we
3952 	 * just identified the device at slot 0, function 0.
3953 	 */
3954 	first_func = 1;
3955 
3956 	maxslots = PCIB_MAXSLOTS(pcib);
3957 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3958 		pcifunchigh = 0;
3959 		f = 0;
3960 		DELAY(1);
3961 		hdrtype = REG(PCIR_HDRTYPE, 1);
3962 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3963 			continue;
3964 		if (hdrtype & PCIM_MFDEV)
3965 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3966 		for (f = first_func; f <= pcifunchigh; f++)
3967 			pci_identify_function(pcib, dev, domain, busno, s, f);
3968 	}
3969 #undef REG
3970 }
3971 
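/*
 * Re-enumerate the bus: keep the device_t of each function that is
 * still present, delete devices that have disappeared, and then probe
 * and attach any newly discovered functions.
 */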
3972 int
3973 pci_rescan_method(device_t dev)
3974 {
3975 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3976 	device_t pcib = device_get_parent(dev);
3977 	struct pci_softc *sc;
3978 	device_t child, *devlist, *unchanged;
3979 	int devcount, error, i, j, maxslots, oldcount;
3980 	int busno, domain, s, f, pcifunchigh;
3981 	uint8_t hdrtype;
3982 
3983 	/* No need to check for ARI on a rescan. */
3984 	error = device_get_children(dev, &devlist, &devcount);
3985 	if (error)
3986 		return (error);
3987 	if (devcount != 0) {
3988 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3989 		    M_NOWAIT | M_ZERO);
3990 		if (unchanged == NULL) {
3991 			free(devlist, M_TEMP);
3992 			return (ENOMEM);
3993 		}
3994 	} else
3995 		unchanged = NULL;
3996 
3997 	sc = device_get_softc(dev);
3998 	domain = pcib_get_domain(dev);
3999 	busno = pcib_get_bus(dev);
4000 	maxslots = PCIB_MAXSLOTS(pcib);
4001 	for (s = 0; s <= maxslots; s++) {
4002 		/* If function 0 is not present, skip to the next slot. */
4003 		f = 0;
4004 		if (REG(PCIR_VENDOR, 2) == 0xffff)
4005 			continue;
4006 		pcifunchigh = 0;
4007 		hdrtype = REG(PCIR_HDRTYPE, 1);
4008 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
4009 			continue;
4010 		if (hdrtype & PCIM_MFDEV)
4011 			pcifunchigh = PCIB_MAXFUNCS(pcib);
4012 		for (f = 0; f <= pcifunchigh; f++) {
4013 			if (REG(PCIR_VENDOR, 2) == 0xffff)
4014 				continue;
4015 
4016 			/*
4017 			 * Found a valid function.  Check if a
4018 			 * device_t for this device already exists.
4019 			 */
4020 			for (i = 0; i < devcount; i++) {
4021 				child = devlist[i];
4022 				if (child == NULL)
4023 					continue;
4024 				if (pci_get_slot(child) == s &&
4025 				    pci_get_function(child) == f) {
4026 					unchanged[i] = child;
4027 					goto next_func;
4028 				}
4029 			}
4030 
4031 			pci_identify_function(pcib, dev, domain, busno, s, f);
4032 		next_func:;
4033 		}
4034 	}
4035 
4036 	/* Remove devices that are no longer present. */
4037 	for (i = 0; i < devcount; i++) {
4038 		if (unchanged[i] != NULL)
4039 			continue;
4040 		device_delete_child(dev, devlist[i]);
4041 	}
4042 
4043 	free(devlist, M_TEMP);
4044 	oldcount = devcount;
4045 
4046 	/* Try to attach the devices just added. */
4047 	error = device_get_children(dev, &devlist, &devcount);
4048 	if (error) {
4049 		free(unchanged, M_TEMP);
4050 		return (error);
4051 	}
4052 
4053 	for (i = 0; i < devcount; i++) {
4054 		for (j = 0; j < oldcount; j++) {
4055 			if (devlist[i] == unchanged[j])
4056 				goto next_device;
4057 		}
4058 
4059 		device_probe_and_attach(devlist[i]);
4060 	next_device:;
4061 	}
4062 
4063 	free(unchanged, M_TEMP);
4064 	free(devlist, M_TEMP);
4065 	return (0);
4066 #undef REG
4067 }
4068 
4069 #ifdef PCI_IOV
4070 device_t
4071 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4072     uint16_t did)
4073 {
4074 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4075 	device_t pcib;
4076 	int busno, slot, func;
4077 
4078 	pf_dinfo = device_get_ivars(pf);
4079 
4080 	pcib = device_get_parent(bus);
4081 
4082 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4083 
4084 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4085 	    slot, func, vid, did);
4086 
4087 	vf_dinfo->cfg.flags |= PCICFG_VF;
4088 	pci_add_child(bus, vf_dinfo);
4089 
4090 	return (vf_dinfo->cfg.dev);
4091 }
4092 
4093 device_t
4094 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
4095     uint16_t vid, uint16_t did)
4096 {
4097 
4098 	return (pci_add_iov_child(bus, pf, rid, vid, did));
4099 }
4100 #endif
4101 
4102 void
4103 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4104 {
4105 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4106 	device_set_ivars(dinfo->cfg.dev, dinfo);
4107 	resource_list_init(&dinfo->resources);
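	/*
	 * Save the configuration and restore it right away;
	 * pci_cfg_restore() also returns the device to D0 if the
	 * firmware left it in a lower power state.
	 */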
4108 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4109 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4110 	pci_print_verbose(dinfo);
4111 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4112 	pci_child_added(dinfo->cfg.dev);
4113 	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
4114 }
4115 
4116 void
4117 pci_child_added_method(device_t dev, device_t child)
4118 {
4119 
4120 }
4121 
4122 static int
4123 pci_probe(device_t dev)
4124 {
4125 
4126 	device_set_desc(dev, "PCI bus");
4127 
4128 	/* Allow other subclasses to override this driver. */
4129 	return (BUS_PROBE_GENERIC);
4130 }
4131 
4132 int
4133 pci_attach_common(device_t dev)
4134 {
4135 	struct pci_softc *sc;
4136 	int busno, domain;
4137 #ifdef PCI_DMA_BOUNDARY
4138 	int error, tag_valid;
4139 #endif
4140 #ifdef PCI_RES_BUS
4141 	int rid;
4142 #endif
4143 
4144 	sc = device_get_softc(dev);
4145 	domain = pcib_get_domain(dev);
4146 	busno = pcib_get_bus(dev);
4147 #ifdef PCI_RES_BUS
4148 	rid = 0;
4149 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
4150 	    1, 0);
4151 	if (sc->sc_bus == NULL) {
4152 		device_printf(dev, "failed to allocate bus number\n");
4153 		return (ENXIO);
4154 	}
4155 #endif
4156 	if (bootverbose)
4157 		device_printf(dev, "domain=%d, physical bus=%d\n",
4158 		    domain, busno);
4159 #ifdef PCI_DMA_BOUNDARY
4160 	tag_valid = 0;
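	/*
	 * Only the PCI bus at the top of a hierarchy needs the
	 * boundary-limited DMA tag; buses behind a PCI-PCI bridge
	 * inherit the tag of their parent PCI bus below.
	 */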
4161 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
4162 	    devclass_find("pci")) {
4163 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
4164 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4165 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
4166 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
4167 		if (error)
4168 			device_printf(dev, "Failed to create DMA tag: %d\n",
4169 			    error);
4170 		else
4171 			tag_valid = 1;
4172 	}
4173 	if (!tag_valid)
4174 #endif
4175 		sc->sc_dma_tag = bus_get_dma_tag(dev);
4176 	return (0);
4177 }
4178 
4179 static int
4180 pci_attach(device_t dev)
4181 {
4182 	int busno, domain, error;
4183 
4184 	error = pci_attach_common(dev);
4185 	if (error)
4186 		return (error);
4187 
4188 	/*
4189 	 * Since there can be multiple independently numbered PCI
4190 	 * buses on systems with multiple PCI domains, we can't use
4191 	 * the unit number to decide which bus we are probing. We ask
4192 	 * the parent pcib what our domain and bus numbers are.
4193 	 */
4194 	domain = pcib_get_domain(dev);
4195 	busno = pcib_get_bus(dev);
4196 	pci_add_children(dev, domain, busno);
4197 	return (bus_generic_attach(dev));
4198 }
4199 
4200 static int
4201 pci_detach(device_t dev)
4202 {
4203 #ifdef PCI_RES_BUS
4204 	struct pci_softc *sc;
4205 #endif
4206 	int error;
4207 
4208 	error = bus_generic_detach(dev);
4209 	if (error)
4210 		return (error);
4211 #ifdef PCI_RES_BUS
4212 	sc = device_get_softc(dev);
4213 	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
4214 	if (error)
4215 		return (error);
4216 #endif
4217 	return (device_delete_children(dev));
4218 }
4219 
4220 static void
4221 pci_set_power_child(device_t dev, device_t child, int state)
4222 {
4223 	device_t pcib;
4224 	int dstate;
4225 
4226 	/*
4227 	 * Set the device to the given state.  If the firmware suggests
4228 	 * a different power state, use it instead.  If power management
4229 	 * is not present, the firmware is responsible for managing
4230 	 * device power.  Skip children who aren't attached since they
4231 	 * are handled separately.
4232 	 */
4233 	pcib = device_get_parent(dev);
4234 	dstate = state;
4235 	if (device_is_attached(child) &&
4236 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4237 		pci_set_powerstate(child, dstate);
4238 }
4239 
4240 int
4241 pci_suspend_child(device_t dev, device_t child)
4242 {
4243 	struct pci_devinfo *dinfo;
4244 	int error;
4245 
4246 	dinfo = device_get_ivars(child);
4247 
4248 	/*
4249 	 * Save the PCI configuration space for the child and set the
4250 	 * device in the appropriate power state for this sleep state.
4251 	 */
4252 	pci_cfg_save(child, dinfo, 0);
4253 
4254 	/* Suspend devices before potentially powering them down. */
4255 	error = bus_generic_suspend_child(dev, child);
4256 
4257 	if (error)
4258 		return (error);
4259 
4260 	if (pci_do_power_suspend)
4261 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4262 
4263 	return (0);
4264 }
4265 
4266 int
4267 pci_resume_child(device_t dev, device_t child)
4268 {
4269 	struct pci_devinfo *dinfo;
4270 
4271 	if (pci_do_power_resume)
4272 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4273 
4274 	dinfo = device_get_ivars(child);
4275 	pci_cfg_restore(child, dinfo);
4276 	if (!device_is_attached(child))
4277 		pci_cfg_save(child, dinfo, 1);
4278 
4279 	bus_generic_resume_child(dev, child);
4280 
4281 	return (0);
4282 }
4283 
4284 int
4285 pci_resume(device_t dev)
4286 {
4287 	device_t child, *devlist;
4288 	int error, i, numdevs;
4289 
4290 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4291 		return (error);
4292 
4293 	/*
4294 	 * Resume critical devices first, then everything else later.
4295 	 */
4296 	for (i = 0; i < numdevs; i++) {
4297 		child = devlist[i];
4298 		switch (pci_get_class(child)) {
4299 		case PCIC_DISPLAY:
4300 		case PCIC_MEMORY:
4301 		case PCIC_BRIDGE:
4302 		case PCIC_BASEPERIPH:
4303 			BUS_RESUME_CHILD(dev, child);
4304 			break;
4305 		}
4306 	}
4307 	for (i = 0; i < numdevs; i++) {
4308 		child = devlist[i];
4309 		switch (pci_get_class(child)) {
4310 		case PCIC_DISPLAY:
4311 		case PCIC_MEMORY:
4312 		case PCIC_BRIDGE:
4313 		case PCIC_BASEPERIPH:
4314 			break;
4315 		default:
4316 			BUS_RESUME_CHILD(dev, child);
4317 		}
4318 	}
4319 	free(devlist, M_TEMP);
4320 	return (0);
4321 }
4322 
4323 static void
4324 pci_load_vendor_data(void)
4325 {
4326 	caddr_t data;
4327 	void *ptr;
4328 	size_t sz;
4329 
4330 	data = preload_search_by_type("pci_vendor_data");
4331 	if (data != NULL) {
4332 		ptr = preload_fetch_addr(data);
4333 		sz = preload_fetch_size(data);
4334 		if (ptr != NULL && sz != 0) {
4335 			pci_vendordata = ptr;
4336 			pci_vendordata_size = sz;
4337 			/* terminate the database */
4338 			pci_vendordata[pci_vendordata_size] = '\n';
4339 		}
4340 	}
4341 }
4342 
4343 void
4344 pci_driver_added(device_t dev, driver_t *driver)
4345 {
4346 	int numdevs;
4347 	device_t *devlist;
4348 	device_t child;
4349 	struct pci_devinfo *dinfo;
4350 	int i;
4351 
4352 	if (bootverbose)
4353 		device_printf(dev, "driver added\n");
4354 	DEVICE_IDENTIFY(driver, dev);
4355 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4356 		return;
4357 	for (i = 0; i < numdevs; i++) {
4358 		child = devlist[i];
4359 		if (device_get_state(child) != DS_NOTPRESENT)
4360 			continue;
4361 		dinfo = device_get_ivars(child);
4362 		pci_print_verbose(dinfo);
4363 		if (bootverbose)
4364 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4365 		pci_cfg_restore(child, dinfo);
4366 		if (device_probe_and_attach(child) != 0)
4367 			pci_child_detached(dev, child);
4368 	}
4369 	free(devlist, M_TEMP);
4370 }
4371 
4372 int
4373 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
4374     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
4375 {
4376 	struct pci_devinfo *dinfo;
4377 	struct msix_table_entry *mte;
4378 	struct msix_vector *mv;
4379 	uint64_t addr;
4380 	uint32_t data;
4381 	void *cookie;
4382 	int error, rid;
4383 
4384 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
4385 	    arg, &cookie);
4386 	if (error)
4387 		return (error);
4388 
4389 	/* If this is not a direct child, just bail out. */
4390 	if (device_get_parent(child) != dev) {
4391 		*cookiep = cookie;
4392 		return (0);
4393 	}
4394 
4395 	rid = rman_get_rid(irq);
4396 	if (rid == 0) {
4397 		/* Make sure that INTx is enabled */
4398 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4399 	} else {
4400 		/*
4401 		 * Check to see if the interrupt is MSI or MSI-X.
4402 		 * Ask our parent to map the MSI and give
4403 		 * us the address and data register values.
4404 		 * If we fail for some reason, teardown the
4405 		 * interrupt handler.
4406 		 */
4407 		dinfo = device_get_ivars(child);
4408 		if (dinfo->cfg.msi.msi_alloc > 0) {
4409 			if (dinfo->cfg.msi.msi_addr == 0) {
4410 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
4411 			    ("MSI has handlers, but vectors not mapped"));
4412 				error = PCIB_MAP_MSI(device_get_parent(dev),
4413 				    child, rman_get_start(irq), &addr, &data);
4414 				if (error)
4415 					goto bad;
4416 				dinfo->cfg.msi.msi_addr = addr;
4417 				dinfo->cfg.msi.msi_data = data;
4418 			}
4419 			if (dinfo->cfg.msi.msi_handlers == 0)
4420 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
4421 				    dinfo->cfg.msi.msi_data);
4422 			dinfo->cfg.msi.msi_handlers++;
4423 		} else {
4424 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4425 			    ("No MSI or MSI-X interrupts allocated"));
4426 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4427 			    ("MSI-X index too high"));
4428 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4429 			KASSERT(mte->mte_vector != 0, ("no message vector"));
4430 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
4431 			KASSERT(mv->mv_irq == rman_get_start(irq),
4432 			    ("IRQ mismatch"));
4433 			if (mv->mv_address == 0) {
4434 				KASSERT(mte->mte_handlers == 0,
4435 		    ("MSI-X table entry has handlers, but vector not mapped"));
4436 				error = PCIB_MAP_MSI(device_get_parent(dev),
4437 				    child, rman_get_start(irq), &addr, &data);
4438 				if (error)
4439 					goto bad;
4440 				mv->mv_address = addr;
4441 				mv->mv_data = data;
4442 			}
4443 
4444 			/*
4445 			 * The MSIX table entry must be made valid by
4446 			 * incrementing the mte_handlers before
4447 			 * calling pci_enable_msix() and
4448 			 * pci_resume_msix(). Else the MSIX rewrite
4449 			 * table quirk will not work as expected.
4450 			 */
4451 			mte->mte_handlers++;
4452 			if (mte->mte_handlers == 1) {
4453 				pci_enable_msix(child, rid - 1, mv->mv_address,
4454 				    mv->mv_data);
4455 				pci_unmask_msix(child, rid - 1);
4456 			}
4457 		}
4458 
4459 		/*
4460 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
4461 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
4462 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
4463 		 */
4464 		if (!pci_has_quirk(pci_get_devid(child),
4465 		    PCI_QUIRK_MSI_INTX_BUG))
4466 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4467 		else
4468 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4469 	bad:
4470 		if (error) {
4471 			(void)bus_generic_teardown_intr(dev, child, irq,
4472 			    cookie);
4473 			return (error);
4474 		}
4475 	}
4476 	*cookiep = cookie;
4477 	return (0);
4478 }
4479 
4480 int
4481 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4482     void *cookie)
4483 {
4484 	struct msix_table_entry *mte;
4485 	struct resource_list_entry *rle;
4486 	struct pci_devinfo *dinfo;
4487 	int error, rid;
4488 
4489 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4490 		return (EINVAL);
4491 
4492 	/* If this isn't a direct child, just bail out */
4493 	if (device_get_parent(child) != dev)
4494 		return (bus_generic_teardown_intr(dev, child, irq, cookie));
4495 
4496 	rid = rman_get_rid(irq);
4497 	if (rid == 0) {
4498 		/* Mask INTx */
4499 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4500 	} else {
4501 		/*
4502 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4503 		 * decrement the appropriate handlers count and mask the
4504 		 * MSI-X message, or disable MSI messages if the count
4505 		 * drops to 0.
4506 		 */
4507 		dinfo = device_get_ivars(child);
4508 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4509 		if (rle->res != irq)
4510 			return (EINVAL);
4511 		if (dinfo->cfg.msi.msi_alloc > 0) {
4512 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4513 			    ("MSI-X index too high"));
4514 			if (dinfo->cfg.msi.msi_handlers == 0)
4515 				return (EINVAL);
4516 			dinfo->cfg.msi.msi_handlers--;
4517 			if (dinfo->cfg.msi.msi_handlers == 0)
4518 				pci_disable_msi(child);
4519 		} else {
4520 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4521 			    ("No MSI or MSI-X interrupts allocated"));
4522 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4523 			    ("MSI-X index too high"));
4524 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4525 			if (mte->mte_handlers == 0)
4526 				return (EINVAL);
4527 			mte->mte_handlers--;
4528 			if (mte->mte_handlers == 0)
4529 				pci_mask_msix(child, rid - 1);
4530 		}
4531 	}
4532 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4533 	if (rid > 0)
4534 		KASSERT(error == 0,
4535 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4536 	return (error);
4537 }
4538 
4539 int
4540 pci_print_child(device_t dev, device_t child)
4541 {
4542 	struct pci_devinfo *dinfo;
4543 	struct resource_list *rl;
4544 	int retval = 0;
4545 
4546 	dinfo = device_get_ivars(child);
4547 	rl = &dinfo->resources;
4548 
4549 	retval += bus_print_child_header(dev, child);
4550 
4551 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4552 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4553 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4554 	if (device_get_flags(dev))
4555 		retval += printf(" flags %#x", device_get_flags(dev));
4556 
4557 	retval += printf(" at device %d.%d", pci_get_slot(child),
4558 	    pci_get_function(child));
4559 
4560 	retval += bus_print_child_domain(dev, child);
4561 	retval += bus_print_child_footer(dev, child);
4562 
4563 	return (retval);
4564 }
4565 
4566 static const struct
4567 {
4568 	int		class;
4569 	int		subclass;
4570 	int		report; /* 0 = bootverbose, 1 = always */
4571 	const char	*desc;
4572 } pci_nomatch_tab[] = {
4573 	{PCIC_OLD,		-1,			1, "old"},
4574 	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
4575 	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
4576 	{PCIC_STORAGE,		-1,			1, "mass storage"},
4577 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
4578 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
4579 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
4580 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
4581 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
4582 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
4583 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
4584 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
4585 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
4586 	{PCIC_NETWORK,		-1,			1, "network"},
4587 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
4588 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
4589 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
4590 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
4591 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
4592 	{PCIC_DISPLAY,		-1,			1, "display"},
4593 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
4594 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
4595 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
4596 	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
4597 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
4598 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
4599 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
4600 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
4601 	{PCIC_MEMORY,		-1,			1, "memory"},
4602 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
4603 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
4604 	{PCIC_BRIDGE,		-1,			1, "bridge"},
4605 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
4606 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
4607 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
4608 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
4609 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
4610 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
4611 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
4612 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
4613 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
4614 	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
4615 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
4616 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
4617 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
4618 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
4619 	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
4620 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
4621 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
4622 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
4623 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
4624 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
4625 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
4626 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
4627 	{PCIC_INPUTDEV,		-1,			1, "input device"},
4628 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
4629 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
4630 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
4631 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
4632 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
4633 	{PCIC_DOCKING,		-1,			1, "docking station"},
4634 	{PCIC_PROCESSOR,	-1,			1, "processor"},
4635 	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
4636 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
4637 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
4638 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
4639 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
4640 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
4641 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
4642 	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
4643 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
4644 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
4645 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
4646 	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
4647 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
4648 	{PCIC_SATCOM,		-1,			1, "satellite communication"},
4649 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
4650 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
4651 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
4652 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
4653 	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
4654 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
4655 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
4656 	{PCIC_DASP,		-1,			0, "dasp"},
4657 	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
4658 	{PCIC_DASP,		PCIS_DASP_PERFCNTRS,	1, "performance counters"},
4659 	{PCIC_DASP,		PCIS_DASP_COMM_SYNC,	1, "communication synchronizer"},
4660 	{PCIC_DASP,		PCIS_DASP_MGMT_CARD,	1, "signal processing management"},
4661 	{0, 0, 0,		NULL}
4662 };
4663 
4664 void
4665 pci_probe_nomatch(device_t dev, device_t child)
4666 {
4667 	int i, report;
4668 	const char *cp, *scp;
4669 	char *device;
4670 
4671 	/*
4672 	 * Look for a listing for this device in a loaded device database.
4673 	 */
4674 	report = 1;
4675 	if ((device = pci_describe_device(child)) != NULL) {
4676 		device_printf(dev, "<%s>", device);
4677 		free(device, M_DEVBUF);
4678 	} else {
4679 		/*
4680 		 * Scan the class/subclass descriptions for a general
4681 		 * description.
4682 		 */
4683 		cp = "unknown";
4684 		scp = NULL;
4685 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4686 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4687 				if (pci_nomatch_tab[i].subclass == -1) {
4688 					cp = pci_nomatch_tab[i].desc;
4689 					report = pci_nomatch_tab[i].report;
4690 				} else if (pci_nomatch_tab[i].subclass ==
4691 				    pci_get_subclass(child)) {
4692 					scp = pci_nomatch_tab[i].desc;
4693 					report = pci_nomatch_tab[i].report;
4694 				}
4695 			}
4696 		}
4697 		if (report || bootverbose) {
4698 			device_printf(dev, "<%s%s%s>",
4699 			    cp ? cp : "",
4700 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4701 			    scp ? scp : "");
4702 		}
4703 	}
4704 	if (report || bootverbose) {
4705 		printf(" at device %d.%d (no driver attached)\n",
4706 		    pci_get_slot(child), pci_get_function(child));
4707 	}
4708 	pci_cfg_save(child, device_get_ivars(child), 1);
4709 }
4710 
4711 void
4712 pci_child_detached(device_t dev, device_t child)
4713 {
4714 	struct pci_devinfo *dinfo;
4715 	struct resource_list *rl;
4716 
4717 	dinfo = device_get_ivars(child);
4718 	rl = &dinfo->resources;
4719 
4720 	/*
4721 	 * Have to deallocate IRQs before releasing any MSI messages and
4722 	 * have to release MSI messages before deallocating any memory
4723 	 * BARs.
4724 	 */
4725 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4726 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4727 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4728 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4729 		(void)pci_release_msi(child);
4730 	}
4731 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4732 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4733 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4734 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4735 #ifdef PCI_RES_BUS
4736 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4737 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4738 #endif
4739 
4740 	pci_cfg_save(child, dinfo, 1);
4741 }
4742 
4743 /*
4744  * Parse the PCI device database, if loaded, and return a pointer to a
4745  * description of the device.
4746  *
4747  * The database is flat text formatted as follows:
4748  *
4749  * Any line not in a valid format is ignored.
4750  * Lines are terminated with newline '\n' characters.
4751  *
4752  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4753  * the vendor name.
4754  *
4755  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4756  * - devices cannot be listed without a corresponding VENDOR line.
4757  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4758  * another TAB, then the device name.
4759  */
4760 
4761 /*
4762  * Assuming (ptr) points to the beginning of a line in the database,
4763  * return the vendor or device and description of the next entry.
4764  * The value of (vendor) or (device) inappropriate for the entry type
4765  * is set to -1.  Returns nonzero at the end of the database.
4766  *
4767  * Note that this is not entirely robust in the face of corrupt data;
4768  * we attempt to safeguard against this by spamming the end of the
4769  * database with a newline when we initialise.
4770  */
4771 static int
4772 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4773 {
4774 	char	*cp = *ptr;
4775 	int	left;
4776 
4777 	*device = -1;
4778 	*vendor = -1;
4779 	**desc = '\0';
4780 	for (;;) {
4781 		left = pci_vendordata_size - (cp - pci_vendordata);
4782 		if (left <= 0) {
4783 			*ptr = cp;
4784 			return (1);
4785 		}
4786 
4787 		/* vendor entry? */
4788 		if (*cp != '\t' &&
4789 		    sscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
4790 			break;
4791 		/* device entry? */
4792 		if (*cp == '\t' &&
4793 		    sscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
4794 			break;
4795 
4796 		/* skip to next line */
4797 		while (*cp != '\n' && left > 0) {
4798 			cp++;
4799 			left--;
4800 		}
4801 		if (*cp == '\n') {
4802 			cp++;
4803 			left--;
4804 		}
4805 	}
4806 	/* skip to next line */
4807 	while (*cp != '\n' && left > 0) {
4808 		cp++;
4809 		left--;
4810 	}
4811 	if (*cp == '\n' && left > 0)
4812 		cp++;
4813 	*ptr = cp;
4814 	return (0);
4815 }
4816 
4817 static char *
4818 pci_describe_device(device_t dev)
4819 {
4820 	int	vendor, device;
4821 	char	*desc, *vp, *dp, *line;
4822 
4823 	desc = vp = dp = NULL;
4824 
4825 	/*
4826 	 * If we have no vendor data, we can't do anything.
4827 	 */
4828 	if (pci_vendordata == NULL)
4829 		goto out;
4830 
4831 	/*
4832 	 * Scan the vendor data looking for this device
4833 	 */
4834 	line = pci_vendordata;
4835 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4836 		goto out;
4837 	for (;;) {
4838 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4839 			goto out;
4840 		if (vendor == pci_get_vendor(dev))
4841 			break;
4842 	}
4843 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4844 		goto out;
4845 	for (;;) {
4846 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4847 			*dp = 0;
4848 			break;
4849 		}
4850 		if (vendor != -1) {
4851 			*dp = 0;
4852 			break;
4853 		}
4854 		if (device == pci_get_device(dev))
4855 			break;
4856 	}
4857 	if (dp[0] == '\0')
4858 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4859 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4860 	    NULL)
4861 		sprintf(desc, "%s, %s", vp, dp);
4862 out:
4863 	if (vp != NULL)
4864 		free(vp, M_DEVBUF);
4865 	if (dp != NULL)
4866 		free(dp, M_DEVBUF);
4867 	return (desc);
4868 }
4869 
4870 int
4871 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4872 {
4873 	struct pci_devinfo *dinfo;
4874 	pcicfgregs *cfg;
4875 
4876 	dinfo = device_get_ivars(child);
4877 	cfg = &dinfo->cfg;
4878 
4879 	switch (which) {
4880 	case PCI_IVAR_ETHADDR:
4881 		/*
4882 		 * The generic accessor doesn't deal with failure, so
4883 		 * we set the return value, then return an error.
4884 		 */
4885 		*((uint8_t **) result) = NULL;
4886 		return (EINVAL);
4887 	case PCI_IVAR_SUBVENDOR:
4888 		*result = cfg->subvendor;
4889 		break;
4890 	case PCI_IVAR_SUBDEVICE:
4891 		*result = cfg->subdevice;
4892 		break;
4893 	case PCI_IVAR_VENDOR:
4894 		*result = cfg->vendor;
4895 		break;
4896 	case PCI_IVAR_DEVICE:
4897 		*result = cfg->device;
4898 		break;
4899 	case PCI_IVAR_DEVID:
4900 		*result = (cfg->device << 16) | cfg->vendor;
4901 		break;
4902 	case PCI_IVAR_CLASS:
4903 		*result = cfg->baseclass;
4904 		break;
4905 	case PCI_IVAR_SUBCLASS:
4906 		*result = cfg->subclass;
4907 		break;
4908 	case PCI_IVAR_PROGIF:
4909 		*result = cfg->progif;
4910 		break;
4911 	case PCI_IVAR_REVID:
4912 		*result = cfg->revid;
4913 		break;
4914 	case PCI_IVAR_INTPIN:
4915 		*result = cfg->intpin;
4916 		break;
4917 	case PCI_IVAR_IRQ:
4918 		*result = cfg->intline;
4919 		break;
4920 	case PCI_IVAR_DOMAIN:
4921 		*result = cfg->domain;
4922 		break;
4923 	case PCI_IVAR_BUS:
4924 		*result = cfg->bus;
4925 		break;
4926 	case PCI_IVAR_SLOT:
4927 		*result = cfg->slot;
4928 		break;
4929 	case PCI_IVAR_FUNCTION:
4930 		*result = cfg->func;
4931 		break;
4932 	case PCI_IVAR_CMDREG:
4933 		*result = cfg->cmdreg;
4934 		break;
4935 	case PCI_IVAR_CACHELNSZ:
4936 		*result = cfg->cachelnsz;
4937 		break;
4938 	case PCI_IVAR_MINGNT:
4939 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4940 			*result = -1;
4941 			return (EINVAL);
4942 		}
4943 		*result = cfg->mingnt;
4944 		break;
4945 	case PCI_IVAR_MAXLAT:
4946 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4947 			*result = -1;
4948 			return (EINVAL);
4949 		}
4950 		*result = cfg->maxlat;
4951 		break;
4952 	case PCI_IVAR_LATTIMER:
4953 		*result = cfg->lattimer;
4954 		break;
4955 	default:
4956 		return (ENOENT);
4957 	}
4958 	return (0);
4959 }
4960 
4961 int
4962 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4963 {
4964 	struct pci_devinfo *dinfo;
4965 
4966 	dinfo = device_get_ivars(child);
4967 
4968 	switch (which) {
4969 	case PCI_IVAR_INTPIN:
4970 		dinfo->cfg.intpin = value;
4971 		return (0);
4972 	case PCI_IVAR_ETHADDR:
4973 	case PCI_IVAR_SUBVENDOR:
4974 	case PCI_IVAR_SUBDEVICE:
4975 	case PCI_IVAR_VENDOR:
4976 	case PCI_IVAR_DEVICE:
4977 	case PCI_IVAR_DEVID:
4978 	case PCI_IVAR_CLASS:
4979 	case PCI_IVAR_SUBCLASS:
4980 	case PCI_IVAR_PROGIF:
4981 	case PCI_IVAR_REVID:
4982 	case PCI_IVAR_IRQ:
4983 	case PCI_IVAR_DOMAIN:
4984 	case PCI_IVAR_BUS:
4985 	case PCI_IVAR_SLOT:
4986 	case PCI_IVAR_FUNCTION:
4987 		return (EINVAL);	/* disallow for now */
4988 
4989 	default:
4990 		return (ENOENT);
4991 	}
4992 }
4993 
4994 #include "opt_ddb.h"
4995 #ifdef DDB
4996 #include <ddb/ddb.h>
4997 #include <sys/cons.h>
4998 
4999 /*
5000  * List resources based on PCI map registers, for use within ddb.
5001  */
5002 
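/* Invoked from the ddb prompt as "show pciregs". */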
5003 DB_SHOW_COMMAND(pciregs, db_pci_dump)
5004 {
5005 	struct pci_devinfo *dinfo;
5006 	struct devlist *devlist_head;
5007 	struct pci_conf *p;
5008 	const char *name;
5009 	int i, error, none_count;
5010 
5011 	none_count = 0;
5012 	/* get the head of the device queue */
5013 	devlist_head = &pci_devq;
5014 
5015 	/*
5016 	 * Go through the list of devices and print out devices
5017 	 */
5018 	for (error = 0, i = 0,
5019 	     dinfo = STAILQ_FIRST(devlist_head);
5020 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
5021 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
5022 
5023 		/* Populate pd_name and pd_unit */
5024 		name = NULL;
5025 		if (dinfo->cfg.dev)
5026 			name = device_get_name(dinfo->cfg.dev);
5027 
5028 		p = &dinfo->conf;
5029 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
5030 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
5031 			(name && *name) ? name : "none",
5032 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
5033 			none_count++,
5034 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
5035 			p->pc_sel.pc_func, (p->pc_class << 16) |
5036 			(p->pc_subclass << 8) | p->pc_progif,
5037 			(p->pc_subdevice << 16) | p->pc_subvendor,
5038 			(p->pc_device << 16) | p->pc_vendor,
5039 			p->pc_revid, p->pc_hdr);
5040 	}
5041 }
5042 #endif /* DDB */
5043 
5044 static struct resource *
5045 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
5046     rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
5047     u_int flags)
5048 {
5049 	struct pci_devinfo *dinfo = device_get_ivars(child);
5050 	struct resource_list *rl = &dinfo->resources;
5051 	struct resource *res;
5052 	struct pci_map *pm;
5053 	uint16_t cmd;
5054 	pci_addr_t map, testval;
5055 	int mapsize;
5056 
5057 	res = NULL;
5058 
5059 	/* If rid is managed by EA, ignore it */
5060 	if (pci_ea_is_enabled(child, *rid))
5061 		goto out;
5062 
5063 	pm = pci_find_bar(child, *rid);
5064 	if (pm != NULL) {
5065 		/* This is a BAR that we failed to allocate earlier. */
5066 		mapsize = pm->pm_size;
5067 		map = pm->pm_value;
5068 	} else {
5069 		/*
5070 		 * Weed out the bogons, and figure out how large the
5071 		 * BAR/map is.  BARs that read back 0 here are bogus
5072 		 * and unimplemented.  Note: atapci devices in legacy
5073 		 * mode are special and handled elsewhere in the code.
5074 		 * If you have an atapci device in legacy mode and it
5075 		 * fails here, that other code is broken.
5076 		 */
5077 		pci_read_bar(child, *rid, &map, &testval, NULL);
5078 
5079 		/*
5080 		 * Determine the size of the BAR and ignore BARs with a size
5081 		 * of 0.  Device ROM BARs use a different mask value.
5082 		 */
5083 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
5084 			mapsize = pci_romsize(testval);
5085 		else
5086 			mapsize = pci_mapsize(testval);
5087 		if (mapsize == 0)
5088 			goto out;
5089 		pm = pci_add_bar(child, *rid, map, mapsize);
5090 	}
5091 
5092 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
5093 		if (type != SYS_RES_MEMORY) {
5094 			if (bootverbose)
5095 				device_printf(dev,
5096 				    "child %s requested type %d for rid %#x,"
5097 				    " but the BAR says it is an memio\n",
5098 				    device_get_nameunit(child), type, *rid);
5099 			goto out;
5100 		}
5101 	} else {
5102 		if (type != SYS_RES_IOPORT) {
5103 			if (bootverbose)
5104 				device_printf(dev,
5105 				    "child %s requested type %d for rid %#x,"
5106 				    " but the BAR says it is an ioport\n",
5107 				    device_get_nameunit(child), type, *rid);
5108 			goto out;
5109 		}
5110 	}
5111 
5112 	/*
5113 	 * For real BARs, we need to override the size that
5114 	 * the driver requests, because that's what the BAR
5115 	 * actually uses and we would otherwise have a
5116 	 * situation where we might allocate the excess to
5117 	 * another driver, which won't work.
5118 	 */
5119 	count = ((pci_addr_t)1 << mapsize) * num;
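	/*
	 * A BAR is naturally aligned to its size, so raise the
	 * requested alignment to at least the size of the map.
	 */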
5120 	if (RF_ALIGNMENT(flags) < mapsize)
5121 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
5122 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
5123 		flags |= RF_PREFETCHABLE;
5124 
5125 	/*
5126 	 * Allocate enough resource, and then write back the
5127 	 * appropriate BAR for that resource.
5128 	 */
5129 	resource_list_add(rl, type, *rid, start, end, count);
5130 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
5131 	    count, flags & ~RF_ACTIVE);
5132 	if (res == NULL) {
5133 		resource_list_delete(rl, type, *rid);
5134 		device_printf(child,
5135 		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
5136 		    count, *rid, type, start, end);
5137 		goto out;
5138 	}
5139 	if (bootverbose)
5140 		device_printf(child,
5141 		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
5142 		    count, *rid, type, rman_get_start(res));
5143 
5144 	/* Disable decoding via the CMD register before updating the BAR */
5145 	cmd = pci_read_config(child, PCIR_COMMAND, 2);
5146 	pci_write_config(child, PCIR_COMMAND,
5147 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
5148 
5149 	map = rman_get_start(res);
5150 	pci_write_bar(child, pm, map);
5151 
5152 	/* Restore the original value of the CMD register */
5153 	pci_write_config(child, PCIR_COMMAND, cmd, 2);
5154 out:
5155 	return (res);
5156 }
5157 
5158 struct resource *
5159 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
5160     rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
5161     u_int flags)
5162 {
5163 	struct pci_devinfo *dinfo;
5164 	struct resource_list *rl;
5165 	struct resource_list_entry *rle;
5166 	struct resource *res;
5167 	pcicfgregs *cfg;
5168 
5169 	/*
5170 	 * Perform lazy resource allocation
5171 	 */
5172 	dinfo = device_get_ivars(child);
5173 	rl = &dinfo->resources;
5174 	cfg = &dinfo->cfg;
5175 	switch (type) {
5176 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
5177 	case PCI_RES_BUS:
5178 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
5179 		    flags));
5180 #endif
5181 	case SYS_RES_IRQ:
5182 		/*
5183 		 * Can't alloc legacy interrupt once MSI messages have
5184 		 * been allocated.
5185 		 */
5186 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
5187 		    cfg->msix.msix_alloc > 0))
5188 			return (NULL);
5189 
5190 		/*
5191 		 * If the child device doesn't have an interrupt
5192 		 * routed and is deserving of an interrupt, try to
5193 		 * assign it one.
5194 		 */
5195 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
5196 		    (cfg->intpin != 0))
5197 			pci_assign_interrupt(dev, child, 0);
5198 		break;
5199 	case SYS_RES_IOPORT:
5200 	case SYS_RES_MEMORY:
5201 #ifdef NEW_PCIB
5202 		/*
5203 		 * PCI-PCI bridge I/O window resources are not BARs.
5204 		 * For those allocations just pass the request up the
5205 		 * tree.
5206 		 */
5207 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
5208 			switch (*rid) {
5209 			case PCIR_IOBASEL_1:
5210 			case PCIR_MEMBASE_1:
5211 			case PCIR_PMBASEL_1:
5212 				/*
5213 				 * XXX: Should we bother creating a resource
5214 				 * list entry?
5215 				 */
5216 				return (bus_generic_alloc_resource(dev, child,
5217 				    type, rid, start, end, count, flags));
5218 			}
5219 		}
5220 #endif
5221 		/* Reserve resources for this BAR if needed. */
5222 		rle = resource_list_find(rl, type, *rid);
5223 		if (rle == NULL) {
5224 			res = pci_reserve_map(dev, child, type, rid, start, end,
5225 			    count, num, flags);
5226 			if (res == NULL)
5227 				return (NULL);
5228 		}
5229 	}
5230 	return (resource_list_alloc(rl, dev, child, type, rid,
5231 	    start, end, count, flags));
5232 }
5233 
5234 struct resource *
5235 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
5236     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
5237 {
5238 #ifdef PCI_IOV
5239 	struct pci_devinfo *dinfo;
5240 #endif
5241 
5242 	if (device_get_parent(child) != dev)
5243 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
5244 		    type, rid, start, end, count, flags));
5245 
5246 #ifdef PCI_IOV
5247 	dinfo = device_get_ivars(child);
5248 	if (dinfo->cfg.flags & PCICFG_VF) {
5249 		switch (type) {
5250 		/* VFs can't have I/O BARs. */
5251 		case SYS_RES_IOPORT:
5252 			return (NULL);
5253 		case SYS_RES_MEMORY:
5254 			return (pci_vf_alloc_mem_resource(dev, child, rid,
5255 			    start, end, count, flags));
5256 		}
5257 
5258 		/* Fall through for other types of resource allocations. */
5259 	}
5260 #endif
5261 
5262 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
5263 	    count, 1, flags));
5264 }
5265 
5266 int
5267 pci_release_resource(device_t dev, device_t child, int type, int rid,
5268     struct resource *r)
5269 {
5270 	struct pci_devinfo *dinfo;
5271 	struct resource_list *rl;
5272 	pcicfgregs *cfg;
5273 
5274 	if (device_get_parent(child) != dev)
5275 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
5276 		    type, rid, r));
5277 
5278 	dinfo = device_get_ivars(child);
5279 	cfg = &dinfo->cfg;
5280 
5281 #ifdef PCI_IOV
5282 	if (dinfo->cfg.flags & PCICFG_VF) {
5283 		switch (type) {
5284 		/* VFs can't have I/O BARs. */
5285 		case SYS_RES_IOPORT:
5286 			return (EDOOFUS);
5287 		case SYS_RES_MEMORY:
5288 			return (pci_vf_release_mem_resource(dev, child, rid,
5289 			    r));
5290 		}
5291 
5292 		/* Fall through for other types of resource allocations. */
5293 	}
5294 #endif
5295 
5296 #ifdef NEW_PCIB
5297 	/*
5298 	 * PCI-PCI bridge I/O window resources are not BARs.  For
5299 	 * those allocations just pass the request up the tree.
5300 	 */
5301 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
5302 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
5303 		switch (rid) {
5304 		case PCIR_IOBASEL_1:
5305 		case PCIR_MEMBASE_1:
5306 		case PCIR_PMBASEL_1:
5307 			return (bus_generic_release_resource(dev, child, type,
5308 			    rid, r));
5309 		}
5310 	}
5311 #endif
5312 
5313 	rl = &dinfo->resources;
5314 	return (resource_list_release(rl, dev, child, type, rid, r));
5315 }
5316 
5317 int
5318 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5319     struct resource *r)
5320 {
5321 	struct pci_devinfo *dinfo;
5322 	int error;
5323 
5324 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5325 	if (error)
5326 		return (error);
5327 
5328 	/* Enable decoding in the command register when activating BARs. */
5329 	if (device_get_parent(child) == dev) {
5330 		/* Device ROMs need their decoding explicitly enabled. */
5331 		dinfo = device_get_ivars(child);
5332 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5333 			pci_write_bar(child, pci_find_bar(child, rid),
5334 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5335 		switch (type) {
5336 		case SYS_RES_IOPORT:
5337 		case SYS_RES_MEMORY:
5338 			error = PCI_ENABLE_IO(dev, child, type);
5339 			break;
5340 		}
5341 	}
5342 	return (error);
5343 }
5344 
5345 int
5346 pci_deactivate_resource(device_t dev, device_t child, int type,
5347     int rid, struct resource *r)
5348 {
5349 	struct pci_devinfo *dinfo;
5350 	int error;
5351 
5352 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5353 	if (error)
5354 		return (error);
5355 
5356 	/* Disable decoding for device ROMs. */
5357 	if (device_get_parent(child) == dev) {
5358 		dinfo = device_get_ivars(child);
5359 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5360 			pci_write_bar(child, pci_find_bar(child, rid),
5361 			    rman_get_start(r));
5362 	}
5363 	return (0);
5364 }
5365 
5366 void
5367 pci_child_deleted(device_t dev, device_t child)
5368 {
5369 	struct resource_list_entry *rle;
5370 	struct resource_list *rl;
5371 	struct pci_devinfo *dinfo;
5372 
5373 	dinfo = device_get_ivars(child);
5374 	rl = &dinfo->resources;
5375 
5376 	EVENTHANDLER_INVOKE(pci_delete_device, child);
5377 
5378 	/* Turn off access to resources we're about to free */
5379 	if (bus_child_present(child) != 0) {
5380 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5381 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5382 
5383 		pci_disable_busmaster(child);
5384 	}
5385 
5386 	/* Free all allocated resources */
5387 	STAILQ_FOREACH(rle, rl, link) {
5388 		if (rle->res) {
5389 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5390 			    resource_list_busy(rl, rle->type, rle->rid)) {
5391 				pci_printf(&dinfo->cfg,
5392 				    "Resource still owned, oops. "
5393 				    "(type=%d, rid=%d, addr=%jx)\n",
5394 				    rle->type, rle->rid,
5395 				    rman_get_start(rle->res));
5396 				bus_release_resource(child, rle->type, rle->rid,
5397 				    rle->res);
5398 			}
5399 			resource_list_unreserve(rl, dev, child, rle->type,
5400 			    rle->rid);
5401 		}
5402 	}
5403 	resource_list_free(rl);
5404 
5405 	pci_freecfg(dinfo);
5406 }
5407 
5408 void
5409 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5410 {
5411 	struct pci_devinfo *dinfo;
5412 	struct resource_list *rl;
5413 	struct resource_list_entry *rle;
5414 
5415 	if (device_get_parent(child) != dev)
5416 		return;
5417 
5418 	dinfo = device_get_ivars(child);
5419 	rl = &dinfo->resources;
5420 	rle = resource_list_find(rl, type, rid);
5421 	if (rle == NULL)
5422 		return;
5423 
5424 	if (rle->res) {
5425 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5426 		    resource_list_busy(rl, type, rid)) {
5427 			device_printf(dev, "delete_resource: "
5428 			    "Resource still owned by child, oops. "
5429 			    "(type=%d, rid=%d, addr=%jx)\n",
5430 			    type, rid, rman_get_start(rle->res));
5431 			return;
5432 		}
5433 		resource_list_unreserve(rl, dev, child, type, rid);
5434 	}
5435 	resource_list_delete(rl, type, rid);
5436 }
5437 
5438 struct resource_list *
5439 pci_get_resource_list(device_t dev, device_t child)
5440 {
5441 	struct pci_devinfo *dinfo = device_get_ivars(child);
5442 
5443 	return (&dinfo->resources);
5444 }
5445 
5446 bus_dma_tag_t
5447 pci_get_dma_tag(device_t bus, device_t dev)
5448 {
5449 	struct pci_softc *sc = device_get_softc(bus);
5450 
5451 	return (sc->sc_dma_tag);
5452 }
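
/*
 * Illustrative sketch (hypothetical driver code and values): the tag
 * returned above is what a child obtains via bus_get_dma_tag(dev) and
 * then refines for its own DMA constraints, e.g.:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev),
 *	    1, 0,			alignment, boundary
 *	    BUS_SPACE_MAXADDR,		lowaddr
 *	    BUS_SPACE_MAXADDR,		highaddr
 *	    NULL, NULL,			filter, filterarg
 *	    DFLTPHYS, 1,		maxsize, nsegments
 *	    DFLTPHYS, 0,		maxsegsize, flags
 *	    NULL, NULL,			lockfunc, lockarg
 *	    &sc->dma_tag);
 */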
5453 
5454 uint32_t
5455 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5456 {
5457 	struct pci_devinfo *dinfo = device_get_ivars(child);
5458 	pcicfgregs *cfg = &dinfo->cfg;
5459 
5460 #ifdef PCI_IOV
5461 	/*
5462 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5463 	 * emulate them here.
5464 	 */
5465 	if (cfg->flags & PCICFG_VF) {
5466 		if (reg == PCIR_VENDOR) {
5467 			switch (width) {
5468 			case 4:
5469 				return (cfg->device << 16 | cfg->vendor);
5470 			case 2:
5471 				return (cfg->vendor);
5472 			case 1:
5473 				return (cfg->vendor & 0xff);
5474 			default:
5475 				return (0xffffffff);
5476 			}
5477 		} else if (reg == PCIR_DEVICE) {
5478 			switch (width) {
5479 			/* Note that an unaligned 4-byte read is an error. */
5480 			case 2:
5481 				return (cfg->device);
5482 			case 1:
5483 				return (cfg->device & 0xff);
5484 			default:
5485 				return (0xffffffff);
5486 			}
5487 		}
5488 	}
5489 #endif
5490 
5491 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5492 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5493 }
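
/*
 * For example (illustrative): with the VF emulation above, a 4-byte
 * read of the vendor/device word behaves the same as on a PF:
 *
 *	uint32_t id = pci_read_config(dev, PCIR_VENDOR, 4);
 *	which yields (cfg->device << 16) | cfg->vendor.
 */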
5494 
5495 void
5496 pci_write_config_method(device_t dev, device_t child, int reg,
5497     uint32_t val, int width)
5498 {
5499 	struct pci_devinfo *dinfo = device_get_ivars(child);
5500 	pcicfgregs *cfg = &dinfo->cfg;
5501 
5502 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5503 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5504 }
5505 
5506 int
5507 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5508     size_t buflen)
5509 {
5510 
5511 	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5512 	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5513 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5514 	return (0);
5515 }
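
/*
 * For a function at domain 0, bus 2, slot 3, function 1, the buffer
 * would read (illustrative): "slot=3 function=1 dbsf=pci0:2:3:1".
 */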
5516 
5517 int
5518 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5519     size_t buflen)
5520 {
5521 	struct pci_devinfo *dinfo;
5522 	pcicfgregs *cfg;
5523 
5524 	dinfo = device_get_ivars(child);
5525 	cfg = &dinfo->cfg;
5526 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5527 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5528 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5529 	    cfg->progif);
5530 	return (0);
5531 }
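
/*
 * Illustrative output for a hypothetical Ethernet function (the
 * subvendor/subdevice values here are made up):
 * "vendor=0x8086 device=0x10d3 subvendor=0x8086 subdevice=0x0000
 * class=0x020000" (base class 0x02, subclass 0x00, progif 0x00).
 */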
5532 
5533 int
5534 pci_assign_interrupt_method(device_t dev, device_t child)
5535 {
5536 	struct pci_devinfo *dinfo = device_get_ivars(child);
5537 	pcicfgregs *cfg = &dinfo->cfg;
5538 
5539 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5540 	    cfg->intpin));
5541 }
5542 
5543 static void
5544 pci_lookup(void *arg, const char *name, device_t *dev)
5545 {
5546 	long val;
5547 	char *end;
5548 	int domain, bus, slot, func;
5549 
5550 	if (*dev != NULL)
5551 		return;
5552 
5553 	/*
5554 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5555 	 * pciB:S:F.  In the latter case, the domain is assumed to
5556 	 * be zero.
5557 	 */
5558 	if (strncmp(name, "pci", 3) != 0)
5559 		return;
5560 	val = strtol(name + 3, &end, 10);
5561 	if (val < 0 || val > INT_MAX || *end != ':')
5562 		return;
5563 	domain = val;
5564 	val = strtol(end + 1, &end, 10);
5565 	if (val < 0 || val > INT_MAX || *end != ':')
5566 		return;
5567 	bus = val;
5568 	val = strtol(end + 1, &end, 10);
5569 	if (val < 0 || val > INT_MAX)
5570 		return;
5571 	slot = val;
5572 	if (*end == ':') {
5573 		val = strtol(end + 1, &end, 10);
5574 		if (val < 0 || val > INT_MAX || *end != '\0')
5575 			return;
5576 		func = val;
5577 	} else if (*end == '\0') {
5578 		func = slot;
5579 		slot = bus;
5580 		bus = domain;
5581 		domain = 0;
5582 	} else
5583 		return;
5584 
5585 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5586 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5587 		return;
5588 
5589 	*dev = pci_find_dbsf(domain, bus, slot, func);
5590 }
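
/*
 * For example (illustrative): "pci1:2:3:4" selects domain 1, bus 2,
 * slot 3, function 4, while the short form "pci1:2:3" selects domain
 * 0, bus 1, slot 2, function 3.
 */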
5591 
5592 static int
5593 pci_modevent(module_t mod, int what, void *arg)
5594 {
5595 	static struct cdev *pci_cdev;
5596 	static eventhandler_tag tag;
5597 
5598 	switch (what) {
5599 	case MOD_LOAD:
5600 		STAILQ_INIT(&pci_devq);
5601 		pci_generation = 0;
5602 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5603 		    "pci");
5604 		pci_load_vendor_data();
5605 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5606 		    1000);
5607 		break;
5608 
5609 	case MOD_UNLOAD:
5610 		if (tag != NULL)
5611 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5612 		destroy_dev(pci_cdev);
5613 		break;
5614 	}
5615 
5616 	return (0);
5617 }
5618 
5619 static void
5620 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5621 {
5622 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5623 	struct pcicfg_pcie *cfg;
5624 	int version, pos;
5625 
5626 	cfg = &dinfo->cfg.pcie;
5627 	pos = cfg->pcie_location;
5628 
5629 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5630 
5631 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5632 
5633 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5634 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5635 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5636 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5637 
5638 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5639 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5640 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5641 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5642 
5643 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5644 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5645 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5646 
5647 	if (version > 1) {
5648 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5649 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5650 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5651 	}
5652 #undef WREG
5653 }
5654 
5655 static void
5656 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5657 {
5658 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5659 	    dinfo->cfg.pcix.pcix_command,  2);
5660 }
5661 
5662 void
5663 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5664 {
5665 
5666 	/*
5667 	 * Restore the device to full power mode.  We must do this
5668 	 * before we restore the registers because moving from D3 to
5669 	 * D0 will cause the chip's BARs and some other registers to
5670 	 * be reset to some unknown power-on reset values.  Cut down
5671 	 * the noise on boot by doing nothing if we are already in
5672 	 * state D0.
5673 	 */
5674 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5675 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5676 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5677 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5678 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5679 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5680 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5681 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5682 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5683 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5684 	case PCIM_HDRTYPE_NORMAL:
5685 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5686 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5687 		break;
5688 	case PCIM_HDRTYPE_BRIDGE:
5689 		pci_write_config(dev, PCIR_SECLAT_1,
5690 		    dinfo->cfg.bridge.br_seclat, 1);
5691 		pci_write_config(dev, PCIR_SUBBUS_1,
5692 		    dinfo->cfg.bridge.br_subbus, 1);
5693 		pci_write_config(dev, PCIR_SECBUS_1,
5694 		    dinfo->cfg.bridge.br_secbus, 1);
5695 		pci_write_config(dev, PCIR_PRIBUS_1,
5696 		    dinfo->cfg.bridge.br_pribus, 1);
5697 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5698 		    dinfo->cfg.bridge.br_control, 2);
5699 		break;
5700 	case PCIM_HDRTYPE_CARDBUS:
5701 		pci_write_config(dev, PCIR_SECLAT_2,
5702 		    dinfo->cfg.bridge.br_seclat, 1);
5703 		pci_write_config(dev, PCIR_SUBBUS_2,
5704 		    dinfo->cfg.bridge.br_subbus, 1);
5705 		pci_write_config(dev, PCIR_SECBUS_2,
5706 		    dinfo->cfg.bridge.br_secbus, 1);
5707 		pci_write_config(dev, PCIR_PRIBUS_2,
5708 		    dinfo->cfg.bridge.br_pribus, 1);
5709 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5710 		    dinfo->cfg.bridge.br_control, 2);
5711 		break;
5712 	}
5713 	pci_restore_bars(dev);
5714 
5715 	/*
5716 	 * Restore the PCI Express and PCI-X capability registers.
5717 	 */
5718 	if (dinfo->cfg.pcie.pcie_location != 0)
5719 		pci_cfg_restore_pcie(dev, dinfo);
5720 	if (dinfo->cfg.pcix.pcix_location != 0)
5721 		pci_cfg_restore_pcix(dev, dinfo);
5722 
5723 	/* Restore MSI and MSI-X configurations if they are present. */
5724 	if (dinfo->cfg.msi.msi_location != 0)
5725 		pci_resume_msi(dev);
5726 	if (dinfo->cfg.msix.msix_location != 0)
5727 		pci_resume_msix(dev);
5728 
5729 #ifdef PCI_IOV
5730 	if (dinfo->cfg.iov != NULL)
5731 		pci_iov_cfg_restore(dev, dinfo);
5732 #endif
5733 }
5734 
5735 static void
5736 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5737 {
5738 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5739 	struct pcicfg_pcie *cfg;
5740 	int version, pos;
5741 
5742 	cfg = &dinfo->cfg.pcie;
5743 	pos = cfg->pcie_location;
5744 
5745 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5746 
5747 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5748 
5749 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5750 
5751 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5752 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5753 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5754 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5755 
5756 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5757 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5758 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5759 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5760 
5761 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5762 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5763 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5764 
5765 	if (version > 1) {
5766 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5767 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5768 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5769 	}
5770 #undef RREG
5771 }
5772 
5773 static void
5774 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5775 {
5776 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5777 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5778 }
5779 
5780 void
5781 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5782 {
5783 	uint32_t cls;
5784 	int ps;
5785 
5786 	/*
5787 	 * Some drivers apparently write to these registers w/o updating our
5788 	 * cached copy.  No harm happens if we update the copy, so do so here
5789 	 * so we can restore them.  The COMMAND register is modified by the
5790 	 * bus w/o updating the cache.  This should represent the normally
5791 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5792 	 */
5793 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5794 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5795 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5796 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5797 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5798 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5799 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5800 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5801 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5802 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5803 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5804 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5805 	case PCIM_HDRTYPE_NORMAL:
5806 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5807 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5808 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5809 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5810 		break;
5811 	case PCIM_HDRTYPE_BRIDGE:
5812 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5813 		    PCIR_SECLAT_1, 1);
5814 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5815 		    PCIR_SUBBUS_1, 1);
5816 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5817 		    PCIR_SECBUS_1, 1);
5818 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5819 		    PCIR_PRIBUS_1, 1);
5820 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5821 		    PCIR_BRIDGECTL_1, 2);
5822 		break;
5823 	case PCIM_HDRTYPE_CARDBUS:
5824 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5825 		    PCIR_SECLAT_2, 1);
5826 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5827 		    PCIR_SUBBUS_2, 1);
5828 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5829 		    PCIR_SECBUS_2, 1);
5830 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5831 		    PCIR_PRIBUS_2, 1);
5832 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5833 		    PCIR_BRIDGECTL_2, 2);
5834 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5835 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5836 		break;
5837 	}
5838 
5839 	if (dinfo->cfg.pcie.pcie_location != 0)
5840 		pci_cfg_save_pcie(dev, dinfo);
5841 
5842 	if (dinfo->cfg.pcix.pcix_location != 0)
5843 		pci_cfg_save_pcix(dev, dinfo);
5844 
5845 #ifdef PCI_IOV
5846 	if (dinfo->cfg.iov != NULL)
5847 		pci_iov_cfg_save(dev, dinfo);
5848 #endif
5849 
5850 	/*
5851 	 * Don't set the power state for display devices, base peripherals,
5852 	 * and memory devices, since bad things happen when they are powered
5853 	 * down.  We should (a) have drivers that can easily detach and (b)
5854 	 * use generic drivers for these devices so that some device actually
5855 	 * attaches.  We need to make sure that when we implement (a) we don't
5856 	 * power the device down on a reattach.
5857 	 */
5858 	if (!setstate)
5859 		return;
5860 	cls = pci_get_class(dev);
5861 	switch (pci_do_power_nodriver) {
5862 	case 0:		/* No powerdown at all. */
5863 		return;
5864 	case 1:		/* Conservative about what to power down. */
5865 		if (cls == PCIC_STORAGE)
5866 			return;
5867 		/* FALLTHROUGH */
5868 	case 2:		/* Aggressive about what to power down. */
5869 		if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5870 		    cls == PCIC_BASEPERIPH)
5871 			return;
5872 		/* FALLTHROUGH */
5873 	case 3:		/* Power down everything. */
5874 		break;
5875 	}
5876 
5877 	/*
5878 	 * PCI spec says we can only go into D3 state from D0 state.
5879 	 * Transition from D[12] into D0 before going to D3 state.
5880 	 */
5881 	ps = pci_get_powerstate(dev);
5882 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5883 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5884 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5885 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5886 }
5887 
5888 /* Wrapper APIs suitable for device driver use. */
5889 void
5890 pci_save_state(device_t dev)
5891 {
5892 	struct pci_devinfo *dinfo;
5893 
5894 	dinfo = device_get_ivars(dev);
5895 	pci_cfg_save(dev, dinfo, 0);
5896 }
5897 
5898 void
5899 pci_restore_state(device_t dev)
5900 {
5901 	struct pci_devinfo *dinfo;
5902 
5903 	dinfo = device_get_ivars(dev);
5904 	pci_cfg_restore(dev, dinfo);
5905 }
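
/*
 * Illustrative sketch (hypothetical driver code, not part of this
 * file): a driver pairing the wrappers above around a device-level
 * reset might look like:
 *
 *	pci_save_state(dev);
 *	foo_reset_hardware(sc);		hypothetical device reset
 *	pci_restore_state(dev);
 */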
5906 
5907 static int
5908 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5909     uintptr_t *id)
5910 {
5911 
5912 	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5913 }
5914 
5915 /* Find the PCI-express root port upstream of a given PCI device. */
5916 device_t
5917 pci_find_pcie_root_port(device_t dev)
5918 {
5919 	struct pci_devinfo *dinfo;
5920 	devclass_t pci_class;
5921 	device_t pcib, bus;
5922 
5923 	pci_class = devclass_find("pci");
5924 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5925 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5926 
5927 	/*
5928 	 * Walk the bridge hierarchy until we find a PCI-e root
5929 	 * port or a non-PCI device.
5930 	 */
5931 	for (;;) {
5932 		bus = device_get_parent(dev);
5933 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5934 		    device_get_nameunit(dev)));
5935 
5936 		pcib = device_get_parent(bus);
5937 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5938 		    device_get_nameunit(bus)));
5939 
5940 		/*
5941 		 * pcib's parent must be a PCI bus for this to be a
5942 		 * PCI-PCI bridge.
5943 		 */
5944 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5945 			return (NULL);
5946 
5947 		dinfo = device_get_ivars(pcib);
5948 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5949 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5950 			return (pcib);
5951 
5952 		dev = pcib;
5953 	}
5954 }
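
/*
 * Example (illustrative): a driver could locate the root port above
 * it to inspect, say, link status:
 *
 *	device_t root;
 *	uint16_t sta;
 *
 *	root = pci_find_pcie_root_port(dev);
 *	if (root != NULL)
 *		sta = pcie_read_config(root, PCIER_LINK_STA, 2);
 */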
5955 
5956 /*
5957  * Wait for pending transactions to complete on a PCI-express function.
5958  *
5959  * The maximum delay is specified in milliseconds in max_delay.  Note
5960  * that this function may sleep.
5961  *
5962  * Returns true if the function is idle and false if the timeout is
5963  * exceeded.  If dev is not a PCI-express function, this returns true.
5964  */
5965 bool
5966 pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5967 {
5968 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5969 	uint16_t sta;
5970 	int cap;
5971 
5972 	cap = dinfo->cfg.pcie.pcie_location;
5973 	if (cap == 0)
5974 		return (true);
5975 
5976 	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5977 	while (sta & PCIEM_STA_TRANSACTION_PND) {
5978 		if (max_delay == 0)
5979 			return (false);
5980 
5981 		/* Poll once every 100 milliseconds up to the timeout. */
5982 		if (max_delay > 100) {
5983 			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5984 			max_delay -= 100;
5985 		} else {
5986 			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5987 			    C_HARDCLOCK);
5988 			max_delay = 0;
5989 		}
5990 		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5991 	}
5992 
5993 	return (true);
5994 }
5995 
5996 /*
5997  * Determine the maximum Completion Timeout in microseconds.
5998  *
5999  * For non-PCI-express functions this returns 0.
6000  */
6001 int
6002 pcie_get_max_completion_timeout(device_t dev)
6003 {
6004 	struct pci_devinfo *dinfo = device_get_ivars(dev);
6005 	int cap;
6006 
6007 	cap = dinfo->cfg.pcie.pcie_location;
6008 	if (cap == 0)
6009 		return (0);
6010 
6011 	/*
6012 	 * Functions using the 1.x spec use the default timeout range of
6013 	 * 50 microseconds to 50 milliseconds.  Functions that do not
6014 	 * support programmable timeouts also use this range.
6015 	 */
6016 	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
6017 	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
6018 	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
6019 		return (50 * 1000);
6020 
6021 	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
6022 	    PCIEM_CTL2_COMP_TIMO_VAL) {
6023 	case PCIEM_CTL2_COMP_TIMO_100US:
6024 		return (100);
6025 	case PCIEM_CTL2_COMP_TIMO_10MS:
6026 		return (10 * 1000);
6027 	case PCIEM_CTL2_COMP_TIMO_55MS:
6028 		return (55 * 1000);
6029 	case PCIEM_CTL2_COMP_TIMO_210MS:
6030 		return (210 * 1000);
6031 	case PCIEM_CTL2_COMP_TIMO_900MS:
6032 		return (900 * 1000);
6033 	case PCIEM_CTL2_COMP_TIMO_3500MS:
6034 		return (3500 * 1000);
6035 	case PCIEM_CTL2_COMP_TIMO_13S:
6036 		return (13 * 1000 * 1000);
6037 	case PCIEM_CTL2_COMP_TIMO_64S:
6038 		return (64 * 1000 * 1000);
6039 	default:
6040 		return (50 * 1000);
6041 	}
6042 }
6043 
6044 /*
6045  * Perform a Function Level Reset (FLR) on a device.
6046  *
6047  * This function first waits for any pending transactions to complete
6048  * within the timeout specified by max_delay.  If transactions are
6049  * still pending, the function will return false without attempting a
6050  * reset.
6051  *
6052  * If dev is not a PCI-express function or does not support FLR, this
6053  * function returns false.
6054  *
6055  * Note that no registers are saved or restored.  The caller is
6056  * responsible for saving and restoring any registers including
6057  * PCI-standard registers via pci_save_state() and
6058  * pci_restore_state().
6059  */
6060 bool
6061 pcie_flr(device_t dev, u_int max_delay, bool force)
6062 {
6063 	struct pci_devinfo *dinfo = device_get_ivars(dev);
6064 	uint16_t cmd, ctl;
6065 	int compl_delay;
6066 	int cap;
6067 
6068 	cap = dinfo->cfg.pcie.pcie_location;
6069 	if (cap == 0)
6070 		return (false);
6071 
6072 	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
6073 		return (false);
6074 
6075 	/*
6076 	 * Disable busmastering to prevent generation of new
6077 	 * transactions while waiting for the device to go idle.  If
6078 	 * the idle timeout fails, the command register is restored
6079 	 * which will re-enable busmastering.
6080 	 */
6081 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
6082 	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
6083 	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
6084 		if (!force) {
6085 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
6086 			return (false);
6087 		}
6088 		pci_printf(&dinfo->cfg,
6089 		    "Resetting with transactions pending after %d ms\n",
6090 		    max_delay);
6091 
6092 		/*
6093 		 * Extend the post-FLR delay to cover the maximum
6094 		 * Completion Timeout delay of anything in flight
6095 		 * during the FLR delay.  Enforce a delay of at least
6096 		 * 10 ms.
6097 		 */
6098 		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
6099 		if (compl_delay < 10)
6100 			compl_delay = 10;
6101 	} else
6102 		compl_delay = 0;
6103 
6104 	/* Initiate the reset. */
6105 	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
6106 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
6107 	    PCIEM_CTL_INITIATE_FLR, 2);
6108 
6109 	/* Wait 100 ms plus any extra completion-timeout allowance. */
6110 	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
6111 
6112 	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
6113 	    PCIEM_STA_TRANSACTION_PND)
6114 		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
6115 	return (true);
6116 }
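
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * as the comment above notes, the caller brackets the reset with a
 * config-space save and restore:
 *
 *	pci_save_state(dev);
 *	if (!pcie_flr(dev,
 *	    max(pcie_get_max_completion_timeout(dev) / 1000, 10), true))
 *		device_printf(dev, "FLR failed or unsupported\n");
 *	pci_restore_state(dev);
 */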
6117