xref: /freebsd/sys/dev/pci/pci.c (revision 642870485c089b57000fe538d3485e272b038d59)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
/*
 * Evaluates to true when config register offset 'reg' is the BIOS/ROM
 * register for the header type recorded in 'cfg': PCIR_BIOS for
 * PCIM_HDRTYPE_NORMAL headers, PCIR_BIOS_1 for PCIM_HDRTYPE_BRIDGE headers.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (e.g. a conditional expression) keep their intended grouping.
 * Each argument may be evaluated twice, so avoid arguments with side
 * effects.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
82 
/*
 * Prototypes for file-local (static) helpers, grouped roughly by purpose:
 * BAR/ROM register decoding, device probing and description, capability
 * and VPD parsing, and MSI/MSI-X support.
 */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static int		pci_detach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
/* Disabled together with the (also disabled) definition of this function. */
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static int		pci_get_id_method(device_t dev, device_t child,
			    enum pci_id_type type, uintptr_t *rid);

static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
    int b, int s, int f, uint16_t vid, uint16_t did);
130 
/*
 * Method table for the pci bus driver.  Each DEVMETHOD() entry binds an
 * interface method (device, bus or PCI interface) to the function that
 * implements it for this driver; the table is terminated by DEVMETHOD_END.
 * The SR-IOV entries are only present when PCI_IOV is defined.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
205 
/*
 * Declare the "pci" driver class (no base classes) using the method table
 * above, with a softc sized for struct pci_softc.
 */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/*
 * Register the driver as a child of "pcib" devices; pci_modevent receives
 * module events.
 */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * Vendor description data buffer and its size.
 * NOTE(review): presumably filled in by pci_load_vendor_data() -- confirm.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
214 
/*
 * One entry of the device quirk table.  An entry associates a device ID
 * with a quirk type and up to two type-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	/*
	 * For the MAP_REG/UNMAP_REG entries in the table below, arg1 holds
	 * the config-space offset of the affected map register; it is 0 for
	 * the other quirk types.
	 */
	int	arg1;
	int	arg2;	/* 0 in every entry of the table below */
};
227 
/*
 * Table of known device quirks, searched linearly by pci_has_quirk().
 * The devid field carries the device ID in its upper 16 bits and the vendor
 * ID in its lower 16 bits (e.g. 0x71138086 is an Intel, vendor 0x8086,
 * part).  The table is terminated by an all-zero entry: lookups stop at
 * the first entry whose devid is 0.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
	 * a bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit
	 * of the command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* Terminator: devid == 0 ends the table. */
	{ 0 }
};
307 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/*
 * Global list of discovered PCI functions; pci_fill_devinfo() appends each
 * new function and the pci_find_*() lookups walk it.
 */
struct devlist pci_devq;
/* Incremented every time a function is added to pci_devq. */
uint32_t pci_generation;
/* Number of functions added to pci_devq. */
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_cap() once a PCI-express capability (pcie_chipset)
 * or a PCI-X capability on a bridge (pcix_chipset) has been seen.
 */
static int pcie_chipset, pcix_chipset;
317 
/*
 * sysctl vars
 *
 * Every knob below lives under the hw.pci node.  Each static variable's
 * initializer is its built-in default.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes */
static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* hw.pci.realloc_bars */
static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

/* hw.pci.do_power_nodriver: 0-3, see the description string. */
static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* hw.pci.do_power_resume (non-static: visible outside this file) */
int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.do_power_suspend (non-static: visible outside this file) */
int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

/* hw.pci.enable_msi */
static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix */
static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* hw.pci.msix_rewrite_table */
static int pci_msix_rewrite_table = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, msix_rewrite_table, CTLFLAG_RWTUN,
    &pci_msix_rewrite_table, 0,
    "Rewrite entire MSI-X table when updating MSI-X entries");

/* hw.pci.honor_msi_blacklist */
static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/*
 * hw.pci.usb_early_takeover: defaults to on for i386/amd64 and off
 * elsewhere.
 * NOTE(review): the literal 1 passed after the pointer does not match the
 * non-x86 default of 0 -- confirm whether that argument matters here.
 */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

/* hw.pci.clear_bars (no initializer, so it starts at 0) */
static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

/* hw.pci.clear_buses: only built with both NEW_PCIB and PCI_RES_BUS. */
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

/*
 * hw.pci.enable_ari
 * NOTE(review): the variable defaults to 1 but the literal passed after
 * the pointer is 0, unlike the other knobs above -- confirm whether that
 * argument matters here.
 */
static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
392 
393 static int
394 pci_has_quirk(uint32_t devid, int quirk)
395 {
396 	const struct pci_quirk *q;
397 
398 	for (q = &pci_quirks[0]; q->devid; q++) {
399 		if (q->devid == devid && q->type == quirk)
400 			return (1);
401 	}
402 	return (0);
403 }
404 
405 /* Find a device_t by bus/slot/function in domain 0 */
406 
407 device_t
408 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
409 {
410 
411 	return (pci_find_dbsf(0, bus, slot, func));
412 }
413 
414 /* Find a device_t by domain/bus/slot/function */
415 
416 device_t
417 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
418 {
419 	struct pci_devinfo *dinfo;
420 
421 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
422 		if ((dinfo->cfg.domain == domain) &&
423 		    (dinfo->cfg.bus == bus) &&
424 		    (dinfo->cfg.slot == slot) &&
425 		    (dinfo->cfg.func == func)) {
426 			return (dinfo->cfg.dev);
427 		}
428 	}
429 
430 	return (NULL);
431 }
432 
433 /* Find a device_t by vendor/device ID */
434 
435 device_t
436 pci_find_device(uint16_t vendor, uint16_t device)
437 {
438 	struct pci_devinfo *dinfo;
439 
440 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
441 		if ((dinfo->cfg.vendor == vendor) &&
442 		    (dinfo->cfg.device == device)) {
443 			return (dinfo->cfg.dev);
444 		}
445 	}
446 
447 	return (NULL);
448 }
449 
450 device_t
451 pci_find_class(uint8_t class, uint8_t subclass)
452 {
453 	struct pci_devinfo *dinfo;
454 
455 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
456 		if (dinfo->cfg.baseclass == class &&
457 		    dinfo->cfg.subclass == subclass) {
458 			return (dinfo->cfg.dev);
459 		}
460 	}
461 
462 	return (NULL);
463 }
464 
465 static int
466 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
467 {
468 	va_list ap;
469 	int retval;
470 
471 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
472 	    cfg->func);
473 	va_start(ap, fmt);
474 	retval += vprintf(fmt, ap);
475 	va_end(ap);
476 	return (retval);
477 }
478 
479 /* return base address of memory or port map */
480 
481 static pci_addr_t
482 pci_mapbase(uint64_t mapreg)
483 {
484 
485 	if (PCI_BAR_MEM(mapreg))
486 		return (mapreg & PCIM_BAR_MEM_BASE);
487 	else
488 		return (mapreg & PCIM_BAR_IO_BASE);
489 }
490 
491 /* return map type of memory or port map */
492 
493 static const char *
494 pci_maptype(uint64_t mapreg)
495 {
496 
497 	if (PCI_BAR_IO(mapreg))
498 		return ("I/O Port");
499 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
500 		return ("Prefetchable Memory");
501 	return ("Memory");
502 }
503 
/*
 * Return log2 of the size decoded by a map (BAR) register, given the
 * sizing test value.  This is the number of trailing zero bits in the base
 * address field of 'testval', or 0 when that field is entirely zero.
 */
int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count how far the lowest set bit is from bit 0. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
522 
523 /* return base address of device ROM */
524 
525 static pci_addr_t
526 pci_rombase(uint64_t mapreg)
527 {
528 
529 	return (mapreg & PCIM_BIOS_ADDR_MASK);
530 }
531 
/*
 * Return log2 of the size decoded for the device ROM, given the sizing
 * test value.  This is the number of trailing zero bits in the ROM address
 * field of 'testval', or 0 when that field is entirely zero.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);

	/* Count how far the lowest set bit is from bit 0. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
550 
551 /* return log2 of address range supported by map register */
552 
553 static int
554 pci_maprange(uint64_t mapreg)
555 {
556 	int ln2range = 0;
557 
558 	if (PCI_BAR_IO(mapreg))
559 		ln2range = 32;
560 	else
561 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
562 		case PCIM_BAR_MEM_32:
563 			ln2range = 32;
564 			break;
565 		case PCIM_BAR_MEM_1MB:
566 			ln2range = 20;
567 			break;
568 		case PCIM_BAR_MEM_64:
569 			ln2range = 64;
570 			break;
571 		}
572 	return (ln2range);
573 }
574 
575 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
576 
577 static void
578 pci_fixancient(pcicfgregs *cfg)
579 {
580 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
581 		return;
582 
583 	/* PCI to PCI bridges use header type 1 */
584 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
585 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
586 }
587 
588 /* extract header type specific config data */
589 
590 static void
591 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
592 {
593 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
594 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
595 	case PCIM_HDRTYPE_NORMAL:
596 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
597 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
598 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
599 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
600 		cfg->nummaps	    = PCI_MAXMAPS_0;
601 		break;
602 	case PCIM_HDRTYPE_BRIDGE:
603 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
604 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
605 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
606 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
607 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
608 		cfg->nummaps	    = PCI_MAXMAPS_1;
609 		break;
610 	case PCIM_HDRTYPE_CARDBUS:
611 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
612 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
613 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
614 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
615 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
616 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
617 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
618 		cfg->nummaps	    = PCI_MAXMAPS_2;
619 		break;
620 	}
621 #undef REG
622 }
623 
624 /* read configuration header into pcicfgregs structure */
625 struct pci_devinfo *
626 pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
627 {
628 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
629 	uint16_t vid, did;
630 
631 	vid = REG(PCIR_VENDOR, 2);
632 	did = REG(PCIR_DEVICE, 2);
633 	if (vid != 0xffff)
634 		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));
635 
636 	return (NULL);
637 }
638 
639 struct pci_devinfo *
640 pci_alloc_devinfo_method(device_t dev)
641 {
642 
643 	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
644 	    M_WAITOK | M_ZERO));
645 }
646 
647 static struct pci_devinfo *
648 pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
649     uint16_t vid, uint16_t did)
650 {
651 	struct pci_devinfo *devlist_entry;
652 	pcicfgregs *cfg;
653 
654 	devlist_entry = PCI_ALLOC_DEVINFO(bus);
655 
656 	cfg = &devlist_entry->cfg;
657 
658 	cfg->domain		= d;
659 	cfg->bus		= b;
660 	cfg->slot		= s;
661 	cfg->func		= f;
662 	cfg->vendor		= vid;
663 	cfg->device		= did;
664 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
665 	cfg->statreg		= REG(PCIR_STATUS, 2);
666 	cfg->baseclass		= REG(PCIR_CLASS, 1);
667 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
668 	cfg->progif		= REG(PCIR_PROGIF, 1);
669 	cfg->revid		= REG(PCIR_REVID, 1);
670 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
671 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
672 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
673 	cfg->intpin		= REG(PCIR_INTPIN, 1);
674 	cfg->intline		= REG(PCIR_INTLINE, 1);
675 
676 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
677 	cfg->hdrtype		&= ~PCIM_MFDEV;
678 	STAILQ_INIT(&cfg->maps);
679 
680 	cfg->iov		= NULL;
681 
682 	pci_fixancient(cfg);
683 	pci_hdrtypedata(pcib, b, s, f, cfg);
684 
685 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
686 		pci_read_cap(pcib, cfg);
687 
688 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
689 
690 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
691 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
692 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
693 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
694 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
695 
696 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
697 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
698 	devlist_entry->conf.pc_vendor = cfg->vendor;
699 	devlist_entry->conf.pc_device = cfg->device;
700 
701 	devlist_entry->conf.pc_class = cfg->baseclass;
702 	devlist_entry->conf.pc_subclass = cfg->subclass;
703 	devlist_entry->conf.pc_progif = cfg->progif;
704 	devlist_entry->conf.pc_revid = cfg->revid;
705 
706 	pci_numdevs++;
707 	pci_generation++;
708 
709 	return (devlist_entry);
710 }
711 #undef REG
712 
713 static void
714 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
715 {
716 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
717     cfg->ea.ea_location + (n), w)
718 	int num_ent;
719 	int ptr;
720 	int a, b;
721 	uint32_t val;
722 	int ent_size;
723 	uint32_t dw[4];
724 	uint64_t base, max_offset;
725 	struct pci_ea_entry *eae;
726 
727 	if (cfg->ea.ea_location == 0)
728 		return;
729 
730 	STAILQ_INIT(&cfg->ea.ea_entries);
731 
732 	/* Determine the number of entries */
733 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
734 	num_ent &= PCIM_EA_NUM_ENT_MASK;
735 
736 	/* Find the first entry to care of */
737 	ptr = PCIR_EA_FIRST_ENT;
738 
739 	/* Skip DWORD 2 for type 1 functions */
740 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
741 		ptr += 4;
742 
743 	for (a = 0; a < num_ent; a++) {
744 
745 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
746 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
747 
748 		/* Read a number of dwords in the entry */
749 		val = REG(ptr, 4);
750 		ptr += 4;
751 		ent_size = (val & PCIM_EA_ES);
752 
753 		for (b = 0; b < ent_size; b++) {
754 			dw[b] = REG(ptr, 4);
755 			ptr += 4;
756 		}
757 
758 		eae->eae_flags = val;
759 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
760 
761 		base = dw[0] & PCIM_EA_FIELD_MASK;
762 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
763 		b = 2;
764 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
765 			base |= (uint64_t)dw[b] << 32UL;
766 			b++;
767 		}
768 		if (((dw[1] & PCIM_EA_IS_64) != 0)
769 		    && (b < ent_size)) {
770 			max_offset |= (uint64_t)dw[b] << 32UL;
771 			b++;
772 		}
773 
774 		eae->eae_base = base;
775 		eae->eae_max_offset = max_offset;
776 
777 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
778 
779 		if (bootverbose) {
780 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
781 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
782 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
783 		}
784 	}
785 }
786 #undef REG
787 
/*
 * Walk the capability list of the function described by 'cfg' and cache
 * the location and key fields of each capability this file knows about
 * (power management, HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X,
 * PCI-express, Enhanced Allocation).  All config accesses go through
 * bridge 'pcib'.
 *
 * The list head is read from PCIR_CAP_PTR for normal and bridge headers
 * and from PCIR_CAP_PTR_2 for cardbus headers; other header types return
 * immediately.
 *
 * NOTE(review): the loop only ends when a next pointer of 0 (or > 255) is
 * read, so a capability list that links back on itself would not terminate.
 *
 * The REG and WREG macros defined here are deliberately NOT undefined at
 * the end: the VPD register helpers that follow use them.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* Pick the register holding the list head for this header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first power management capability is kept. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/*
				 * Record the data register only when the gap
				 * to the next capability is larger than
				 * PCIR_POWER_DATA.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is 2^(MMC field of the control reg). */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* The table size field holds the message count minus 1. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/*
			 * The table and PBA registers each pack a BAR index
			 * (the BIR bits) together with an offset in that BAR.
			 */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				/* Low half: subvendor; high half: subdevice. */
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			/* Capabilities not listed above are skipped. */
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG intentionally stay defined; the functions below use them. */
}
952 
953 /*
954  * PCI Vital Product Data
955  */
956 
957 #define	PCI_VPD_TIMEOUT		1000000
958 
959 static int
960 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
961 {
962 	int count = PCI_VPD_TIMEOUT;
963 
964 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
965 
966 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
967 
968 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
969 		if (--count < 0)
970 			return (ENXIO);
971 		DELAY(1);	/* limit looping */
972 	}
973 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
974 
975 	return (0);
976 }
977 
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * The data register is loaded first, then the 4-byte-aligned address is
 * written with bit 15 (0x8000) set.  The address register is polled until
 * that bit reads back clear.
 *
 * Returns 0 on success, or ENXIO once the PCI_VPD_TIMEOUT retry budget
 * has been used up.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int retries;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	retries = PCI_VPD_TIMEOUT;
	for (;;) {
		if ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) !=
		    0x8000)
			break;
		if (--retries < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
997 
998 #undef PCI_VPD_TIMEOUT
999 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while fetching them from the device one 32-bit word at a time.
 */
1000 struct vpd_readstate {
1001 	device_t	pcib;		/* passed through to pci_read_vpd_reg() */
1002 	pcicfgregs	*cfg;		/* passed through to pci_read_vpd_reg() */
1003 	uint32_t	val;		/* last word read; shifted right as bytes are consumed */
1004 	int		bytesinval;	/* bytes of 'val' not yet handed out */
1005 	int		off;		/* VPD address of the next word to fetch */
1006 	uint8_t		cksum;		/* running 8-bit sum of every byte handed out */
1007 };
1008 
1009 static int
1010 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1011 {
1012 	uint32_t reg;
1013 	uint8_t byte;
1014 
1015 	if (vrs->bytesinval == 0) {
1016 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1017 			return (ENXIO);
1018 		vrs->val = le32toh(reg);
1019 		vrs->off += 4;
1020 		byte = vrs->val & 0xff;
1021 		vrs->bytesinval = 3;
1022 	} else {
1023 		vrs->val = vrs->val >> 8;
1024 		byte = vrs->val & 0xff;
1025 		vrs->bytesinval--;
1026 	}
1027 
1028 	vrs->cksum += byte;
1029 	*data = byte;
1030 	return (0);
1031 }
1032 
1033 static void
1034 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1035 {
1036 	struct vpd_readstate vrs;
1037 	int state;
1038 	int name;
1039 	int remain;
1040 	int i;
1041 	int alloc, off;		/* alloc/off for RO/W arrays */
1042 	int cksumvalid;
1043 	int dflen;
1044 	uint8_t byte;
1045 	uint8_t byte2;
1046 
1047 	/* init vpd reader */
1048 	vrs.bytesinval = 0;
1049 	vrs.off = 0;
1050 	vrs.pcib = pcib;
1051 	vrs.cfg = cfg;
1052 	vrs.cksum = 0;
1053 
1054 	state = 0;
1055 	name = remain = i = 0;	/* shut up stupid gcc */
1056 	alloc = off = 0;	/* shut up stupid gcc */
1057 	dflen = 0;		/* shut up stupid gcc */
1058 	cksumvalid = -1;
1059 	while (state >= 0) {
1060 		if (vpd_nextbyte(&vrs, &byte)) {
1061 			state = -2;
1062 			break;
1063 		}
1064 #if 0
1065 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1066 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1067 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1068 #endif
1069 		switch (state) {
1070 		case 0:		/* item name */
1071 			if (byte & 0x80) {
1072 				if (vpd_nextbyte(&vrs, &byte2)) {
1073 					state = -2;
1074 					break;
1075 				}
1076 				remain = byte2;
1077 				if (vpd_nextbyte(&vrs, &byte2)) {
1078 					state = -2;
1079 					break;
1080 				}
1081 				remain |= byte2 << 8;
1082 				if (remain > (0x7f*4 - vrs.off)) {
1083 					state = -1;
1084 					pci_printf(cfg,
1085 					    "invalid VPD data, remain %#x\n",
1086 					    remain);
1087 				}
1088 				name = byte & 0x7f;
1089 			} else {
1090 				remain = byte & 0x7;
1091 				name = (byte >> 3) & 0xf;
1092 			}
1093 			switch (name) {
1094 			case 0x2:	/* String */
1095 				cfg->vpd.vpd_ident = malloc(remain + 1,
1096 				    M_DEVBUF, M_WAITOK);
1097 				i = 0;
1098 				state = 1;
1099 				break;
1100 			case 0xf:	/* End */
1101 				state = -1;
1102 				break;
1103 			case 0x10:	/* VPD-R */
1104 				alloc = 8;
1105 				off = 0;
1106 				cfg->vpd.vpd_ros = malloc(alloc *
1107 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1108 				    M_WAITOK | M_ZERO);
1109 				state = 2;
1110 				break;
1111 			case 0x11:	/* VPD-W */
1112 				alloc = 8;
1113 				off = 0;
1114 				cfg->vpd.vpd_w = malloc(alloc *
1115 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1116 				    M_WAITOK | M_ZERO);
1117 				state = 5;
1118 				break;
1119 			default:	/* Invalid data, abort */
1120 				state = -1;
1121 				break;
1122 			}
1123 			break;
1124 
1125 		case 1:	/* Identifier String */
1126 			cfg->vpd.vpd_ident[i++] = byte;
1127 			remain--;
1128 			if (remain == 0)  {
1129 				cfg->vpd.vpd_ident[i] = '\0';
1130 				state = 0;
1131 			}
1132 			break;
1133 
1134 		case 2:	/* VPD-R Keyword Header */
1135 			if (off == alloc) {
1136 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1137 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1138 				    M_DEVBUF, M_WAITOK | M_ZERO);
1139 			}
1140 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1141 			if (vpd_nextbyte(&vrs, &byte2)) {
1142 				state = -2;
1143 				break;
1144 			}
1145 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1146 			if (vpd_nextbyte(&vrs, &byte2)) {
1147 				state = -2;
1148 				break;
1149 			}
1150 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1151 			if (dflen == 0 &&
1152 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1153 			    2) == 0) {
1154 				/*
1155 				 * if this happens, we can't trust the rest
1156 				 * of the VPD.
1157 				 */
1158 				pci_printf(cfg, "bad keyword length: %d\n",
1159 				    dflen);
1160 				cksumvalid = 0;
1161 				state = -1;
1162 				break;
1163 			} else if (dflen == 0) {
1164 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1165 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1166 				    M_DEVBUF, M_WAITOK);
1167 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1168 			} else
1169 				cfg->vpd.vpd_ros[off].value = malloc(
1170 				    (dflen + 1) *
1171 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1172 				    M_DEVBUF, M_WAITOK);
1173 			remain -= 3;
1174 			i = 0;
1175 			/* keep in sync w/ state 3's transistions */
1176 			if (dflen == 0 && remain == 0)
1177 				state = 0;
1178 			else if (dflen == 0)
1179 				state = 2;
1180 			else
1181 				state = 3;
1182 			break;
1183 
1184 		case 3:	/* VPD-R Keyword Value */
1185 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1186 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1187 			    "RV", 2) == 0 && cksumvalid == -1) {
1188 				if (vrs.cksum == 0)
1189 					cksumvalid = 1;
1190 				else {
1191 					if (bootverbose)
1192 						pci_printf(cfg,
1193 					    "bad VPD cksum, remain %hhu\n",
1194 						    vrs.cksum);
1195 					cksumvalid = 0;
1196 					state = -1;
1197 					break;
1198 				}
1199 			}
1200 			dflen--;
1201 			remain--;
1202 			/* keep in sync w/ state 2's transistions */
1203 			if (dflen == 0)
1204 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1205 			if (dflen == 0 && remain == 0) {
1206 				cfg->vpd.vpd_rocnt = off;
1207 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1208 				    off * sizeof(*cfg->vpd.vpd_ros),
1209 				    M_DEVBUF, M_WAITOK | M_ZERO);
1210 				state = 0;
1211 			} else if (dflen == 0)
1212 				state = 2;
1213 			break;
1214 
1215 		case 4:
1216 			remain--;
1217 			if (remain == 0)
1218 				state = 0;
1219 			break;
1220 
1221 		case 5:	/* VPD-W Keyword Header */
1222 			if (off == alloc) {
1223 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1224 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1225 				    M_DEVBUF, M_WAITOK | M_ZERO);
1226 			}
1227 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1228 			if (vpd_nextbyte(&vrs, &byte2)) {
1229 				state = -2;
1230 				break;
1231 			}
1232 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1233 			if (vpd_nextbyte(&vrs, &byte2)) {
1234 				state = -2;
1235 				break;
1236 			}
1237 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1238 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1239 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1240 			    sizeof(*cfg->vpd.vpd_w[off].value),
1241 			    M_DEVBUF, M_WAITOK);
1242 			remain -= 3;
1243 			i = 0;
1244 			/* keep in sync w/ state 6's transistions */
1245 			if (dflen == 0 && remain == 0)
1246 				state = 0;
1247 			else if (dflen == 0)
1248 				state = 5;
1249 			else
1250 				state = 6;
1251 			break;
1252 
1253 		case 6:	/* VPD-W Keyword Value */
1254 			cfg->vpd.vpd_w[off].value[i++] = byte;
1255 			dflen--;
1256 			remain--;
1257 			/* keep in sync w/ state 5's transistions */
1258 			if (dflen == 0)
1259 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1260 			if (dflen == 0 && remain == 0) {
1261 				cfg->vpd.vpd_wcnt = off;
1262 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1263 				    off * sizeof(*cfg->vpd.vpd_w),
1264 				    M_DEVBUF, M_WAITOK | M_ZERO);
1265 				state = 0;
1266 			} else if (dflen == 0)
1267 				state = 5;
1268 			break;
1269 
1270 		default:
1271 			pci_printf(cfg, "invalid state: %d\n", state);
1272 			state = -1;
1273 			break;
1274 		}
1275 	}
1276 
1277 	if (cksumvalid == 0 || state < -1) {
1278 		/* read-only data bad, clean up */
1279 		if (cfg->vpd.vpd_ros != NULL) {
1280 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1281 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1282 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1283 			cfg->vpd.vpd_ros = NULL;
1284 		}
1285 	}
1286 	if (state < -1) {
1287 		/* I/O error, clean up */
1288 		pci_printf(cfg, "failed to read VPD data.\n");
1289 		if (cfg->vpd.vpd_ident != NULL) {
1290 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1291 			cfg->vpd.vpd_ident = NULL;
1292 		}
1293 		if (cfg->vpd.vpd_w != NULL) {
1294 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1295 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1296 			free(cfg->vpd.vpd_w, M_DEVBUF);
1297 			cfg->vpd.vpd_w = NULL;
1298 		}
1299 	}
1300 	cfg->vpd.vpd_cached = 1;
1301 #undef REG
1302 #undef WREG
1303 }
1304 
1305 int
1306 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1307 {
1308 	struct pci_devinfo *dinfo = device_get_ivars(child);
1309 	pcicfgregs *cfg = &dinfo->cfg;
1310 
1311 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1312 		pci_read_vpd(device_get_parent(dev), cfg);
1313 
1314 	*identptr = cfg->vpd.vpd_ident;
1315 
1316 	if (*identptr == NULL)
1317 		return (ENXIO);
1318 
1319 	return (0);
1320 }
1321 
1322 int
1323 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1324 	const char **vptr)
1325 {
1326 	struct pci_devinfo *dinfo = device_get_ivars(child);
1327 	pcicfgregs *cfg = &dinfo->cfg;
1328 	int i;
1329 
1330 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1331 		pci_read_vpd(device_get_parent(dev), cfg);
1332 
1333 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1334 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1335 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1336 			*vptr = cfg->vpd.vpd_ros[i].value;
1337 			return (0);
1338 		}
1339 
1340 	*vptr = NULL;
1341 	return (ENXIO);
1342 }
1343 
1344 struct pcicfg_vpd *
1345 pci_fetch_vpd_list(device_t dev)
1346 {
1347 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1348 	pcicfgregs *cfg = &dinfo->cfg;
1349 
1350 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1351 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1352 	return (&cfg->vpd);
1353 }
1354 
1355 /*
1356  * Find the requested HyperTransport capability and return the offset
1357  * in configuration space via the pointer provided.  The function
1358  * returns 0 on success and an error code otherwise.
1359  */
1360 int
1361 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1362 {
1363 	int ptr, error;
1364 	uint16_t val;
1365 
1366 	error = pci_find_cap(child, PCIY_HT, &ptr);
1367 	if (error)
1368 		return (error);
1369 
1370 	/*
1371 	 * Traverse the capabilities list checking each HT capability
1372 	 * to see if it matches the requested HT capability.
1373 	 */
1374 	while (ptr != 0) {
1375 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1376 		if (capability == PCIM_HTCAP_SLAVE ||
1377 		    capability == PCIM_HTCAP_HOST)
1378 			val &= 0xe000;
1379 		else
1380 			val &= PCIM_HTCMD_CAP_MASK;
1381 		if (val == capability) {
1382 			if (capreg != NULL)
1383 				*capreg = ptr;
1384 			return (0);
1385 		}
1386 
1387 		/* Skip to the next HT capability. */
1388 		while (ptr != 0) {
1389 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1390 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1391 			    PCIY_HT)
1392 				break;
1393 		}
1394 	}
1395 	return (ENOENT);
1396 }
1397 
1398 /*
1399  * Find the requested capability and return the offset in
1400  * configuration space via the pointer provided.  The function returns
1401  * 0 on success and an error code otherwise.
1402  */
1403 int
1404 pci_find_cap_method(device_t dev, device_t child, int capability,
1405     int *capreg)
1406 {
1407 	struct pci_devinfo *dinfo = device_get_ivars(child);
1408 	pcicfgregs *cfg = &dinfo->cfg;
1409 	u_int32_t status;
1410 	u_int8_t ptr;
1411 
1412 	/*
1413 	 * Check the CAP_LIST bit of the PCI status register first.
1414 	 */
1415 	status = pci_read_config(child, PCIR_STATUS, 2);
1416 	if (!(status & PCIM_STATUS_CAPPRESENT))
1417 		return (ENXIO);
1418 
1419 	/*
1420 	 * Determine the start pointer of the capabilities list.
1421 	 */
1422 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1423 	case PCIM_HDRTYPE_NORMAL:
1424 	case PCIM_HDRTYPE_BRIDGE:
1425 		ptr = PCIR_CAP_PTR;
1426 		break;
1427 	case PCIM_HDRTYPE_CARDBUS:
1428 		ptr = PCIR_CAP_PTR_2;
1429 		break;
1430 	default:
1431 		/* XXX: panic? */
1432 		return (ENXIO);		/* no extended capabilities support */
1433 	}
1434 	ptr = pci_read_config(child, ptr, 1);
1435 
1436 	/*
1437 	 * Traverse the capabilities list.
1438 	 */
1439 	while (ptr != 0) {
1440 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1441 			if (capreg != NULL)
1442 				*capreg = ptr;
1443 			return (0);
1444 		}
1445 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1446 	}
1447 
1448 	return (ENOENT);
1449 }
1450 
1451 /*
1452  * Find the requested extended capability and return the offset in
1453  * configuration space via the pointer provided.  The function returns
1454  * 0 on success and an error code otherwise.
1455  */
1456 int
1457 pci_find_extcap_method(device_t dev, device_t child, int capability,
1458     int *capreg)
1459 {
1460 	struct pci_devinfo *dinfo = device_get_ivars(child);
1461 	pcicfgregs *cfg = &dinfo->cfg;
1462 	uint32_t ecap;
1463 	uint16_t ptr;
1464 
1465 	/* Only supported for PCI-express devices. */
1466 	if (cfg->pcie.pcie_location == 0)
1467 		return (ENXIO);
1468 
1469 	ptr = PCIR_EXTCAP;
1470 	ecap = pci_read_config(child, ptr, 4);
1471 	if (ecap == 0xffffffff || ecap == 0)
1472 		return (ENOENT);
1473 	for (;;) {
1474 		if (PCI_EXTCAP_ID(ecap) == capability) {
1475 			if (capreg != NULL)
1476 				*capreg = ptr;
1477 			return (0);
1478 		}
1479 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1480 		if (ptr == 0)
1481 			break;
1482 		ecap = pci_read_config(child, ptr, 4);
1483 	}
1484 
1485 	return (ENOENT);
1486 }
1487 
1488 /*
1489  * Support for MSI-X message interrupts.
1490  */
1491 static void
1492 pci_write_msix_entry(device_t dev, u_int index, uint64_t address, uint32_t data)
1493 {
1494 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1495 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1496 	uint32_t offset;
1497 
1498 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1499 	offset = msix->msix_table_offset + index * 16;
1500 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1501 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1502 	bus_write_4(msix->msix_table_res, offset + 8, data);
1503 }
1504 
1505 void
1506 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1507     uint64_t address, uint32_t data)
1508 {
1509 
1510 	if (pci_msix_rewrite_table) {
1511 		struct pci_devinfo *dinfo = device_get_ivars(child);
1512 		struct pcicfg_msix *msix = &dinfo->cfg.msix;
1513 
1514 		/*
1515 		 * Some VM hosts require MSIX to be disabled in the
1516 		 * control register before updating the MSIX table
1517 		 * entries are allowed. It is not enough to only
1518 		 * disable MSIX while updating a single entry. MSIX
1519 		 * must be disabled while updating all entries in the
1520 		 * table.
1521 		 */
1522 		pci_write_config(child,
1523 		    msix->msix_location + PCIR_MSIX_CTRL,
1524 		    msix->msix_ctrl & ~PCIM_MSIXCTRL_MSIX_ENABLE, 2);
1525 		pci_resume_msix(child);
1526 	} else
1527 		pci_write_msix_entry(child, index, address, data);
1528 
1529 	/* Enable MSI -> HT mapping. */
1530 	pci_ht_map_msi(child, address);
1531 }
1532 
1533 void
1534 pci_mask_msix(device_t dev, u_int index)
1535 {
1536 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1537 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1538 	uint32_t offset, val;
1539 
1540 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1541 	offset = msix->msix_table_offset + index * 16 + 12;
1542 	val = bus_read_4(msix->msix_table_res, offset);
1543 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1544 		val |= PCIM_MSIX_VCTRL_MASK;
1545 		bus_write_4(msix->msix_table_res, offset, val);
1546 	}
1547 }
1548 
1549 void
1550 pci_unmask_msix(device_t dev, u_int index)
1551 {
1552 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1553 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1554 	uint32_t offset, val;
1555 
1556 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1557 	offset = msix->msix_table_offset + index * 16 + 12;
1558 	val = bus_read_4(msix->msix_table_res, offset);
1559 	if (val & PCIM_MSIX_VCTRL_MASK) {
1560 		val &= ~PCIM_MSIX_VCTRL_MASK;
1561 		bus_write_4(msix->msix_table_res, offset, val);
1562 	}
1563 }
1564 
1565 int
1566 pci_pending_msix(device_t dev, u_int index)
1567 {
1568 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1569 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1570 	uint32_t offset, bit;
1571 
1572 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1573 	offset = msix->msix_pba_offset + (index / 32) * 4;
1574 	bit = 1 << index % 32;
1575 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1576 }
1577 
1578 /*
1579  * Restore MSI-X registers and table during resume.  If MSI-X is
1580  * enabled then walk the virtual table to restore the actual MSI-X
1581  * table.
1582  */
1583 static void
1584 pci_resume_msix(device_t dev)
1585 {
1586 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1587 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1588 	struct msix_table_entry *mte;
1589 	struct msix_vector *mv;
1590 	int i;
1591 
1592 	if (msix->msix_alloc > 0) {
1593 		/* First, mask all vectors. */
1594 		for (i = 0; i < msix->msix_msgnum; i++)
1595 			pci_mask_msix(dev, i);
1596 
1597 		/* Second, program any messages with at least one handler. */
1598 		for (i = 0; i < msix->msix_table_len; i++) {
1599 			mte = &msix->msix_table[i];
1600 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1601 				continue;
1602 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1603 			pci_write_msix_entry(dev, i, mv->mv_address,
1604 			    mv->mv_data);
1605 			pci_unmask_msix(dev, i);
1606 		}
1607 	}
1608 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1609 	    msix->msix_ctrl, 2);
1610 }
1611 
1612 /*
1613  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1614  * returned in *count.  After this function returns, each message will be
1615  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1616  */
1617 int
1618 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1619 {
1620 	struct pci_devinfo *dinfo = device_get_ivars(child);
1621 	pcicfgregs *cfg = &dinfo->cfg;
1622 	struct resource_list_entry *rle;
1623 	int actual, error, i, irq, max;
1624 
1625 	/* Don't let count == 0 get us into trouble. */
1626 	if (*count == 0)
1627 		return (EINVAL);
1628 
1629 	/* If rid 0 is allocated, then fail. */
1630 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1631 	if (rle != NULL && rle->res != NULL)
1632 		return (ENXIO);
1633 
1634 	/* Already have allocated messages? */
1635 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1636 		return (ENXIO);
1637 
1638 	/* If MSI-X is blacklisted for this system, fail. */
1639 	if (pci_msix_blacklisted())
1640 		return (ENXIO);
1641 
1642 	/* MSI-X capability present? */
1643 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1644 		return (ENODEV);
1645 
1646 	/* Make sure the appropriate BARs are mapped. */
1647 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1648 	    cfg->msix.msix_table_bar);
1649 	if (rle == NULL || rle->res == NULL ||
1650 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1651 		return (ENXIO);
1652 	cfg->msix.msix_table_res = rle->res;
1653 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1654 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1655 		    cfg->msix.msix_pba_bar);
1656 		if (rle == NULL || rle->res == NULL ||
1657 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1658 			return (ENXIO);
1659 	}
1660 	cfg->msix.msix_pba_res = rle->res;
1661 
1662 	if (bootverbose)
1663 		device_printf(child,
1664 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1665 		    *count, cfg->msix.msix_msgnum);
1666 	max = min(*count, cfg->msix.msix_msgnum);
1667 	for (i = 0; i < max; i++) {
1668 		/* Allocate a message. */
1669 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1670 		if (error) {
1671 			if (i == 0)
1672 				return (error);
1673 			break;
1674 		}
1675 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1676 		    irq, 1);
1677 	}
1678 	actual = i;
1679 
1680 	if (bootverbose) {
1681 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1682 		if (actual == 1)
1683 			device_printf(child, "using IRQ %ju for MSI-X\n",
1684 			    rle->start);
1685 		else {
1686 			int run;
1687 
1688 			/*
1689 			 * Be fancy and try to print contiguous runs of
1690 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1691 			 * 'run' is true if we are in a range.
1692 			 */
1693 			device_printf(child, "using IRQs %ju", rle->start);
1694 			irq = rle->start;
1695 			run = 0;
1696 			for (i = 1; i < actual; i++) {
1697 				rle = resource_list_find(&dinfo->resources,
1698 				    SYS_RES_IRQ, i + 1);
1699 
1700 				/* Still in a run? */
1701 				if (rle->start == irq + 1) {
1702 					run = 1;
1703 					irq++;
1704 					continue;
1705 				}
1706 
1707 				/* Finish previous range. */
1708 				if (run) {
1709 					printf("-%d", irq);
1710 					run = 0;
1711 				}
1712 
1713 				/* Start new range. */
1714 				printf(",%ju", rle->start);
1715 				irq = rle->start;
1716 			}
1717 
1718 			/* Unfinished range? */
1719 			if (run)
1720 				printf("-%d", irq);
1721 			printf(" for MSI-X\n");
1722 		}
1723 	}
1724 
1725 	/* Mask all vectors. */
1726 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1727 		pci_mask_msix(child, i);
1728 
1729 	/* Allocate and initialize vector data and virtual table. */
1730 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1731 	    M_DEVBUF, M_WAITOK | M_ZERO);
1732 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1733 	    M_DEVBUF, M_WAITOK | M_ZERO);
1734 	for (i = 0; i < actual; i++) {
1735 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1736 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1737 		cfg->msix.msix_table[i].mte_vector = i + 1;
1738 	}
1739 
1740 	/* Update control register to enable MSI-X. */
1741 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1742 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1743 	    cfg->msix.msix_ctrl, 2);
1744 
1745 	/* Update counts of alloc'd messages. */
1746 	cfg->msix.msix_alloc = actual;
1747 	cfg->msix.msix_table_len = actual;
1748 	*count = actual;
1749 	return (0);
1750 }
1751 
1752 /*
1753  * By default, pci_alloc_msix() will assign the allocated IRQ
1754  * resources consecutively to the first N messages in the MSI-X table.
1755  * However, device drivers may want to use different layouts if they
1756  * either receive fewer messages than they asked for, or they wish to
1757  * populate the MSI-X table sparsely.  This method allows the driver
1758  * to specify what layout it wants.  It must be called after a
1759  * successful pci_alloc_msix() but before any of the associated
1760  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1761  *
1762  * The 'vectors' array contains 'count' message vectors.  The array
1763  * maps directly to the MSI-X table in that index 0 in the array
1764  * specifies the vector for the first message in the MSI-X table, etc.
1765  * The vector value in each array index can either be 0 to indicate
1766  * that no vector should be assigned to a message slot, or it can be a
1767  * number from 1 to N (where N is the count returned from a
1768  * succcessful call to pci_alloc_msix()) to indicate which message
1769  * vector (IRQ) to be used for the corresponding message.
1770  *
1771  * On successful return, each message with a non-zero vector will have
1772  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1773  * 1.  Additionally, if any of the IRQs allocated via the previous
1774  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1775  * will be freed back to the system automatically.
1776  *
1777  * For example, suppose a driver has a MSI-X table with 6 messages and
1778  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1779  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1780  * C.  After the call to pci_alloc_msix(), the device will be setup to
1781  * have an MSI-X table of ABC--- (where - means no vector assigned).
1782  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1783  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1784  * be freed back to the system.  This device will also have valid
1785  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1786  *
1787  * In any case, the SYS_RES_IRQ rid X will always map to the message
1788  * at MSI-X table index X - 1 and will only be valid if a vector is
1789  * assigned to that table entry.
1790  */
1791 int
1792 pci_remap_msix_method(device_t dev, device_t child, int count,
1793     const u_int *vectors)
1794 {
1795 	struct pci_devinfo *dinfo = device_get_ivars(child);
1796 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1797 	struct resource_list_entry *rle;
1798 	int i, irq, j, *used;
1799 
1800 	/*
1801 	 * Have to have at least one message in the table but the
1802 	 * table can't be bigger than the actual MSI-X table in the
1803 	 * device.
1804 	 */
1805 	if (count == 0 || count > msix->msix_msgnum)
1806 		return (EINVAL);
1807 
1808 	/* Sanity check the vectors. */
1809 	for (i = 0; i < count; i++)
1810 		if (vectors[i] > msix->msix_alloc)
1811 			return (EINVAL);
1812 
1813 	/*
1814 	 * Make sure there aren't any holes in the vectors to be used.
1815 	 * It's a big pain to support it, and it doesn't really make
1816 	 * sense anyway.  Also, at least one vector must be used.
1817 	 */
1818 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1819 	    M_ZERO);
1820 	for (i = 0; i < count; i++)
1821 		if (vectors[i] != 0)
1822 			used[vectors[i] - 1] = 1;
1823 	for (i = 0; i < msix->msix_alloc - 1; i++)
1824 		if (used[i] == 0 && used[i + 1] == 1) {
1825 			free(used, M_DEVBUF);
1826 			return (EINVAL);
1827 		}
1828 	if (used[0] != 1) {
1829 		free(used, M_DEVBUF);
1830 		return (EINVAL);
1831 	}
1832 
1833 	/* Make sure none of the resources are allocated. */
1834 	for (i = 0; i < msix->msix_table_len; i++) {
1835 		if (msix->msix_table[i].mte_vector == 0)
1836 			continue;
1837 		if (msix->msix_table[i].mte_handlers > 0) {
1838 			free(used, M_DEVBUF);
1839 			return (EBUSY);
1840 		}
1841 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1842 		KASSERT(rle != NULL, ("missing resource"));
1843 		if (rle->res != NULL) {
1844 			free(used, M_DEVBUF);
1845 			return (EBUSY);
1846 		}
1847 	}
1848 
1849 	/* Free the existing resource list entries. */
1850 	for (i = 0; i < msix->msix_table_len; i++) {
1851 		if (msix->msix_table[i].mte_vector == 0)
1852 			continue;
1853 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1854 	}
1855 
1856 	/*
1857 	 * Build the new virtual table keeping track of which vectors are
1858 	 * used.
1859 	 */
1860 	free(msix->msix_table, M_DEVBUF);
1861 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1862 	    M_DEVBUF, M_WAITOK | M_ZERO);
1863 	for (i = 0; i < count; i++)
1864 		msix->msix_table[i].mte_vector = vectors[i];
1865 	msix->msix_table_len = count;
1866 
1867 	/* Free any unused IRQs and resize the vectors array if necessary. */
1868 	j = msix->msix_alloc - 1;
1869 	if (used[j] == 0) {
1870 		struct msix_vector *vec;
1871 
1872 		while (used[j] == 0) {
1873 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1874 			    msix->msix_vectors[j].mv_irq);
1875 			j--;
1876 		}
1877 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1878 		    M_WAITOK);
1879 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1880 		    (j + 1));
1881 		free(msix->msix_vectors, M_DEVBUF);
1882 		msix->msix_vectors = vec;
1883 		msix->msix_alloc = j + 1;
1884 	}
1885 	free(used, M_DEVBUF);
1886 
1887 	/* Map the IRQs onto the rids. */
1888 	for (i = 0; i < count; i++) {
1889 		if (vectors[i] == 0)
1890 			continue;
1891 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1892 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1893 		    irq, 1);
1894 	}
1895 
1896 	if (bootverbose) {
1897 		device_printf(child, "Remapped MSI-X IRQs as: ");
1898 		for (i = 0; i < count; i++) {
1899 			if (i != 0)
1900 				printf(", ");
1901 			if (vectors[i] == 0)
1902 				printf("---");
1903 			else
1904 				printf("%d",
1905 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1906 		}
1907 		printf("\n");
1908 	}
1909 
1910 	return (0);
1911 }
1912 
1913 static int
1914 pci_release_msix(device_t dev, device_t child)
1915 {
1916 	struct pci_devinfo *dinfo = device_get_ivars(child);
1917 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1918 	struct resource_list_entry *rle;
1919 	int i;
1920 
1921 	/* Do we have any messages to release? */
1922 	if (msix->msix_alloc == 0)
1923 		return (ENODEV);
1924 
1925 	/* Make sure none of the resources are allocated. */
1926 	for (i = 0; i < msix->msix_table_len; i++) {
1927 		if (msix->msix_table[i].mte_vector == 0)
1928 			continue;
1929 		if (msix->msix_table[i].mte_handlers > 0)
1930 			return (EBUSY);
1931 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1932 		KASSERT(rle != NULL, ("missing resource"));
1933 		if (rle->res != NULL)
1934 			return (EBUSY);
1935 	}
1936 
1937 	/* Update control register to disable MSI-X. */
1938 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1939 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1940 	    msix->msix_ctrl, 2);
1941 
1942 	/* Free the resource list entries. */
1943 	for (i = 0; i < msix->msix_table_len; i++) {
1944 		if (msix->msix_table[i].mte_vector == 0)
1945 			continue;
1946 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1947 	}
1948 	free(msix->msix_table, M_DEVBUF);
1949 	msix->msix_table_len = 0;
1950 
1951 	/* Release the IRQs. */
1952 	for (i = 0; i < msix->msix_alloc; i++)
1953 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1954 		    msix->msix_vectors[i].mv_irq);
1955 	free(msix->msix_vectors, M_DEVBUF);
1956 	msix->msix_alloc = 0;
1957 	return (0);
1958 }
1959 
1960 /*
1961  * Return the max supported MSI-X messages this device supports.
1962  * Basically, assuming the MD code can alloc messages, this function
1963  * should return the maximum value that pci_alloc_msix() can return.
1964  * Thus, it is subject to the tunables, etc.
1965  */
1966 int
1967 pci_msix_count_method(device_t dev, device_t child)
1968 {
1969 	struct pci_devinfo *dinfo = device_get_ivars(child);
1970 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1971 
1972 	if (pci_do_msix && msix->msix_location != 0)
1973 		return (msix->msix_msgnum);
1974 	return (0);
1975 }
1976 
1977 int
1978 pci_msix_pba_bar_method(device_t dev, device_t child)
1979 {
1980 	struct pci_devinfo *dinfo = device_get_ivars(child);
1981 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1982 
1983 	if (pci_do_msix && msix->msix_location != 0)
1984 		return (msix->msix_pba_bar);
1985 	return (-1);
1986 }
1987 
1988 int
1989 pci_msix_table_bar_method(device_t dev, device_t child)
1990 {
1991 	struct pci_devinfo *dinfo = device_get_ivars(child);
1992 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1993 
1994 	if (pci_do_msix && msix->msix_location != 0)
1995 		return (msix->msix_table_bar);
1996 	return (-1);
1997 }
1998 
1999 /*
2000  * HyperTransport MSI mapping control
2001  */
2002 void
2003 pci_ht_map_msi(device_t dev, uint64_t addr)
2004 {
2005 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2006 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
2007 
2008 	if (!ht->ht_msimap)
2009 		return;
2010 
2011 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
2012 	    ht->ht_msiaddr >> 20 == addr >> 20) {
2013 		/* Enable MSI -> HT mapping. */
2014 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
2015 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2016 		    ht->ht_msictrl, 2);
2017 	}
2018 
2019 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
2020 		/* Disable MSI -> HT mapping. */
2021 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
2022 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2023 		    ht->ht_msictrl, 2);
2024 	}
2025 }
2026 
2027 int
2028 pci_get_max_payload(device_t dev)
2029 {
2030 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2031 	int cap;
2032 	uint16_t val;
2033 
2034 	cap = dinfo->cfg.pcie.pcie_location;
2035 	if (cap == 0)
2036 		return (0);
2037 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2038 	val &= PCIEM_CTL_MAX_PAYLOAD;
2039 	val >>= 5;
2040 	return (1 << (val + 7));
2041 }
2042 
2043 int
2044 pci_get_max_read_req(device_t dev)
2045 {
2046 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2047 	int cap;
2048 	uint16_t val;
2049 
2050 	cap = dinfo->cfg.pcie.pcie_location;
2051 	if (cap == 0)
2052 		return (0);
2053 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2054 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2055 	val >>= 12;
2056 	return (1 << (val + 7));
2057 }
2058 
2059 int
2060 pci_set_max_read_req(device_t dev, int size)
2061 {
2062 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2063 	int cap;
2064 	uint16_t val;
2065 
2066 	cap = dinfo->cfg.pcie.pcie_location;
2067 	if (cap == 0)
2068 		return (0);
2069 	if (size < 128)
2070 		size = 128;
2071 	if (size > 4096)
2072 		size = 4096;
2073 	size = (1 << (fls(size) - 1));
2074 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2075 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2076 	val |= (fls(size) - 8) << 12;
2077 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2078 	return (size);
2079 }
2080 
2081 uint32_t
2082 pcie_read_config(device_t dev, int reg, int width)
2083 {
2084 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2085 	int cap;
2086 
2087 	cap = dinfo->cfg.pcie.pcie_location;
2088 	if (cap == 0) {
2089 		if (width == 2)
2090 			return (0xffff);
2091 		return (0xffffffff);
2092 	}
2093 
2094 	return (pci_read_config(dev, cap + reg, width));
2095 }
2096 
2097 void
2098 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2099 {
2100 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2101 	int cap;
2102 
2103 	cap = dinfo->cfg.pcie.pcie_location;
2104 	if (cap == 0)
2105 		return;
2106 	pci_write_config(dev, cap + reg, value, width);
2107 }
2108 
2109 /*
2110  * Adjusts a PCI-e capability register by clearing the bits in mask
2111  * and setting the bits in (value & mask).  Bits not set in mask are
2112  * not adjusted.
2113  *
2114  * Returns the old value on success or all ones on failure.
2115  */
2116 uint32_t
2117 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2118     int width)
2119 {
2120 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2121 	uint32_t old, new;
2122 	int cap;
2123 
2124 	cap = dinfo->cfg.pcie.pcie_location;
2125 	if (cap == 0) {
2126 		if (width == 2)
2127 			return (0xffff);
2128 		return (0xffffffff);
2129 	}
2130 
2131 	old = pci_read_config(dev, cap + reg, width);
2132 	new = old & ~mask;
2133 	new |= (value & mask);
2134 	pci_write_config(dev, cap + reg, new, width);
2135 	return (old);
2136 }
2137 
2138 /*
2139  * Support for MSI message signalled interrupts.
2140  */
2141 void
2142 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2143     uint16_t data)
2144 {
2145 	struct pci_devinfo *dinfo = device_get_ivars(child);
2146 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2147 
2148 	/* Write data and address values. */
2149 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2150 	    address & 0xffffffff, 4);
2151 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2152 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2153 		    address >> 32, 4);
2154 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2155 		    data, 2);
2156 	} else
2157 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2158 		    2);
2159 
2160 	/* Enable MSI in the control register. */
2161 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2162 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2163 	    msi->msi_ctrl, 2);
2164 
2165 	/* Enable MSI -> HT mapping. */
2166 	pci_ht_map_msi(child, address);
2167 }
2168 
2169 void
2170 pci_disable_msi_method(device_t dev, device_t child)
2171 {
2172 	struct pci_devinfo *dinfo = device_get_ivars(child);
2173 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2174 
2175 	/* Disable MSI -> HT mapping. */
2176 	pci_ht_map_msi(child, 0);
2177 
2178 	/* Disable MSI in the control register. */
2179 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2180 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2181 	    msi->msi_ctrl, 2);
2182 }
2183 
2184 /*
2185  * Restore MSI registers during resume.  If MSI is enabled then
2186  * restore the data and address registers in addition to the control
2187  * register.
2188  */
2189 static void
2190 pci_resume_msi(device_t dev)
2191 {
2192 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2193 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2194 	uint64_t address;
2195 	uint16_t data;
2196 
2197 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2198 		address = msi->msi_addr;
2199 		data = msi->msi_data;
2200 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2201 		    address & 0xffffffff, 4);
2202 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2203 			pci_write_config(dev, msi->msi_location +
2204 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2205 			pci_write_config(dev, msi->msi_location +
2206 			    PCIR_MSI_DATA_64BIT, data, 2);
2207 		} else
2208 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2209 			    data, 2);
2210 	}
2211 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2212 	    2);
2213 }
2214 
2215 static int
2216 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2217 {
2218 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2219 	pcicfgregs *cfg = &dinfo->cfg;
2220 	struct resource_list_entry *rle;
2221 	struct msix_table_entry *mte;
2222 	struct msix_vector *mv;
2223 	uint64_t addr;
2224 	uint32_t data;
2225 	int error, i, j;
2226 
2227 	/*
2228 	 * Handle MSI first.  We try to find this IRQ among our list
2229 	 * of MSI IRQs.  If we find it, we request updated address and
2230 	 * data registers and apply the results.
2231 	 */
2232 	if (cfg->msi.msi_alloc > 0) {
2233 
2234 		/* If we don't have any active handlers, nothing to do. */
2235 		if (cfg->msi.msi_handlers == 0)
2236 			return (0);
2237 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2238 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2239 			    i + 1);
2240 			if (rle->start == irq) {
2241 				error = PCIB_MAP_MSI(device_get_parent(bus),
2242 				    dev, irq, &addr, &data);
2243 				if (error)
2244 					return (error);
2245 				pci_disable_msi(dev);
2246 				dinfo->cfg.msi.msi_addr = addr;
2247 				dinfo->cfg.msi.msi_data = data;
2248 				pci_enable_msi(dev, addr, data);
2249 				return (0);
2250 			}
2251 		}
2252 		return (ENOENT);
2253 	}
2254 
2255 	/*
2256 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2257 	 * we request the updated mapping info.  If that works, we go
2258 	 * through all the slots that use this IRQ and update them.
2259 	 */
2260 	if (cfg->msix.msix_alloc > 0) {
2261 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2262 			mv = &cfg->msix.msix_vectors[i];
2263 			if (mv->mv_irq == irq) {
2264 				error = PCIB_MAP_MSI(device_get_parent(bus),
2265 				    dev, irq, &addr, &data);
2266 				if (error)
2267 					return (error);
2268 				mv->mv_address = addr;
2269 				mv->mv_data = data;
2270 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2271 					mte = &cfg->msix.msix_table[j];
2272 					if (mte->mte_vector != i + 1)
2273 						continue;
2274 					if (mte->mte_handlers == 0)
2275 						continue;
2276 					pci_mask_msix(dev, j);
2277 					pci_enable_msix(dev, j, addr, data);
2278 					pci_unmask_msix(dev, j);
2279 				}
2280 			}
2281 		}
2282 		return (ENOENT);
2283 	}
2284 
2285 	return (ENOENT);
2286 }
2287 
2288 /*
2289  * Returns true if the specified device is blacklisted because MSI
2290  * doesn't work.
2291  */
2292 int
2293 pci_msi_device_blacklisted(device_t dev)
2294 {
2295 
2296 	if (!pci_honor_msi_blacklist)
2297 		return (0);
2298 
2299 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2300 }
2301 
2302 /*
2303  * Determine if MSI is blacklisted globally on this system.  Currently,
2304  * we just check for blacklisted chipsets as represented by the
2305  * host-PCI bridge at device 0:0:0.  In the future, it may become
2306  * necessary to check other system attributes, such as the kenv values
2307  * that give the motherboard manufacturer and model number.
2308  */
2309 static int
2310 pci_msi_blacklisted(void)
2311 {
2312 	device_t dev;
2313 
2314 	if (!pci_honor_msi_blacklist)
2315 		return (0);
2316 
2317 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2318 	if (!(pcie_chipset || pcix_chipset)) {
2319 		if (vm_guest != VM_GUEST_NO) {
2320 			/*
2321 			 * Whitelist older chipsets in virtual
2322 			 * machines known to support MSI.
2323 			 */
2324 			dev = pci_find_bsf(0, 0, 0);
2325 			if (dev != NULL)
2326 				return (!pci_has_quirk(pci_get_devid(dev),
2327 					PCI_QUIRK_ENABLE_MSI_VM));
2328 		}
2329 		return (1);
2330 	}
2331 
2332 	dev = pci_find_bsf(0, 0, 0);
2333 	if (dev != NULL)
2334 		return (pci_msi_device_blacklisted(dev));
2335 	return (0);
2336 }
2337 
2338 /*
2339  * Returns true if the specified device is blacklisted because MSI-X
2340  * doesn't work.  Note that this assumes that if MSI doesn't work,
2341  * MSI-X doesn't either.
2342  */
2343 int
2344 pci_msix_device_blacklisted(device_t dev)
2345 {
2346 
2347 	if (!pci_honor_msi_blacklist)
2348 		return (0);
2349 
2350 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2351 		return (1);
2352 
2353 	return (pci_msi_device_blacklisted(dev));
2354 }
2355 
2356 /*
2357  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2358  * is blacklisted, assume that MSI-X is as well.  Check for additional
2359  * chipsets where MSI works but MSI-X does not.
2360  */
2361 static int
2362 pci_msix_blacklisted(void)
2363 {
2364 	device_t dev;
2365 
2366 	if (!pci_honor_msi_blacklist)
2367 		return (0);
2368 
2369 	dev = pci_find_bsf(0, 0, 0);
2370 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2371 	    PCI_QUIRK_DISABLE_MSIX))
2372 		return (1);
2373 
2374 	return (pci_msi_blacklisted());
2375 }
2376 
2377 /*
2378  * Attempt to allocate *count MSI messages.  The actual number allocated is
2379  * returned in *count.  After this function returns, each message will be
2380  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2381  */
2382 int
2383 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2384 {
2385 	struct pci_devinfo *dinfo = device_get_ivars(child);
2386 	pcicfgregs *cfg = &dinfo->cfg;
2387 	struct resource_list_entry *rle;
2388 	int actual, error, i, irqs[32];
2389 	uint16_t ctrl;
2390 
2391 	/* Don't let count == 0 get us into trouble. */
2392 	if (*count == 0)
2393 		return (EINVAL);
2394 
2395 	/* If rid 0 is allocated, then fail. */
2396 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2397 	if (rle != NULL && rle->res != NULL)
2398 		return (ENXIO);
2399 
2400 	/* Already have allocated messages? */
2401 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2402 		return (ENXIO);
2403 
2404 	/* If MSI is blacklisted for this system, fail. */
2405 	if (pci_msi_blacklisted())
2406 		return (ENXIO);
2407 
2408 	/* MSI capability present? */
2409 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2410 		return (ENODEV);
2411 
2412 	if (bootverbose)
2413 		device_printf(child,
2414 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2415 		    *count, cfg->msi.msi_msgnum);
2416 
2417 	/* Don't ask for more than the device supports. */
2418 	actual = min(*count, cfg->msi.msi_msgnum);
2419 
2420 	/* Don't ask for more than 32 messages. */
2421 	actual = min(actual, 32);
2422 
2423 	/* MSI requires power of 2 number of messages. */
2424 	if (!powerof2(actual))
2425 		return (EINVAL);
2426 
2427 	for (;;) {
2428 		/* Try to allocate N messages. */
2429 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2430 		    actual, irqs);
2431 		if (error == 0)
2432 			break;
2433 		if (actual == 1)
2434 			return (error);
2435 
2436 		/* Try N / 2. */
2437 		actual >>= 1;
2438 	}
2439 
2440 	/*
2441 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2442 	 * resources in the irqs[] array, so add new resources
2443 	 * starting at rid 1.
2444 	 */
2445 	for (i = 0; i < actual; i++)
2446 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2447 		    irqs[i], irqs[i], 1);
2448 
2449 	if (bootverbose) {
2450 		if (actual == 1)
2451 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2452 		else {
2453 			int run;
2454 
2455 			/*
2456 			 * Be fancy and try to print contiguous runs
2457 			 * of IRQ values as ranges.  'run' is true if
2458 			 * we are in a range.
2459 			 */
2460 			device_printf(child, "using IRQs %d", irqs[0]);
2461 			run = 0;
2462 			for (i = 1; i < actual; i++) {
2463 
2464 				/* Still in a run? */
2465 				if (irqs[i] == irqs[i - 1] + 1) {
2466 					run = 1;
2467 					continue;
2468 				}
2469 
2470 				/* Finish previous range. */
2471 				if (run) {
2472 					printf("-%d", irqs[i - 1]);
2473 					run = 0;
2474 				}
2475 
2476 				/* Start new range. */
2477 				printf(",%d", irqs[i]);
2478 			}
2479 
2480 			/* Unfinished range? */
2481 			if (run)
2482 				printf("-%d", irqs[actual - 1]);
2483 			printf(" for MSI\n");
2484 		}
2485 	}
2486 
2487 	/* Update control register with actual count. */
2488 	ctrl = cfg->msi.msi_ctrl;
2489 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2490 	ctrl |= (ffs(actual) - 1) << 4;
2491 	cfg->msi.msi_ctrl = ctrl;
2492 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2493 
2494 	/* Update counts of alloc'd messages. */
2495 	cfg->msi.msi_alloc = actual;
2496 	cfg->msi.msi_handlers = 0;
2497 	*count = actual;
2498 	return (0);
2499 }
2500 
2501 /* Release the MSI messages associated with this device. */
2502 int
2503 pci_release_msi_method(device_t dev, device_t child)
2504 {
2505 	struct pci_devinfo *dinfo = device_get_ivars(child);
2506 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2507 	struct resource_list_entry *rle;
2508 	int error, i, irqs[32];
2509 
2510 	/* Try MSI-X first. */
2511 	error = pci_release_msix(dev, child);
2512 	if (error != ENODEV)
2513 		return (error);
2514 
2515 	/* Do we have any messages to release? */
2516 	if (msi->msi_alloc == 0)
2517 		return (ENODEV);
2518 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2519 
2520 	/* Make sure none of the resources are allocated. */
2521 	if (msi->msi_handlers > 0)
2522 		return (EBUSY);
2523 	for (i = 0; i < msi->msi_alloc; i++) {
2524 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2525 		KASSERT(rle != NULL, ("missing MSI resource"));
2526 		if (rle->res != NULL)
2527 			return (EBUSY);
2528 		irqs[i] = rle->start;
2529 	}
2530 
2531 	/* Update control register with 0 count. */
2532 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2533 	    ("%s: MSI still enabled", __func__));
2534 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2535 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2536 	    msi->msi_ctrl, 2);
2537 
2538 	/* Release the messages. */
2539 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2540 	for (i = 0; i < msi->msi_alloc; i++)
2541 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2542 
2543 	/* Update alloc count. */
2544 	msi->msi_alloc = 0;
2545 	msi->msi_addr = 0;
2546 	msi->msi_data = 0;
2547 	return (0);
2548 }
2549 
2550 /*
2551  * Return the max supported MSI messages this device supports.
2552  * Basically, assuming the MD code can alloc messages, this function
2553  * should return the maximum value that pci_alloc_msi() can return.
2554  * Thus, it is subject to the tunables, etc.
2555  */
2556 int
2557 pci_msi_count_method(device_t dev, device_t child)
2558 {
2559 	struct pci_devinfo *dinfo = device_get_ivars(child);
2560 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2561 
2562 	if (pci_do_msi && msi->msi_location != 0)
2563 		return (msi->msi_msgnum);
2564 	return (0);
2565 }
2566 
2567 /* free pcicfgregs structure and all depending data structures */
2568 
2569 int
2570 pci_freecfg(struct pci_devinfo *dinfo)
2571 {
2572 	struct devlist *devlist_head;
2573 	struct pci_map *pm, *next;
2574 	int i;
2575 
2576 	devlist_head = &pci_devq;
2577 
2578 	if (dinfo->cfg.vpd.vpd_reg) {
2579 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2580 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2581 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2582 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2583 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2584 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2585 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2586 	}
2587 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2588 		free(pm, M_DEVBUF);
2589 	}
2590 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2591 	free(dinfo, M_DEVBUF);
2592 
2593 	/* increment the generation count */
2594 	pci_generation++;
2595 
2596 	/* we're losing one device */
2597 	pci_numdevs--;
2598 	return (0);
2599 }
2600 
2601 /*
 * PCI power management
2603  */
2604 int
2605 pci_set_powerstate_method(device_t dev, device_t child, int state)
2606 {
2607 	struct pci_devinfo *dinfo = device_get_ivars(child);
2608 	pcicfgregs *cfg = &dinfo->cfg;
2609 	uint16_t status;
2610 	int oldstate, highest, delay;
2611 
2612 	if (cfg->pp.pp_cap == 0)
2613 		return (EOPNOTSUPP);
2614 
2615 	/*
2616 	 * Optimize a no state change request away.  While it would be OK to
2617 	 * write to the hardware in theory, some devices have shown odd
2618 	 * behavior when going from D3 -> D3.
2619 	 */
2620 	oldstate = pci_get_powerstate(child);
2621 	if (oldstate == state)
2622 		return (0);
2623 
2624 	/*
2625 	 * The PCI power management specification states that after a state
2626 	 * transition between PCI power states, system software must
2627 	 * guarantee a minimal delay before the function accesses the device.
2628 	 * Compute the worst case delay that we need to guarantee before we
2629 	 * access the device.  Many devices will be responsive much more
2630 	 * quickly than this delay, but there are some that don't respond
2631 	 * instantly to state changes.  Transitions to/from D3 state require
2632 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2633 	 * is done below with DELAY rather than a sleeper function because
2634 	 * this function can be called from contexts where we cannot sleep.
2635 	 */
2636 	highest = (oldstate > state) ? oldstate : state;
2637 	if (highest == PCI_POWERSTATE_D3)
2638 	    delay = 10000;
2639 	else if (highest == PCI_POWERSTATE_D2)
2640 	    delay = 200;
2641 	else
2642 	    delay = 0;
2643 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2644 	    & ~PCIM_PSTAT_DMASK;
2645 	switch (state) {
2646 	case PCI_POWERSTATE_D0:
2647 		status |= PCIM_PSTAT_D0;
2648 		break;
2649 	case PCI_POWERSTATE_D1:
2650 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2651 			return (EOPNOTSUPP);
2652 		status |= PCIM_PSTAT_D1;
2653 		break;
2654 	case PCI_POWERSTATE_D2:
2655 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2656 			return (EOPNOTSUPP);
2657 		status |= PCIM_PSTAT_D2;
2658 		break;
2659 	case PCI_POWERSTATE_D3:
2660 		status |= PCIM_PSTAT_D3;
2661 		break;
2662 	default:
2663 		return (EINVAL);
2664 	}
2665 
2666 	if (bootverbose)
2667 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2668 		    state);
2669 
2670 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2671 	if (delay)
2672 		DELAY(delay);
2673 	return (0);
2674 }
2675 
2676 int
2677 pci_get_powerstate_method(device_t dev, device_t child)
2678 {
2679 	struct pci_devinfo *dinfo = device_get_ivars(child);
2680 	pcicfgregs *cfg = &dinfo->cfg;
2681 	uint16_t status;
2682 	int result;
2683 
2684 	if (cfg->pp.pp_cap != 0) {
2685 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2686 		switch (status & PCIM_PSTAT_DMASK) {
2687 		case PCIM_PSTAT_D0:
2688 			result = PCI_POWERSTATE_D0;
2689 			break;
2690 		case PCIM_PSTAT_D1:
2691 			result = PCI_POWERSTATE_D1;
2692 			break;
2693 		case PCIM_PSTAT_D2:
2694 			result = PCI_POWERSTATE_D2;
2695 			break;
2696 		case PCIM_PSTAT_D3:
2697 			result = PCI_POWERSTATE_D3;
2698 			break;
2699 		default:
2700 			result = PCI_POWERSTATE_UNKNOWN;
2701 			break;
2702 		}
2703 	} else {
2704 		/* No support, device is always at D0 */
2705 		result = PCI_POWERSTATE_D0;
2706 	}
2707 	return (result);
2708 }
2709 
2710 /*
2711  * Some convenience functions for PCI device drivers.
2712  */
2713 
2714 static __inline void
2715 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2716 {
2717 	uint16_t	command;
2718 
2719 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2720 	command |= bit;
2721 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2722 }
2723 
2724 static __inline void
2725 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2726 {
2727 	uint16_t	command;
2728 
2729 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2730 	command &= ~bit;
2731 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2732 }
2733 
2734 int
2735 pci_enable_busmaster_method(device_t dev, device_t child)
2736 {
2737 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2738 	return (0);
2739 }
2740 
2741 int
2742 pci_disable_busmaster_method(device_t dev, device_t child)
2743 {
2744 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2745 	return (0);
2746 }
2747 
2748 int
2749 pci_enable_io_method(device_t dev, device_t child, int space)
2750 {
2751 	uint16_t bit;
2752 
2753 	switch(space) {
2754 	case SYS_RES_IOPORT:
2755 		bit = PCIM_CMD_PORTEN;
2756 		break;
2757 	case SYS_RES_MEMORY:
2758 		bit = PCIM_CMD_MEMEN;
2759 		break;
2760 	default:
2761 		return (EINVAL);
2762 	}
2763 	pci_set_command_bit(dev, child, bit);
2764 	return (0);
2765 }
2766 
2767 int
2768 pci_disable_io_method(device_t dev, device_t child, int space)
2769 {
2770 	uint16_t bit;
2771 
2772 	switch(space) {
2773 	case SYS_RES_IOPORT:
2774 		bit = PCIM_CMD_PORTEN;
2775 		break;
2776 	case SYS_RES_MEMORY:
2777 		bit = PCIM_CMD_MEMEN;
2778 		break;
2779 	default:
2780 		return (EINVAL);
2781 	}
2782 	pci_clear_command_bit(dev, child, bit);
2783 	return (0);
2784 }
2785 
2786 /*
2787  * New style pci driver.  Parent device is either a pci-host-bridge or a
2788  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2789  */
2790 
2791 void
2792 pci_print_verbose(struct pci_devinfo *dinfo)
2793 {
2794 
2795 	if (bootverbose) {
2796 		pcicfgregs *cfg = &dinfo->cfg;
2797 
2798 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2799 		    cfg->vendor, cfg->device, cfg->revid);
2800 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2801 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2802 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2803 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2804 		    cfg->mfdev);
2805 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2806 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2807 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2808 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2809 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2810 		if (cfg->intpin > 0)
2811 			printf("\tintpin=%c, irq=%d\n",
2812 			    cfg->intpin +'a' -1, cfg->intline);
2813 		if (cfg->pp.pp_cap) {
2814 			uint16_t status;
2815 
2816 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2817 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2818 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2819 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2820 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2821 			    status & PCIM_PSTAT_DMASK);
2822 		}
2823 		if (cfg->msi.msi_location) {
2824 			int ctrl;
2825 
2826 			ctrl = cfg->msi.msi_ctrl;
2827 			printf("\tMSI supports %d message%s%s%s\n",
2828 			    cfg->msi.msi_msgnum,
2829 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2830 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2831 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2832 		}
2833 		if (cfg->msix.msix_location) {
2834 			printf("\tMSI-X supports %d message%s ",
2835 			    cfg->msix.msix_msgnum,
2836 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2837 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2838 				printf("in map 0x%x\n",
2839 				    cfg->msix.msix_table_bar);
2840 			else
2841 				printf("in maps 0x%x and 0x%x\n",
2842 				    cfg->msix.msix_table_bar,
2843 				    cfg->msix.msix_pba_bar);
2844 		}
2845 	}
2846 }
2847 
2848 static int
2849 pci_porten(device_t dev)
2850 {
2851 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2852 }
2853 
2854 static int
2855 pci_memen(device_t dev)
2856 {
2857 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2858 }
2859 
2860 void
2861 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2862     int *bar64)
2863 {
2864 	struct pci_devinfo *dinfo;
2865 	pci_addr_t map, testval;
2866 	int ln2range;
2867 	uint16_t cmd;
2868 
2869 	/*
2870 	 * The device ROM BAR is special.  It is always a 32-bit
2871 	 * memory BAR.  Bit 0 is special and should not be set when
2872 	 * sizing the BAR.
2873 	 */
2874 	dinfo = device_get_ivars(dev);
2875 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2876 		map = pci_read_config(dev, reg, 4);
2877 		pci_write_config(dev, reg, 0xfffffffe, 4);
2878 		testval = pci_read_config(dev, reg, 4);
2879 		pci_write_config(dev, reg, map, 4);
2880 		*mapp = map;
2881 		*testvalp = testval;
2882 		if (bar64 != NULL)
2883 			*bar64 = 0;
2884 		return;
2885 	}
2886 
2887 	map = pci_read_config(dev, reg, 4);
2888 	ln2range = pci_maprange(map);
2889 	if (ln2range == 64)
2890 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2891 
2892 	/*
2893 	 * Disable decoding via the command register before
2894 	 * determining the BAR's length since we will be placing it in
2895 	 * a weird state.
2896 	 */
2897 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2898 	pci_write_config(dev, PCIR_COMMAND,
2899 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2900 
2901 	/*
2902 	 * Determine the BAR's length by writing all 1's.  The bottom
2903 	 * log_2(size) bits of the BAR will stick as 0 when we read
2904 	 * the value back.
2905 	 */
2906 	pci_write_config(dev, reg, 0xffffffff, 4);
2907 	testval = pci_read_config(dev, reg, 4);
2908 	if (ln2range == 64) {
2909 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2910 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2911 	}
2912 
2913 	/*
2914 	 * Restore the original value of the BAR.  We may have reprogrammed
2915 	 * the BAR of the low-level console device and when booting verbose,
2916 	 * we need the console device addressable.
2917 	 */
2918 	pci_write_config(dev, reg, map, 4);
2919 	if (ln2range == 64)
2920 		pci_write_config(dev, reg + 4, map >> 32, 4);
2921 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2922 
2923 	*mapp = map;
2924 	*testvalp = testval;
2925 	if (bar64 != NULL)
2926 		*bar64 = (ln2range == 64);
2927 }
2928 
2929 static void
2930 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2931 {
2932 	struct pci_devinfo *dinfo;
2933 	int ln2range;
2934 
2935 	/* The device ROM BAR is always a 32-bit memory BAR. */
2936 	dinfo = device_get_ivars(dev);
2937 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2938 		ln2range = 32;
2939 	else
2940 		ln2range = pci_maprange(pm->pm_value);
2941 	pci_write_config(dev, pm->pm_reg, base, 4);
2942 	if (ln2range == 64)
2943 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2944 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2945 	if (ln2range == 64)
2946 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2947 		    pm->pm_reg + 4, 4) << 32;
2948 }
2949 
2950 struct pci_map *
2951 pci_find_bar(device_t dev, int reg)
2952 {
2953 	struct pci_devinfo *dinfo;
2954 	struct pci_map *pm;
2955 
2956 	dinfo = device_get_ivars(dev);
2957 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2958 		if (pm->pm_reg == reg)
2959 			return (pm);
2960 	}
2961 	return (NULL);
2962 }
2963 
2964 int
2965 pci_bar_enabled(device_t dev, struct pci_map *pm)
2966 {
2967 	struct pci_devinfo *dinfo;
2968 	uint16_t cmd;
2969 
2970 	dinfo = device_get_ivars(dev);
2971 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2972 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2973 		return (0);
2974 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2975 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2976 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2977 	else
2978 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2979 }
2980 
2981 struct pci_map *
2982 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2983 {
2984 	struct pci_devinfo *dinfo;
2985 	struct pci_map *pm, *prev;
2986 
2987 	dinfo = device_get_ivars(dev);
2988 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2989 	pm->pm_reg = reg;
2990 	pm->pm_value = value;
2991 	pm->pm_size = size;
2992 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2993 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2994 		    reg));
2995 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2996 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2997 			break;
2998 	}
2999 	if (prev != NULL)
3000 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
3001 	else
3002 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
3003 	return (pm);
3004 }
3005 
3006 static void
3007 pci_restore_bars(device_t dev)
3008 {
3009 	struct pci_devinfo *dinfo;
3010 	struct pci_map *pm;
3011 	int ln2range;
3012 
3013 	dinfo = device_get_ivars(dev);
3014 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
3015 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
3016 			ln2range = 32;
3017 		else
3018 			ln2range = pci_maprange(pm->pm_value);
3019 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
3020 		if (ln2range == 64)
3021 			pci_write_config(dev, pm->pm_reg + 4,
3022 			    pm->pm_value >> 32, 4);
3023 	}
3024 }
3025 
3026 /*
3027  * Add a resource based on a pci map register. Return 1 if the map
3028  * register is a 32bit map register or 2 if it is a 64bit register.
3029  */
3030 static int
3031 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3032     int force, int prefetch)
3033 {
3034 	struct pci_map *pm;
3035 	pci_addr_t base, map, testval;
3036 	pci_addr_t start, end, count;
3037 	int barlen, basezero, flags, maprange, mapsize, type;
3038 	uint16_t cmd;
3039 	struct resource *res;
3040 
3041 	/*
3042 	 * The BAR may already exist if the device is a CardBus card
3043 	 * whose CIS is stored in this BAR.
3044 	 */
3045 	pm = pci_find_bar(dev, reg);
3046 	if (pm != NULL) {
3047 		maprange = pci_maprange(pm->pm_value);
3048 		barlen = maprange == 64 ? 2 : 1;
3049 		return (barlen);
3050 	}
3051 
3052 	pci_read_bar(dev, reg, &map, &testval, NULL);
3053 	if (PCI_BAR_MEM(map)) {
3054 		type = SYS_RES_MEMORY;
3055 		if (map & PCIM_BAR_MEM_PREFETCH)
3056 			prefetch = 1;
3057 	} else
3058 		type = SYS_RES_IOPORT;
3059 	mapsize = pci_mapsize(testval);
3060 	base = pci_mapbase(map);
3061 #ifdef __PCI_BAR_ZERO_VALID
3062 	basezero = 0;
3063 #else
3064 	basezero = base == 0;
3065 #endif
3066 	maprange = pci_maprange(map);
3067 	barlen = maprange == 64 ? 2 : 1;
3068 
3069 	/*
3070 	 * For I/O registers, if bottom bit is set, and the next bit up
3071 	 * isn't clear, we know we have a BAR that doesn't conform to the
3072 	 * spec, so ignore it.  Also, sanity check the size of the data
3073 	 * areas to the type of memory involved.  Memory must be at least
3074 	 * 16 bytes in size, while I/O ranges must be at least 4.
3075 	 */
3076 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3077 		return (barlen);
3078 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3079 	    (type == SYS_RES_IOPORT && mapsize < 2))
3080 		return (barlen);
3081 
3082 	/* Save a record of this BAR. */
3083 	pm = pci_add_bar(dev, reg, map, mapsize);
3084 	if (bootverbose) {
3085 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3086 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3087 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3088 			printf(", port disabled\n");
3089 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3090 			printf(", memory disabled\n");
3091 		else
3092 			printf(", enabled\n");
3093 	}
3094 
3095 	/*
3096 	 * If base is 0, then we have problems if this architecture does
3097 	 * not allow that.  It is best to ignore such entries for the
3098 	 * moment.  These will be allocated later if the driver specifically
3099 	 * requests them.  However, some removable buses look better when
3100 	 * all resources are allocated, so allow '0' to be overriden.
3101 	 *
3102 	 * Similarly treat maps whose values is the same as the test value
3103 	 * read back.  These maps have had all f's written to them by the
3104 	 * BIOS in an attempt to disable the resources.
3105 	 */
3106 	if (!force && (basezero || map == testval))
3107 		return (barlen);
3108 	if ((u_long)base != base) {
3109 		device_printf(bus,
3110 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3111 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3112 		    pci_get_function(dev), reg);
3113 		return (barlen);
3114 	}
3115 
3116 	/*
3117 	 * This code theoretically does the right thing, but has
3118 	 * undesirable side effects in some cases where peripherals
3119 	 * respond oddly to having these bits enabled.  Let the user
3120 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3121 	 * default).
3122 	 */
3123 	if (pci_enable_io_modes) {
3124 		/* Turn on resources that have been left off by a lazy BIOS */
3125 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3126 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3127 			cmd |= PCIM_CMD_PORTEN;
3128 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3129 		}
3130 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3131 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3132 			cmd |= PCIM_CMD_MEMEN;
3133 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3134 		}
3135 	} else {
3136 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3137 			return (barlen);
3138 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3139 			return (barlen);
3140 	}
3141 
3142 	count = (pci_addr_t)1 << mapsize;
3143 	flags = RF_ALIGNMENT_LOG2(mapsize);
3144 	if (prefetch)
3145 		flags |= RF_PREFETCHABLE;
3146 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3147 		start = 0;	/* Let the parent decide. */
3148 		end = ~0;
3149 	} else {
3150 		start = base;
3151 		end = base + count - 1;
3152 	}
3153 	resource_list_add(rl, type, reg, start, end, count);
3154 
3155 	/*
3156 	 * Try to allocate the resource for this BAR from our parent
3157 	 * so that this resource range is already reserved.  The
3158 	 * driver for this device will later inherit this resource in
3159 	 * pci_alloc_resource().
3160 	 */
3161 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3162 	    flags);
3163 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3164 		/*
3165 		 * If the allocation fails, try to allocate a resource for
3166 		 * this BAR using any available range.  The firmware felt
3167 		 * it was important enough to assign a resource, so don't
3168 		 * disable decoding if we can help it.
3169 		 */
3170 		resource_list_delete(rl, type, reg);
3171 		resource_list_add(rl, type, reg, 0, ~0, count);
3172 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3173 		    count, flags);
3174 	}
3175 	if (res == NULL) {
3176 		/*
3177 		 * If the allocation fails, delete the resource list entry
3178 		 * and disable decoding for this device.
3179 		 *
3180 		 * If the driver requests this resource in the future,
3181 		 * pci_reserve_map() will try to allocate a fresh
3182 		 * resource range.
3183 		 */
3184 		resource_list_delete(rl, type, reg);
3185 		pci_disable_io(dev, type);
3186 		if (bootverbose)
3187 			device_printf(bus,
3188 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3189 			    pci_get_domain(dev), pci_get_bus(dev),
3190 			    pci_get_slot(dev), pci_get_function(dev), reg);
3191 	} else {
3192 		start = rman_get_start(res);
3193 		pci_write_bar(dev, pm, start);
3194 	}
3195 	return (barlen);
3196 }
3197 
3198 /*
3199  * For ATA devices we need to decide early what addressing mode to use.
3200  * Legacy demands that the primary and secondary ATA ports sits on the
3201  * same addresses that old ISA hardware did. This dictates that we use
3202  * those addresses and ignore the BAR's if we cannot set PCI native
3203  * addressing mode.
3204  */
3205 static void
3206 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3207     uint32_t prefetchmask)
3208 {
3209 	int rid, type, progif;
3210 #if 0
3211 	/* if this device supports PCI native addressing use it */
3212 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3213 	if ((progif & 0x8a) == 0x8a) {
3214 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3215 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3216 			printf("Trying ATA native PCI addressing mode\n");
3217 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3218 		}
3219 	}
3220 #endif
3221 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3222 	type = SYS_RES_IOPORT;
3223 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3224 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3225 		    prefetchmask & (1 << 0));
3226 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3227 		    prefetchmask & (1 << 1));
3228 	} else {
3229 		rid = PCIR_BAR(0);
3230 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3231 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3232 		    0x1f7, 8, 0);
3233 		rid = PCIR_BAR(1);
3234 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3235 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3236 		    0x3f6, 1, 0);
3237 	}
3238 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3239 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3240 		    prefetchmask & (1 << 2));
3241 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3242 		    prefetchmask & (1 << 3));
3243 	} else {
3244 		rid = PCIR_BAR(2);
3245 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3246 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3247 		    0x177, 8, 0);
3248 		rid = PCIR_BAR(3);
3249 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3250 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3251 		    0x376, 1, 0);
3252 	}
3253 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3254 	    prefetchmask & (1 << 4));
3255 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3256 	    prefetchmask & (1 << 5));
3257 }
3258 
3259 static void
3260 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3261 {
3262 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3263 	pcicfgregs *cfg = &dinfo->cfg;
3264 	char tunable_name[64];
3265 	int irq;
3266 
3267 	/* Has to have an intpin to have an interrupt. */
3268 	if (cfg->intpin == 0)
3269 		return;
3270 
3271 	/* Let the user override the IRQ with a tunable. */
3272 	irq = PCI_INVALID_IRQ;
3273 	snprintf(tunable_name, sizeof(tunable_name),
3274 	    "hw.pci%d.%d.%d.INT%c.irq",
3275 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3276 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3277 		irq = PCI_INVALID_IRQ;
3278 
3279 	/*
3280 	 * If we didn't get an IRQ via the tunable, then we either use the
3281 	 * IRQ value in the intline register or we ask the bus to route an
3282 	 * interrupt for us.  If force_route is true, then we only use the
3283 	 * value in the intline register if the bus was unable to assign an
3284 	 * IRQ.
3285 	 */
3286 	if (!PCI_INTERRUPT_VALID(irq)) {
3287 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3288 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3289 		if (!PCI_INTERRUPT_VALID(irq))
3290 			irq = cfg->intline;
3291 	}
3292 
3293 	/* If after all that we don't have an IRQ, just bail. */
3294 	if (!PCI_INTERRUPT_VALID(irq))
3295 		return;
3296 
3297 	/* Update the config register if it changed. */
3298 	if (irq != cfg->intline) {
3299 		cfg->intline = irq;
3300 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3301 	}
3302 
3303 	/* Add this IRQ as rid 0 interrupt resource. */
3304 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3305 }
3306 
3307 /* Perform early OHCI takeover from SMM. */
3308 static void
3309 ohci_early_takeover(device_t self)
3310 {
3311 	struct resource *res;
3312 	uint32_t ctl;
3313 	int rid;
3314 	int i;
3315 
3316 	rid = PCIR_BAR(0);
3317 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3318 	if (res == NULL)
3319 		return;
3320 
3321 	ctl = bus_read_4(res, OHCI_CONTROL);
3322 	if (ctl & OHCI_IR) {
3323 		if (bootverbose)
3324 			printf("ohci early: "
3325 			    "SMM active, request owner change\n");
3326 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3327 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3328 			DELAY(1000);
3329 			ctl = bus_read_4(res, OHCI_CONTROL);
3330 		}
3331 		if (ctl & OHCI_IR) {
3332 			if (bootverbose)
3333 				printf("ohci early: "
3334 				    "SMM does not respond, resetting\n");
3335 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3336 		}
3337 		/* Disable interrupts */
3338 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3339 	}
3340 
3341 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3342 }
3343 
3344 /* Perform early UHCI takeover from SMM. */
3345 static void
3346 uhci_early_takeover(device_t self)
3347 {
3348 	struct resource *res;
3349 	int rid;
3350 
3351 	/*
3352 	 * Set the PIRQD enable bit and switch off all the others. We don't
3353 	 * want legacy support to interfere with us XXX Does this also mean
3354 	 * that the BIOS won't touch the keyboard anymore if it is connected
3355 	 * to the ports of the root hub?
3356 	 */
3357 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3358 
3359 	/* Disable interrupts */
3360 	rid = PCI_UHCI_BASE_REG;
3361 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3362 	if (res != NULL) {
3363 		bus_write_2(res, UHCI_INTR, 0);
3364 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3365 	}
3366 }
3367 
3368 /* Perform early EHCI takeover from SMM. */
3369 static void
3370 ehci_early_takeover(device_t self)
3371 {
3372 	struct resource *res;
3373 	uint32_t cparams;
3374 	uint32_t eec;
3375 	uint8_t eecp;
3376 	uint8_t bios_sem;
3377 	uint8_t offs;
3378 	int rid;
3379 	int i;
3380 
3381 	rid = PCIR_BAR(0);
3382 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3383 	if (res == NULL)
3384 		return;
3385 
3386 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3387 
3388 	/* Synchronise with the BIOS if it owns the controller. */
3389 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3390 	    eecp = EHCI_EECP_NEXT(eec)) {
3391 		eec = pci_read_config(self, eecp, 4);
3392 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3393 			continue;
3394 		}
3395 		bios_sem = pci_read_config(self, eecp +
3396 		    EHCI_LEGSUP_BIOS_SEM, 1);
3397 		if (bios_sem == 0) {
3398 			continue;
3399 		}
3400 		if (bootverbose)
3401 			printf("ehci early: "
3402 			    "SMM active, request owner change\n");
3403 
3404 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3405 
3406 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3407 			DELAY(1000);
3408 			bios_sem = pci_read_config(self, eecp +
3409 			    EHCI_LEGSUP_BIOS_SEM, 1);
3410 		}
3411 
3412 		if (bios_sem != 0) {
3413 			if (bootverbose)
3414 				printf("ehci early: "
3415 				    "SMM does not respond\n");
3416 		}
3417 		/* Disable interrupts */
3418 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3419 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3420 	}
3421 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3422 }
3423 
3424 /* Perform early XHCI takeover from SMM. */
3425 static void
3426 xhci_early_takeover(device_t self)
3427 {
3428 	struct resource *res;
3429 	uint32_t cparams;
3430 	uint32_t eec;
3431 	uint8_t eecp;
3432 	uint8_t bios_sem;
3433 	uint8_t offs;
3434 	int rid;
3435 	int i;
3436 
3437 	rid = PCIR_BAR(0);
3438 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3439 	if (res == NULL)
3440 		return;
3441 
3442 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3443 
3444 	eec = -1;
3445 
3446 	/* Synchronise with the BIOS if it owns the controller. */
3447 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3448 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3449 		eec = bus_read_4(res, eecp);
3450 
3451 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3452 			continue;
3453 
3454 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3455 		if (bios_sem == 0)
3456 			continue;
3457 
3458 		if (bootverbose)
3459 			printf("xhci early: "
3460 			    "SMM active, request owner change\n");
3461 
3462 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3463 
3464 		/* wait a maximum of 5 second */
3465 
3466 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3467 			DELAY(1000);
3468 			bios_sem = bus_read_1(res, eecp +
3469 			    XHCI_XECP_BIOS_SEM);
3470 		}
3471 
3472 		if (bios_sem != 0) {
3473 			if (bootverbose)
3474 				printf("xhci early: "
3475 				    "SMM does not respond\n");
3476 		}
3477 
3478 		/* Disable interrupts */
3479 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3480 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3481 		bus_read_4(res, offs + XHCI_USBSTS);
3482 	}
3483 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3484 }
3485 
3486 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3487 static void
3488 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3489     struct resource_list *rl)
3490 {
3491 	struct resource *res;
3492 	char *cp;
3493 	rman_res_t start, end, count;
3494 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3495 
3496 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3497 	case PCIM_HDRTYPE_BRIDGE:
3498 		sec_reg = PCIR_SECBUS_1;
3499 		sub_reg = PCIR_SUBBUS_1;
3500 		break;
3501 	case PCIM_HDRTYPE_CARDBUS:
3502 		sec_reg = PCIR_SECBUS_2;
3503 		sub_reg = PCIR_SUBBUS_2;
3504 		break;
3505 	default:
3506 		return;
3507 	}
3508 
3509 	/*
3510 	 * If the existing bus range is valid, attempt to reserve it
3511 	 * from our parent.  If this fails for any reason, clear the
3512 	 * secbus and subbus registers.
3513 	 *
3514 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3515 	 * This would at least preserve the existing sec_bus if it is
3516 	 * valid.
3517 	 */
3518 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3519 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3520 
3521 	/* Quirk handling. */
3522 	switch (pci_get_devid(dev)) {
3523 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3524 		sup_bus = pci_read_config(dev, 0x41, 1);
3525 		if (sup_bus != 0xff) {
3526 			sec_bus = sup_bus + 1;
3527 			sub_bus = sup_bus + 1;
3528 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3529 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3530 		}
3531 		break;
3532 
3533 	case 0x00dd10de:
3534 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3535 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3536 			break;
3537 		if (strncmp(cp, "Compal", 6) != 0) {
3538 			freeenv(cp);
3539 			break;
3540 		}
3541 		freeenv(cp);
3542 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3543 			break;
3544 		if (strncmp(cp, "08A0", 4) != 0) {
3545 			freeenv(cp);
3546 			break;
3547 		}
3548 		freeenv(cp);
3549 		if (sub_bus < 0xa) {
3550 			sub_bus = 0xa;
3551 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3552 		}
3553 		break;
3554 	}
3555 
3556 	if (bootverbose)
3557 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3558 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3559 		start = sec_bus;
3560 		end = sub_bus;
3561 		count = end - start + 1;
3562 
3563 		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
3564 
3565 		/*
3566 		 * If requested, clear secondary bus registers in
3567 		 * bridge devices to force a complete renumbering
3568 		 * rather than reserving the existing range.  However,
3569 		 * preserve the existing size.
3570 		 */
3571 		if (pci_clear_buses)
3572 			goto clear;
3573 
3574 		rid = 0;
3575 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3576 		    start, end, count, 0);
3577 		if (res != NULL)
3578 			return;
3579 
3580 		if (bootverbose)
3581 			device_printf(bus,
3582 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3583 			    pci_get_domain(dev), pci_get_bus(dev),
3584 			    pci_get_slot(dev), pci_get_function(dev));
3585 	}
3586 
3587 clear:
3588 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3589 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3590 }
3591 
3592 static struct resource *
3593 pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
3594     rman_res_t end, rman_res_t count, u_int flags)
3595 {
3596 	struct pci_devinfo *dinfo;
3597 	pcicfgregs *cfg;
3598 	struct resource_list *rl;
3599 	struct resource *res;
3600 	int sec_reg, sub_reg;
3601 
3602 	dinfo = device_get_ivars(child);
3603 	cfg = &dinfo->cfg;
3604 	rl = &dinfo->resources;
3605 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3606 	case PCIM_HDRTYPE_BRIDGE:
3607 		sec_reg = PCIR_SECBUS_1;
3608 		sub_reg = PCIR_SUBBUS_1;
3609 		break;
3610 	case PCIM_HDRTYPE_CARDBUS:
3611 		sec_reg = PCIR_SECBUS_2;
3612 		sub_reg = PCIR_SUBBUS_2;
3613 		break;
3614 	default:
3615 		return (NULL);
3616 	}
3617 
3618 	if (*rid != 0)
3619 		return (NULL);
3620 
3621 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3622 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3623 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3624 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3625 		    start, end, count, flags & ~RF_ACTIVE);
3626 		if (res == NULL) {
3627 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3628 			device_printf(child, "allocating %ju bus%s failed\n",
3629 			    count, count == 1 ? "" : "es");
3630 			return (NULL);
3631 		}
3632 		if (bootverbose)
3633 			device_printf(child,
3634 			    "Lazy allocation of %ju bus%s at %ju\n", count,
3635 			    count == 1 ? "" : "es", rman_get_start(res));
3636 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3637 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3638 	}
3639 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3640 	    end, count, flags));
3641 }
3642 #endif
3643 
3644 static int
3645 pci_ea_bei_to_rid(device_t dev, int bei)
3646 {
3647 #ifdef PCI_IOV
3648 	struct pci_devinfo *dinfo;
3649 	int iov_pos;
3650 	struct pcicfg_iov *iov;
3651 
3652 	dinfo = device_get_ivars(dev);
3653 	iov = dinfo->cfg.iov;
3654 	if (iov != NULL)
3655 		iov_pos = iov->iov_pos;
3656 	else
3657 		iov_pos = 0;
3658 #endif
3659 
3660 	/* Check if matches BAR */
3661 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3662 	    (bei <= PCIM_EA_BEI_BAR_5))
3663 		return (PCIR_BAR(bei));
3664 
3665 	/* Check ROM */
3666 	if (bei == PCIM_EA_BEI_ROM)
3667 		return (PCIR_BIOS);
3668 
3669 #ifdef PCI_IOV
3670 	/* Check if matches VF_BAR */
3671 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3672 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3673 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3674 		    iov_pos);
3675 #endif
3676 
3677 	return (-1);
3678 }
3679 
3680 int
3681 pci_ea_is_enabled(device_t dev, int rid)
3682 {
3683 	struct pci_ea_entry *ea;
3684 	struct pci_devinfo *dinfo;
3685 
3686 	dinfo = device_get_ivars(dev);
3687 
3688 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3689 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3690 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3691 	}
3692 
3693 	return (0);
3694 }
3695 
3696 void
3697 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3698 {
3699 	struct pci_ea_entry *ea;
3700 	struct pci_devinfo *dinfo;
3701 	pci_addr_t start, end, count;
3702 	struct resource_list *rl;
3703 	int type, flags, rid;
3704 	struct resource *res;
3705 	uint32_t tmp;
3706 #ifdef PCI_IOV
3707 	struct pcicfg_iov *iov;
3708 #endif
3709 
3710 	dinfo = device_get_ivars(dev);
3711 	rl = &dinfo->resources;
3712 	flags = 0;
3713 
3714 #ifdef PCI_IOV
3715 	iov = dinfo->cfg.iov;
3716 #endif
3717 
3718 	if (dinfo->cfg.ea.ea_location == 0)
3719 		return;
3720 
3721 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3722 
3723 		/*
3724 		 * TODO: Ignore EA-BAR if is not enabled.
3725 		 *   Currently the EA implementation supports
3726 		 *   only situation, where EA structure contains
3727 		 *   predefined entries. In case they are not enabled
3728 		 *   leave them unallocated and proceed with
3729 		 *   a legacy-BAR mechanism.
3730 		 */
3731 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3732 			continue;
3733 
3734 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3735 		case PCIM_EA_P_MEM_PREFETCH:
3736 		case PCIM_EA_P_VF_MEM_PREFETCH:
3737 			flags = RF_PREFETCHABLE;
3738 			/* FALLTHROUGH */
3739 		case PCIM_EA_P_VF_MEM:
3740 		case PCIM_EA_P_MEM:
3741 			type = SYS_RES_MEMORY;
3742 			break;
3743 		case PCIM_EA_P_IO:
3744 			type = SYS_RES_IOPORT;
3745 			break;
3746 		default:
3747 			continue;
3748 		}
3749 
3750 		if (alloc_iov != 0) {
3751 #ifdef PCI_IOV
3752 			/* Allocating IOV, confirm BEI matches */
3753 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3754 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3755 				continue;
3756 #else
3757 			continue;
3758 #endif
3759 		} else {
3760 			/* Allocating BAR, confirm BEI matches */
3761 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3762 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3763 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3764 				continue;
3765 		}
3766 
3767 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3768 		if (rid < 0)
3769 			continue;
3770 
3771 		/* Skip resources already allocated by EA */
3772 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3773 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3774 			continue;
3775 
3776 		start = ea->eae_base;
3777 		count = ea->eae_max_offset + 1;
3778 #ifdef PCI_IOV
3779 		if (iov != NULL)
3780 			count = count * iov->iov_num_vfs;
3781 #endif
3782 		end = start + count - 1;
3783 		if (count == 0)
3784 			continue;
3785 
3786 		resource_list_add(rl, type, rid, start, end, count);
3787 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3788 		    flags);
3789 		if (res == NULL) {
3790 			resource_list_delete(rl, type, rid);
3791 
3792 			/*
3793 			 * Failed to allocate using EA, disable entry.
3794 			 * Another attempt to allocation will be performed
3795 			 * further, but this time using legacy BAR registers
3796 			 */
3797 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3798 			tmp &= ~PCIM_EA_ENABLE;
3799 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3800 
3801 			/*
3802 			 * Disabling entry might fail in case it is hardwired.
3803 			 * Read flags again to match current status.
3804 			 */
3805 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3806 
3807 			continue;
3808 		}
3809 
3810 		/* As per specification, fill BAR with zeros */
3811 		pci_write_config(dev, rid, 0, 4);
3812 	}
3813 }
3814 
3815 void
3816 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3817 {
3818 	struct pci_devinfo *dinfo;
3819 	pcicfgregs *cfg;
3820 	struct resource_list *rl;
3821 	const struct pci_quirk *q;
3822 	uint32_t devid;
3823 	int i;
3824 
3825 	dinfo = device_get_ivars(dev);
3826 	cfg = &dinfo->cfg;
3827 	rl = &dinfo->resources;
3828 	devid = (cfg->device << 16) | cfg->vendor;
3829 
3830 	/* Allocate resources using Enhanced Allocation */
3831 	pci_add_resources_ea(bus, dev, 0);
3832 
3833 	/* ATA devices needs special map treatment */
3834 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3835 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3836 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3837 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3838 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3839 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3840 	else
3841 		for (i = 0; i < cfg->nummaps;) {
3842 			/* Skip resources already managed by EA */
3843 			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
3844 			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
3845 			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
3846 				i++;
3847 				continue;
3848 			}
3849 
3850 			/*
3851 			 * Skip quirked resources.
3852 			 */
3853 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3854 				if (q->devid == devid &&
3855 				    q->type == PCI_QUIRK_UNMAP_REG &&
3856 				    q->arg1 == PCIR_BAR(i))
3857 					break;
3858 			if (q->devid != 0) {
3859 				i++;
3860 				continue;
3861 			}
3862 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3863 			    prefetchmask & (1 << i));
3864 		}
3865 
3866 	/*
3867 	 * Add additional, quirked resources.
3868 	 */
3869 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3870 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3871 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3872 
3873 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3874 #ifdef __PCI_REROUTE_INTERRUPT
3875 		/*
3876 		 * Try to re-route interrupts. Sometimes the BIOS or
3877 		 * firmware may leave bogus values in these registers.
3878 		 * If the re-route fails, then just stick with what we
3879 		 * have.
3880 		 */
3881 		pci_assign_interrupt(bus, dev, 1);
3882 #else
3883 		pci_assign_interrupt(bus, dev, 0);
3884 #endif
3885 	}
3886 
3887 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3888 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3889 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3890 			xhci_early_takeover(dev);
3891 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3892 			ehci_early_takeover(dev);
3893 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3894 			ohci_early_takeover(dev);
3895 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3896 			uhci_early_takeover(dev);
3897 	}
3898 
3899 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3900 	/*
3901 	 * Reserve resources for secondary bus ranges behind bridge
3902 	 * devices.
3903 	 */
3904 	pci_reserve_secbus(bus, dev, cfg, rl);
3905 #endif
3906 }
3907 
3908 static struct pci_devinfo *
3909 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3910     int slot, int func)
3911 {
3912 	struct pci_devinfo *dinfo;
3913 
3914 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3915 	if (dinfo != NULL)
3916 		pci_add_child(dev, dinfo);
3917 
3918 	return (dinfo);
3919 }
3920 
3921 void
3922 pci_add_children(device_t dev, int domain, int busno)
3923 {
3924 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3925 	device_t pcib = device_get_parent(dev);
3926 	struct pci_devinfo *dinfo;
3927 	int maxslots;
3928 	int s, f, pcifunchigh;
3929 	uint8_t hdrtype;
3930 	int first_func;
3931 
3932 	/*
3933 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3934 	 * enable ARI.  We must enable ARI before detecting the rest of the
3935 	 * functions on this bus as ARI changes the set of slots and functions
3936 	 * that are legal on this bus.
3937 	 */
3938 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
3939 	if (dinfo != NULL && pci_enable_ari)
3940 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3941 
3942 	/*
3943 	 * Start looking for new devices on slot 0 at function 1 because we
3944 	 * just identified the device at slot 0, function 0.
3945 	 */
3946 	first_func = 1;
3947 
3948 	maxslots = PCIB_MAXSLOTS(pcib);
3949 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3950 		pcifunchigh = 0;
3951 		f = 0;
3952 		DELAY(1);
3953 		hdrtype = REG(PCIR_HDRTYPE, 1);
3954 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3955 			continue;
3956 		if (hdrtype & PCIM_MFDEV)
3957 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3958 		for (f = first_func; f <= pcifunchigh; f++)
3959 			pci_identify_function(pcib, dev, domain, busno, s, f);
3960 	}
3961 #undef REG
3962 }
3963 
3964 int
3965 pci_rescan_method(device_t dev)
3966 {
3967 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3968 	device_t pcib = device_get_parent(dev);
3969 	struct pci_softc *sc;
3970 	device_t child, *devlist, *unchanged;
3971 	int devcount, error, i, j, maxslots, oldcount;
3972 	int busno, domain, s, f, pcifunchigh;
3973 	uint8_t hdrtype;
3974 
3975 	/* No need to check for ARI on a rescan. */
3976 	error = device_get_children(dev, &devlist, &devcount);
3977 	if (error)
3978 		return (error);
3979 	if (devcount != 0) {
3980 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3981 		    M_NOWAIT | M_ZERO);
3982 		if (unchanged == NULL) {
3983 			free(devlist, M_TEMP);
3984 			return (ENOMEM);
3985 		}
3986 	} else
3987 		unchanged = NULL;
3988 
3989 	sc = device_get_softc(dev);
3990 	domain = pcib_get_domain(dev);
3991 	busno = pcib_get_bus(dev);
3992 	maxslots = PCIB_MAXSLOTS(pcib);
3993 	for (s = 0; s <= maxslots; s++) {
3994 		/* If function 0 is not present, skip to the next slot. */
3995 		f = 0;
3996 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3997 			continue;
3998 		pcifunchigh = 0;
3999 		hdrtype = REG(PCIR_HDRTYPE, 1);
4000 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
4001 			continue;
4002 		if (hdrtype & PCIM_MFDEV)
4003 			pcifunchigh = PCIB_MAXFUNCS(pcib);
4004 		for (f = 0; f <= pcifunchigh; f++) {
4005 			if (REG(PCIR_VENDOR, 2) == 0xffff)
4006 				continue;
4007 
4008 			/*
4009 			 * Found a valid function.  Check if a
4010 			 * device_t for this device already exists.
4011 			 */
4012 			for (i = 0; i < devcount; i++) {
4013 				child = devlist[i];
4014 				if (child == NULL)
4015 					continue;
4016 				if (pci_get_slot(child) == s &&
4017 				    pci_get_function(child) == f) {
4018 					unchanged[i] = child;
4019 					goto next_func;
4020 				}
4021 			}
4022 
4023 			pci_identify_function(pcib, dev, domain, busno, s, f);
4024 		next_func:;
4025 		}
4026 	}
4027 
4028 	/* Remove devices that are no longer present. */
4029 	for (i = 0; i < devcount; i++) {
4030 		if (unchanged[i] != NULL)
4031 			continue;
4032 		device_delete_child(dev, devlist[i]);
4033 	}
4034 
4035 	free(devlist, M_TEMP);
4036 	oldcount = devcount;
4037 
4038 	/* Try to attach the devices just added. */
4039 	error = device_get_children(dev, &devlist, &devcount);
4040 	if (error) {
4041 		free(unchanged, M_TEMP);
4042 		return (error);
4043 	}
4044 
4045 	for (i = 0; i < devcount; i++) {
4046 		for (j = 0; j < oldcount; j++) {
4047 			if (devlist[i] == unchanged[j])
4048 				goto next_device;
4049 		}
4050 
4051 		device_probe_and_attach(devlist[i]);
4052 	next_device:;
4053 	}
4054 
4055 	free(unchanged, M_TEMP);
4056 	free(devlist, M_TEMP);
4057 	return (0);
4058 #undef REG
4059 }
4060 
4061 #ifdef PCI_IOV
4062 device_t
4063 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4064     uint16_t did)
4065 {
4066 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4067 	device_t pcib;
4068 	int busno, slot, func;
4069 
4070 	pf_dinfo = device_get_ivars(pf);
4071 
4072 	pcib = device_get_parent(bus);
4073 
4074 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4075 
4076 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4077 	    slot, func, vid, did);
4078 
4079 	vf_dinfo->cfg.flags |= PCICFG_VF;
4080 	pci_add_child(bus, vf_dinfo);
4081 
4082 	return (vf_dinfo->cfg.dev);
4083 }
4084 
4085 device_t
4086 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
4087     uint16_t vid, uint16_t did)
4088 {
4089 
4090 	return (pci_add_iov_child(bus, pf, rid, vid, did));
4091 }
4092 #endif
4093 
4094 void
4095 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4096 {
4097 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4098 	device_set_ivars(dinfo->cfg.dev, dinfo);
4099 	resource_list_init(&dinfo->resources);
4100 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4101 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4102 	pci_print_verbose(dinfo);
4103 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4104 	pci_child_added(dinfo->cfg.dev);
4105 	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
4106 }
4107 
4108 void
4109 pci_child_added_method(device_t dev, device_t child)
4110 {
4111 
4112 }
4113 
4114 static int
4115 pci_probe(device_t dev)
4116 {
4117 
4118 	device_set_desc(dev, "PCI bus");
4119 
4120 	/* Allow other subclasses to override this driver. */
4121 	return (BUS_PROBE_GENERIC);
4122 }
4123 
4124 int
4125 pci_attach_common(device_t dev)
4126 {
4127 	struct pci_softc *sc;
4128 	int busno, domain;
4129 #ifdef PCI_DMA_BOUNDARY
4130 	int error, tag_valid;
4131 #endif
4132 #ifdef PCI_RES_BUS
4133 	int rid;
4134 #endif
4135 
4136 	sc = device_get_softc(dev);
4137 	domain = pcib_get_domain(dev);
4138 	busno = pcib_get_bus(dev);
4139 #ifdef PCI_RES_BUS
4140 	rid = 0;
4141 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
4142 	    1, 0);
4143 	if (sc->sc_bus == NULL) {
4144 		device_printf(dev, "failed to allocate bus number\n");
4145 		return (ENXIO);
4146 	}
4147 #endif
4148 	if (bootverbose)
4149 		device_printf(dev, "domain=%d, physical bus=%d\n",
4150 		    domain, busno);
4151 #ifdef PCI_DMA_BOUNDARY
4152 	tag_valid = 0;
4153 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
4154 	    devclass_find("pci")) {
4155 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
4156 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4157 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
4158 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
4159 		if (error)
4160 			device_printf(dev, "Failed to create DMA tag: %d\n",
4161 			    error);
4162 		else
4163 			tag_valid = 1;
4164 	}
4165 	if (!tag_valid)
4166 #endif
4167 		sc->sc_dma_tag = bus_get_dma_tag(dev);
4168 	return (0);
4169 }
4170 
4171 static int
4172 pci_attach(device_t dev)
4173 {
4174 	int busno, domain, error;
4175 
4176 	error = pci_attach_common(dev);
4177 	if (error)
4178 		return (error);
4179 
4180 	/*
4181 	 * Since there can be multiple independently numbered PCI
4182 	 * buses on systems with multiple PCI domains, we can't use
4183 	 * the unit number to decide which bus we are probing. We ask
4184 	 * the parent pcib what our domain and bus numbers are.
4185 	 */
4186 	domain = pcib_get_domain(dev);
4187 	busno = pcib_get_bus(dev);
4188 	pci_add_children(dev, domain, busno);
4189 	return (bus_generic_attach(dev));
4190 }
4191 
4192 static int
4193 pci_detach(device_t dev)
4194 {
4195 #ifdef PCI_RES_BUS
4196 	struct pci_softc *sc;
4197 #endif
4198 	int error;
4199 
4200 	error = bus_generic_detach(dev);
4201 	if (error)
4202 		return (error);
4203 #ifdef PCI_RES_BUS
4204 	sc = device_get_softc(dev);
4205 	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
4206 	if (error)
4207 		return (error);
4208 #endif
4209 	return (device_delete_children(dev));
4210 }
4211 
4212 static void
4213 pci_set_power_child(device_t dev, device_t child, int state)
4214 {
4215 	device_t pcib;
4216 	int dstate;
4217 
4218 	/*
4219 	 * Set the device to the given state.  If the firmware suggests
4220 	 * a different power state, use it instead.  If power management
4221 	 * is not present, the firmware is responsible for managing
4222 	 * device power.  Skip children who aren't attached since they
4223 	 * are handled separately.
4224 	 */
4225 	pcib = device_get_parent(dev);
4226 	dstate = state;
4227 	if (device_is_attached(child) &&
4228 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4229 		pci_set_powerstate(child, dstate);
4230 }
4231 
4232 int
4233 pci_suspend_child(device_t dev, device_t child)
4234 {
4235 	struct pci_devinfo *dinfo;
4236 	int error;
4237 
4238 	dinfo = device_get_ivars(child);
4239 
4240 	/*
4241 	 * Save the PCI configuration space for the child and set the
4242 	 * device in the appropriate power state for this sleep state.
4243 	 */
4244 	pci_cfg_save(child, dinfo, 0);
4245 
4246 	/* Suspend devices before potentially powering them down. */
4247 	error = bus_generic_suspend_child(dev, child);
4248 
4249 	if (error)
4250 		return (error);
4251 
4252 	if (pci_do_power_suspend)
4253 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4254 
4255 	return (0);
4256 }
4257 
4258 int
4259 pci_resume_child(device_t dev, device_t child)
4260 {
4261 	struct pci_devinfo *dinfo;
4262 
4263 	if (pci_do_power_resume)
4264 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4265 
4266 	dinfo = device_get_ivars(child);
4267 	pci_cfg_restore(child, dinfo);
4268 	if (!device_is_attached(child))
4269 		pci_cfg_save(child, dinfo, 1);
4270 
4271 	bus_generic_resume_child(dev, child);
4272 
4273 	return (0);
4274 }
4275 
4276 int
4277 pci_resume(device_t dev)
4278 {
4279 	device_t child, *devlist;
4280 	int error, i, numdevs;
4281 
4282 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4283 		return (error);
4284 
4285 	/*
4286 	 * Resume critical devices first, then everything else later.
4287 	 */
4288 	for (i = 0; i < numdevs; i++) {
4289 		child = devlist[i];
4290 		switch (pci_get_class(child)) {
4291 		case PCIC_DISPLAY:
4292 		case PCIC_MEMORY:
4293 		case PCIC_BRIDGE:
4294 		case PCIC_BASEPERIPH:
4295 			BUS_RESUME_CHILD(dev, child);
4296 			break;
4297 		}
4298 	}
4299 	for (i = 0; i < numdevs; i++) {
4300 		child = devlist[i];
4301 		switch (pci_get_class(child)) {
4302 		case PCIC_DISPLAY:
4303 		case PCIC_MEMORY:
4304 		case PCIC_BRIDGE:
4305 		case PCIC_BASEPERIPH:
4306 			break;
4307 		default:
4308 			BUS_RESUME_CHILD(dev, child);
4309 		}
4310 	}
4311 	free(devlist, M_TEMP);
4312 	return (0);
4313 }
4314 
4315 static void
4316 pci_load_vendor_data(void)
4317 {
4318 	caddr_t data;
4319 	void *ptr;
4320 	size_t sz;
4321 
4322 	data = preload_search_by_type("pci_vendor_data");
4323 	if (data != NULL) {
4324 		ptr = preload_fetch_addr(data);
4325 		sz = preload_fetch_size(data);
4326 		if (ptr != NULL && sz != 0) {
4327 			pci_vendordata = ptr;
4328 			pci_vendordata_size = sz;
4329 			/* terminate the database */
4330 			pci_vendordata[pci_vendordata_size] = '\n';
4331 		}
4332 	}
4333 }
4334 
4335 void
4336 pci_driver_added(device_t dev, driver_t *driver)
4337 {
4338 	int numdevs;
4339 	device_t *devlist;
4340 	device_t child;
4341 	struct pci_devinfo *dinfo;
4342 	int i;
4343 
4344 	if (bootverbose)
4345 		device_printf(dev, "driver added\n");
4346 	DEVICE_IDENTIFY(driver, dev);
4347 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4348 		return;
4349 	for (i = 0; i < numdevs; i++) {
4350 		child = devlist[i];
4351 		if (device_get_state(child) != DS_NOTPRESENT)
4352 			continue;
4353 		dinfo = device_get_ivars(child);
4354 		pci_print_verbose(dinfo);
4355 		if (bootverbose)
4356 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4357 		pci_cfg_restore(child, dinfo);
4358 		if (device_probe_and_attach(child) != 0)
4359 			pci_child_detached(dev, child);
4360 	}
4361 	free(devlist, M_TEMP);
4362 }
4363 
4364 int
4365 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
4366     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
4367 {
4368 	struct pci_devinfo *dinfo;
4369 	struct msix_table_entry *mte;
4370 	struct msix_vector *mv;
4371 	uint64_t addr;
4372 	uint32_t data;
4373 	void *cookie;
4374 	int error, rid;
4375 
4376 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
4377 	    arg, &cookie);
4378 	if (error)
4379 		return (error);
4380 
4381 	/* If this is not a direct child, just bail out. */
4382 	if (device_get_parent(child) != dev) {
4383 		*cookiep = cookie;
4384 		return(0);
4385 	}
4386 
4387 	rid = rman_get_rid(irq);
4388 	if (rid == 0) {
4389 		/* Make sure that INTx is enabled */
4390 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4391 	} else {
4392 		/*
4393 		 * Check to see if the interrupt is MSI or MSI-X.
4394 		 * Ask our parent to map the MSI and give
4395 		 * us the address and data register values.
4396 		 * If we fail for some reason, teardown the
4397 		 * interrupt handler.
4398 		 */
4399 		dinfo = device_get_ivars(child);
4400 		if (dinfo->cfg.msi.msi_alloc > 0) {
4401 			if (dinfo->cfg.msi.msi_addr == 0) {
4402 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
4403 			    ("MSI has handlers, but vectors not mapped"));
4404 				error = PCIB_MAP_MSI(device_get_parent(dev),
4405 				    child, rman_get_start(irq), &addr, &data);
4406 				if (error)
4407 					goto bad;
4408 				dinfo->cfg.msi.msi_addr = addr;
4409 				dinfo->cfg.msi.msi_data = data;
4410 			}
4411 			if (dinfo->cfg.msi.msi_handlers == 0)
4412 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
4413 				    dinfo->cfg.msi.msi_data);
4414 			dinfo->cfg.msi.msi_handlers++;
4415 		} else {
4416 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4417 			    ("No MSI or MSI-X interrupts allocated"));
4418 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4419 			    ("MSI-X index too high"));
4420 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4421 			KASSERT(mte->mte_vector != 0, ("no message vector"));
4422 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
4423 			KASSERT(mv->mv_irq == rman_get_start(irq),
4424 			    ("IRQ mismatch"));
4425 			if (mv->mv_address == 0) {
4426 				KASSERT(mte->mte_handlers == 0,
4427 		    ("MSI-X table entry has handlers, but vector not mapped"));
4428 				error = PCIB_MAP_MSI(device_get_parent(dev),
4429 				    child, rman_get_start(irq), &addr, &data);
4430 				if (error)
4431 					goto bad;
4432 				mv->mv_address = addr;
4433 				mv->mv_data = data;
4434 			}
4435 
4436 			/*
4437 			 * The MSIX table entry must be made valid by
4438 			 * incrementing the mte_handlers before
4439 			 * calling pci_enable_msix() and
4440 			 * pci_resume_msix(). Else the MSIX rewrite
4441 			 * table quirk will not work as expected.
4442 			 */
4443 			mte->mte_handlers++;
4444 			if (mte->mte_handlers == 1) {
4445 				pci_enable_msix(child, rid - 1, mv->mv_address,
4446 				    mv->mv_data);
4447 				pci_unmask_msix(child, rid - 1);
4448 			}
4449 		}
4450 
4451 		/*
4452 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
4453 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
4454 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
4455 		 */
4456 		if (!pci_has_quirk(pci_get_devid(child),
4457 		    PCI_QUIRK_MSI_INTX_BUG))
4458 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4459 		else
4460 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4461 	bad:
4462 		if (error) {
4463 			(void)bus_generic_teardown_intr(dev, child, irq,
4464 			    cookie);
4465 			return (error);
4466 		}
4467 	}
4468 	*cookiep = cookie;
4469 	return (0);
4470 }
4471 
4472 int
4473 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4474     void *cookie)
4475 {
4476 	struct msix_table_entry *mte;
4477 	struct resource_list_entry *rle;
4478 	struct pci_devinfo *dinfo;
4479 	int error, rid;
4480 
4481 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4482 		return (EINVAL);
4483 
4484 	/* If this isn't a direct child, just bail out */
4485 	if (device_get_parent(child) != dev)
4486 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4487 
4488 	rid = rman_get_rid(irq);
4489 	if (rid == 0) {
4490 		/* Mask INTx */
4491 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4492 	} else {
4493 		/*
4494 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4495 		 * decrement the appropriate handlers count and mask the
4496 		 * MSI-X message, or disable MSI messages if the count
4497 		 * drops to 0.
4498 		 */
4499 		dinfo = device_get_ivars(child);
4500 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4501 		if (rle->res != irq)
4502 			return (EINVAL);
4503 		if (dinfo->cfg.msi.msi_alloc > 0) {
4504 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4505 			    ("MSI-X index too high"));
4506 			if (dinfo->cfg.msi.msi_handlers == 0)
4507 				return (EINVAL);
4508 			dinfo->cfg.msi.msi_handlers--;
4509 			if (dinfo->cfg.msi.msi_handlers == 0)
4510 				pci_disable_msi(child);
4511 		} else {
4512 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4513 			    ("No MSI or MSI-X interrupts allocated"));
4514 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4515 			    ("MSI-X index too high"));
4516 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4517 			if (mte->mte_handlers == 0)
4518 				return (EINVAL);
4519 			mte->mte_handlers--;
4520 			if (mte->mte_handlers == 0)
4521 				pci_mask_msix(child, rid - 1);
4522 		}
4523 	}
4524 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4525 	if (rid > 0)
4526 		KASSERT(error == 0,
4527 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4528 	return (error);
4529 }
4530 
4531 int
4532 pci_print_child(device_t dev, device_t child)
4533 {
4534 	struct pci_devinfo *dinfo;
4535 	struct resource_list *rl;
4536 	int retval = 0;
4537 
4538 	dinfo = device_get_ivars(child);
4539 	rl = &dinfo->resources;
4540 
4541 	retval += bus_print_child_header(dev, child);
4542 
4543 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4544 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4545 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4546 	if (device_get_flags(dev))
4547 		retval += printf(" flags %#x", device_get_flags(dev));
4548 
4549 	retval += printf(" at device %d.%d", pci_get_slot(child),
4550 	    pci_get_function(child));
4551 
4552 	retval += bus_print_child_domain(dev, child);
4553 	retval += bus_print_child_footer(dev, child);
4554 
4555 	return (retval);
4556 }
4557 
4558 static const struct
4559 {
4560 	int		class;
4561 	int		subclass;
4562 	int		report; /* 0 = bootverbose, 1 = always */
4563 	const char	*desc;
4564 } pci_nomatch_tab[] = {
4565 	{PCIC_OLD,		-1,			1, "old"},
4566 	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
4567 	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
4568 	{PCIC_STORAGE,		-1,			1, "mass storage"},
4569 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
4570 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
4571 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
4572 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
4573 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
4574 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
4575 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
4576 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
4577 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
4578 	{PCIC_NETWORK,		-1,			1, "network"},
4579 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
4580 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
4581 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
4582 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
4583 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
4584 	{PCIC_DISPLAY,		-1,			1, "display"},
4585 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
4586 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
4587 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
4588 	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
4589 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
4590 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
4591 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
4592 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
4593 	{PCIC_MEMORY,		-1,			1, "memory"},
4594 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
4595 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
4596 	{PCIC_BRIDGE,		-1,			1, "bridge"},
4597 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
4598 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
4599 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
4600 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
4601 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
4602 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
4603 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
4604 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
4605 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
4606 	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
4607 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
4608 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
4609 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
4610 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
4611 	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
4612 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
4613 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
4614 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
4615 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
4616 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
4617 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
4618 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
4619 	{PCIC_INPUTDEV,		-1,			1, "input device"},
4620 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
4621 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
4622 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
4623 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
4624 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
4625 	{PCIC_DOCKING,		-1,			1, "docking station"},
4626 	{PCIC_PROCESSOR,	-1,			1, "processor"},
4627 	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
4628 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
4629 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
4630 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
4631 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
4632 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
4633 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
4634 	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
4635 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
4636 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
4637 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
4638 	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
4639 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
4640 	{PCIC_SATCOM,		-1,			1, "satellite communication"},
4641 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
4642 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
4643 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
4644 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
4645 	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
4646 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
4647 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
4648 	{PCIC_DASP,		-1,			0, "dasp"},
4649 	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
4650 	{PCIC_DASP,		PCIS_DASP_PERFCNTRS,	1, "performance counters"},
4651 	{PCIC_DASP,		PCIS_DASP_COMM_SYNC,	1, "communication synchronizer"},
4652 	{PCIC_DASP,		PCIS_DASP_MGMT_CARD,	1, "signal processing management"},
4653 	{0, 0, 0,		NULL}
4654 };
4655 
4656 void
4657 pci_probe_nomatch(device_t dev, device_t child)
4658 {
4659 	int i, report;
4660 	const char *cp, *scp;
4661 	char *device;
4662 
4663 	/*
4664 	 * Look for a listing for this device in a loaded device database.
4665 	 */
4666 	report = 1;
4667 	if ((device = pci_describe_device(child)) != NULL) {
4668 		device_printf(dev, "<%s>", device);
4669 		free(device, M_DEVBUF);
4670 	} else {
4671 		/*
4672 		 * Scan the class/subclass descriptions for a general
4673 		 * description.
4674 		 */
4675 		cp = "unknown";
4676 		scp = NULL;
4677 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4678 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4679 				if (pci_nomatch_tab[i].subclass == -1) {
4680 					cp = pci_nomatch_tab[i].desc;
4681 					report = pci_nomatch_tab[i].report;
4682 				} else if (pci_nomatch_tab[i].subclass ==
4683 				    pci_get_subclass(child)) {
4684 					scp = pci_nomatch_tab[i].desc;
4685 					report = pci_nomatch_tab[i].report;
4686 				}
4687 			}
4688 		}
4689 		if (report || bootverbose) {
4690 			device_printf(dev, "<%s%s%s>",
4691 			    cp ? cp : "",
4692 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4693 			    scp ? scp : "");
4694 		}
4695 	}
4696 	if (report || bootverbose) {
4697 		printf(" at device %d.%d (no driver attached)\n",
4698 		    pci_get_slot(child), pci_get_function(child));
4699 	}
4700 	pci_cfg_save(child, device_get_ivars(child), 1);
4701 }
4702 
4703 void
4704 pci_child_detached(device_t dev, device_t child)
4705 {
4706 	struct pci_devinfo *dinfo;
4707 	struct resource_list *rl;
4708 
4709 	dinfo = device_get_ivars(child);
4710 	rl = &dinfo->resources;
4711 
4712 	/*
4713 	 * Have to deallocate IRQs before releasing any MSI messages and
4714 	 * have to release MSI messages before deallocating any memory
4715 	 * BARs.
4716 	 */
4717 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4718 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4719 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4720 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4721 		(void)pci_release_msi(child);
4722 	}
4723 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4724 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4725 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4726 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4727 #ifdef PCI_RES_BUS
4728 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4729 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4730 #endif
4731 
4732 	pci_cfg_save(child, dinfo, 1);
4733 }
4734 
4735 /*
4736  * Parse the PCI device database, if loaded, and return a pointer to a
4737  * description of the device.
4738  *
4739  * The database is flat text formatted as follows:
4740  *
4741  * Any line not in a valid format is ignored.
4742  * Lines are terminated with newline '\n' characters.
4743  *
4744  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4745  * the vendor name.
4746  *
4747  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4748  * - devices cannot be listed without a corresponding VENDOR line.
4749  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4750  * another TAB, then the device name.
4751  */
4752 
4753 /*
4754  * Assuming (ptr) points to the beginning of a line in the database,
4755  * return the vendor or device and description of the next entry.
4756  * The value of (vendor) or (device) inappropriate for the entry type
4757  * is set to -1.  Returns nonzero at the end of the database.
4758  *
4759  * Note that this is slightly unrobust in the face of corrupt data;
4760  * we attempt to safeguard against this by spamming the end of the
4761  * database with a newline when we initialise.
4762  */
4763 static int
4764 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4765 {
4766 	char	*cp = *ptr;
4767 	int	left;
4768 
4769 	*device = -1;
4770 	*vendor = -1;
4771 	**desc = '\0';
4772 	for (;;) {
4773 		left = pci_vendordata_size - (cp - pci_vendordata);
4774 		if (left <= 0) {
4775 			*ptr = cp;
4776 			return(1);
4777 		}
4778 
4779 		/* vendor entry? */
4780 		if (*cp != '\t' &&
4781 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4782 			break;
4783 		/* device entry? */
4784 		if (*cp == '\t' &&
4785 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4786 			break;
4787 
4788 		/* skip to next line */
4789 		while (*cp != '\n' && left > 0) {
4790 			cp++;
4791 			left--;
4792 		}
4793 		if (*cp == '\n') {
4794 			cp++;
4795 			left--;
4796 		}
4797 	}
4798 	/* skip to next line */
4799 	while (*cp != '\n' && left > 0) {
4800 		cp++;
4801 		left--;
4802 	}
4803 	if (*cp == '\n' && left > 0)
4804 		cp++;
4805 	*ptr = cp;
4806 	return(0);
4807 }
4808 
4809 static char *
4810 pci_describe_device(device_t dev)
4811 {
4812 	int	vendor, device;
4813 	char	*desc, *vp, *dp, *line;
4814 
4815 	desc = vp = dp = NULL;
4816 
4817 	/*
4818 	 * If we have no vendor data, we can't do anything.
4819 	 */
4820 	if (pci_vendordata == NULL)
4821 		goto out;
4822 
4823 	/*
4824 	 * Scan the vendor data looking for this device
4825 	 */
4826 	line = pci_vendordata;
4827 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4828 		goto out;
4829 	for (;;) {
4830 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4831 			goto out;
4832 		if (vendor == pci_get_vendor(dev))
4833 			break;
4834 	}
4835 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4836 		goto out;
4837 	for (;;) {
4838 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4839 			*dp = 0;
4840 			break;
4841 		}
4842 		if (vendor != -1) {
4843 			*dp = 0;
4844 			break;
4845 		}
4846 		if (device == pci_get_device(dev))
4847 			break;
4848 	}
4849 	if (dp[0] == '\0')
4850 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4851 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4852 	    NULL)
4853 		sprintf(desc, "%s, %s", vp, dp);
4854 out:
4855 	if (vp != NULL)
4856 		free(vp, M_DEVBUF);
4857 	if (dp != NULL)
4858 		free(dp, M_DEVBUF);
4859 	return(desc);
4860 }
4861 
4862 int
4863 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4864 {
4865 	struct pci_devinfo *dinfo;
4866 	pcicfgregs *cfg;
4867 
4868 	dinfo = device_get_ivars(child);
4869 	cfg = &dinfo->cfg;
4870 
4871 	switch (which) {
4872 	case PCI_IVAR_ETHADDR:
4873 		/*
4874 		 * The generic accessor doesn't deal with failure, so
4875 		 * we set the return value, then return an error.
4876 		 */
4877 		*((uint8_t **) result) = NULL;
4878 		return (EINVAL);
4879 	case PCI_IVAR_SUBVENDOR:
4880 		*result = cfg->subvendor;
4881 		break;
4882 	case PCI_IVAR_SUBDEVICE:
4883 		*result = cfg->subdevice;
4884 		break;
4885 	case PCI_IVAR_VENDOR:
4886 		*result = cfg->vendor;
4887 		break;
4888 	case PCI_IVAR_DEVICE:
4889 		*result = cfg->device;
4890 		break;
4891 	case PCI_IVAR_DEVID:
4892 		*result = (cfg->device << 16) | cfg->vendor;
4893 		break;
4894 	case PCI_IVAR_CLASS:
4895 		*result = cfg->baseclass;
4896 		break;
4897 	case PCI_IVAR_SUBCLASS:
4898 		*result = cfg->subclass;
4899 		break;
4900 	case PCI_IVAR_PROGIF:
4901 		*result = cfg->progif;
4902 		break;
4903 	case PCI_IVAR_REVID:
4904 		*result = cfg->revid;
4905 		break;
4906 	case PCI_IVAR_INTPIN:
4907 		*result = cfg->intpin;
4908 		break;
4909 	case PCI_IVAR_IRQ:
4910 		*result = cfg->intline;
4911 		break;
4912 	case PCI_IVAR_DOMAIN:
4913 		*result = cfg->domain;
4914 		break;
4915 	case PCI_IVAR_BUS:
4916 		*result = cfg->bus;
4917 		break;
4918 	case PCI_IVAR_SLOT:
4919 		*result = cfg->slot;
4920 		break;
4921 	case PCI_IVAR_FUNCTION:
4922 		*result = cfg->func;
4923 		break;
4924 	case PCI_IVAR_CMDREG:
4925 		*result = cfg->cmdreg;
4926 		break;
4927 	case PCI_IVAR_CACHELNSZ:
4928 		*result = cfg->cachelnsz;
4929 		break;
4930 	case PCI_IVAR_MINGNT:
4931 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4932 			*result = -1;
4933 			return (EINVAL);
4934 		}
4935 		*result = cfg->mingnt;
4936 		break;
4937 	case PCI_IVAR_MAXLAT:
4938 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4939 			*result = -1;
4940 			return (EINVAL);
4941 		}
4942 		*result = cfg->maxlat;
4943 		break;
4944 	case PCI_IVAR_LATTIMER:
4945 		*result = cfg->lattimer;
4946 		break;
4947 	default:
4948 		return (ENOENT);
4949 	}
4950 	return (0);
4951 }
4952 
4953 int
4954 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4955 {
4956 	struct pci_devinfo *dinfo;
4957 
4958 	dinfo = device_get_ivars(child);
4959 
4960 	switch (which) {
4961 	case PCI_IVAR_INTPIN:
4962 		dinfo->cfg.intpin = value;
4963 		return (0);
4964 	case PCI_IVAR_ETHADDR:
4965 	case PCI_IVAR_SUBVENDOR:
4966 	case PCI_IVAR_SUBDEVICE:
4967 	case PCI_IVAR_VENDOR:
4968 	case PCI_IVAR_DEVICE:
4969 	case PCI_IVAR_DEVID:
4970 	case PCI_IVAR_CLASS:
4971 	case PCI_IVAR_SUBCLASS:
4972 	case PCI_IVAR_PROGIF:
4973 	case PCI_IVAR_REVID:
4974 	case PCI_IVAR_IRQ:
4975 	case PCI_IVAR_DOMAIN:
4976 	case PCI_IVAR_BUS:
4977 	case PCI_IVAR_SLOT:
4978 	case PCI_IVAR_FUNCTION:
4979 		return (EINVAL);	/* disallow for now */
4980 
4981 	default:
4982 		return (ENOENT);
4983 	}
4984 }
4985 
4986 #include "opt_ddb.h"
4987 #ifdef DDB
4988 #include <ddb/ddb.h>
4989 #include <sys/cons.h>
4990 
4991 /*
4992  * List resources based on pci map registers, used for within ddb
4993  */
4994 
4995 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4996 {
4997 	struct pci_devinfo *dinfo;
4998 	struct devlist *devlist_head;
4999 	struct pci_conf *p;
5000 	const char *name;
5001 	int i, error, none_count;
5002 
5003 	none_count = 0;
5004 	/* get the head of the device queue */
5005 	devlist_head = &pci_devq;
5006 
5007 	/*
5008 	 * Go through the list of devices and print out devices
5009 	 */
5010 	for (error = 0, i = 0,
5011 	     dinfo = STAILQ_FIRST(devlist_head);
5012 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
5013 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
5014 
5015 		/* Populate pd_name and pd_unit */
5016 		name = NULL;
5017 		if (dinfo->cfg.dev)
5018 			name = device_get_name(dinfo->cfg.dev);
5019 
5020 		p = &dinfo->conf;
5021 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
5022 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
5023 			(name && *name) ? name : "none",
5024 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
5025 			none_count++,
5026 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
5027 			p->pc_sel.pc_func, (p->pc_class << 16) |
5028 			(p->pc_subclass << 8) | p->pc_progif,
5029 			(p->pc_subdevice << 16) | p->pc_subvendor,
5030 			(p->pc_device << 16) | p->pc_vendor,
5031 			p->pc_revid, p->pc_hdr);
5032 	}
5033 }
5034 #endif /* DDB */
5035 
5036 static struct resource *
5037 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
5038     rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
5039     u_int flags)
5040 {
5041 	struct pci_devinfo *dinfo = device_get_ivars(child);
5042 	struct resource_list *rl = &dinfo->resources;
5043 	struct resource *res;
5044 	struct pci_map *pm;
5045 	uint16_t cmd;
5046 	pci_addr_t map, testval;
5047 	int mapsize;
5048 
5049 	res = NULL;
5050 
5051 	/* If rid is managed by EA, ignore it */
5052 	if (pci_ea_is_enabled(child, *rid))
5053 		goto out;
5054 
5055 	pm = pci_find_bar(child, *rid);
5056 	if (pm != NULL) {
5057 		/* This is a BAR that we failed to allocate earlier. */
5058 		mapsize = pm->pm_size;
5059 		map = pm->pm_value;
5060 	} else {
5061 		/*
5062 		 * Weed out the bogons, and figure out how large the
5063 		 * BAR/map is.  BARs that read back 0 here are bogus
5064 		 * and unimplemented.  Note: atapci in legacy mode are
5065 		 * special and handled elsewhere in the code.  If you
5066 		 * have a atapci device in legacy mode and it fails
5067 		 * here, that other code is broken.
5068 		 */
5069 		pci_read_bar(child, *rid, &map, &testval, NULL);
5070 
5071 		/*
5072 		 * Determine the size of the BAR and ignore BARs with a size
5073 		 * of 0.  Device ROM BARs use a different mask value.
5074 		 */
5075 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
5076 			mapsize = pci_romsize(testval);
5077 		else
5078 			mapsize = pci_mapsize(testval);
5079 		if (mapsize == 0)
5080 			goto out;
5081 		pm = pci_add_bar(child, *rid, map, mapsize);
5082 	}
5083 
5084 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
5085 		if (type != SYS_RES_MEMORY) {
5086 			if (bootverbose)
5087 				device_printf(dev,
5088 				    "child %s requested type %d for rid %#x,"
5089 				    " but the BAR says it is an memio\n",
5090 				    device_get_nameunit(child), type, *rid);
5091 			goto out;
5092 		}
5093 	} else {
5094 		if (type != SYS_RES_IOPORT) {
5095 			if (bootverbose)
5096 				device_printf(dev,
5097 				    "child %s requested type %d for rid %#x,"
5098 				    " but the BAR says it is an ioport\n",
5099 				    device_get_nameunit(child), type, *rid);
5100 			goto out;
5101 		}
5102 	}
5103 
5104 	/*
5105 	 * For real BARs, we need to override the size that
5106 	 * the driver requests, because that's what the BAR
5107 	 * actually uses and we would otherwise have a
5108 	 * situation where we might allocate the excess to
5109 	 * another driver, which won't work.
5110 	 */
5111 	count = ((pci_addr_t)1 << mapsize) * num;
5112 	if (RF_ALIGNMENT(flags) < mapsize)
5113 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
5114 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
5115 		flags |= RF_PREFETCHABLE;
5116 
5117 	/*
5118 	 * Allocate enough resource, and then write back the
5119 	 * appropriate BAR for that resource.
5120 	 */
5121 	resource_list_add(rl, type, *rid, start, end, count);
5122 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
5123 	    count, flags & ~RF_ACTIVE);
5124 	if (res == NULL) {
5125 		resource_list_delete(rl, type, *rid);
5126 		device_printf(child,
5127 		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
5128 		    count, *rid, type, start, end);
5129 		goto out;
5130 	}
5131 	if (bootverbose)
5132 		device_printf(child,
5133 		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
5134 		    count, *rid, type, rman_get_start(res));
5135 
5136 	/* Disable decoding via the CMD register before updating the BAR */
5137 	cmd = pci_read_config(child, PCIR_COMMAND, 2);
5138 	pci_write_config(child, PCIR_COMMAND,
5139 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
5140 
5141 	map = rman_get_start(res);
5142 	pci_write_bar(child, pm, map);
5143 
5144 	/* Restore the original value of the CMD register */
5145 	pci_write_config(child, PCIR_COMMAND, cmd, 2);
5146 out:
5147 	return (res);
5148 }
5149 
5150 struct resource *
5151 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
5152     rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
5153     u_int flags)
5154 {
5155 	struct pci_devinfo *dinfo;
5156 	struct resource_list *rl;
5157 	struct resource_list_entry *rle;
5158 	struct resource *res;
5159 	pcicfgregs *cfg;
5160 
5161 	/*
5162 	 * Perform lazy resource allocation
5163 	 */
5164 	dinfo = device_get_ivars(child);
5165 	rl = &dinfo->resources;
5166 	cfg = &dinfo->cfg;
5167 	switch (type) {
5168 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
5169 	case PCI_RES_BUS:
5170 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
5171 		    flags));
5172 #endif
5173 	case SYS_RES_IRQ:
5174 		/*
5175 		 * Can't alloc legacy interrupt once MSI messages have
5176 		 * been allocated.
5177 		 */
5178 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
5179 		    cfg->msix.msix_alloc > 0))
5180 			return (NULL);
5181 
5182 		/*
5183 		 * If the child device doesn't have an interrupt
5184 		 * routed and is deserving of an interrupt, try to
5185 		 * assign it one.
5186 		 */
5187 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
5188 		    (cfg->intpin != 0))
5189 			pci_assign_interrupt(dev, child, 0);
5190 		break;
5191 	case SYS_RES_IOPORT:
5192 	case SYS_RES_MEMORY:
5193 #ifdef NEW_PCIB
5194 		/*
5195 		 * PCI-PCI bridge I/O window resources are not BARs.
5196 		 * For those allocations just pass the request up the
5197 		 * tree.
5198 		 */
5199 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
5200 			switch (*rid) {
5201 			case PCIR_IOBASEL_1:
5202 			case PCIR_MEMBASE_1:
5203 			case PCIR_PMBASEL_1:
5204 				/*
5205 				 * XXX: Should we bother creating a resource
5206 				 * list entry?
5207 				 */
5208 				return (bus_generic_alloc_resource(dev, child,
5209 				    type, rid, start, end, count, flags));
5210 			}
5211 		}
5212 #endif
5213 		/* Reserve resources for this BAR if needed. */
5214 		rle = resource_list_find(rl, type, *rid);
5215 		if (rle == NULL) {
5216 			res = pci_reserve_map(dev, child, type, rid, start, end,
5217 			    count, num, flags);
5218 			if (res == NULL)
5219 				return (NULL);
5220 		}
5221 	}
5222 	return (resource_list_alloc(rl, dev, child, type, rid,
5223 	    start, end, count, flags));
5224 }
5225 
5226 struct resource *
5227 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
5228     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
5229 {
5230 #ifdef PCI_IOV
5231 	struct pci_devinfo *dinfo;
5232 #endif
5233 
5234 	if (device_get_parent(child) != dev)
5235 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
5236 		    type, rid, start, end, count, flags));
5237 
5238 #ifdef PCI_IOV
5239 	dinfo = device_get_ivars(child);
5240 	if (dinfo->cfg.flags & PCICFG_VF) {
5241 		switch (type) {
5242 		/* VFs can't have I/O BARs. */
5243 		case SYS_RES_IOPORT:
5244 			return (NULL);
5245 		case SYS_RES_MEMORY:
5246 			return (pci_vf_alloc_mem_resource(dev, child, rid,
5247 			    start, end, count, flags));
5248 		}
5249 
5250 		/* Fall through for other types of resource allocations. */
5251 	}
5252 #endif
5253 
5254 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
5255 	    count, 1, flags));
5256 }
5257 
5258 int
5259 pci_release_resource(device_t dev, device_t child, int type, int rid,
5260     struct resource *r)
5261 {
5262 	struct pci_devinfo *dinfo;
5263 	struct resource_list *rl;
5264 	pcicfgregs *cfg;
5265 
5266 	if (device_get_parent(child) != dev)
5267 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
5268 		    type, rid, r));
5269 
5270 	dinfo = device_get_ivars(child);
5271 	cfg = &dinfo->cfg;
5272 
5273 #ifdef PCI_IOV
5274 	if (dinfo->cfg.flags & PCICFG_VF) {
5275 		switch (type) {
5276 		/* VFs can't have I/O BARs. */
5277 		case SYS_RES_IOPORT:
5278 			return (EDOOFUS);
5279 		case SYS_RES_MEMORY:
5280 			return (pci_vf_release_mem_resource(dev, child, rid,
5281 			    r));
5282 		}
5283 
5284 		/* Fall through for other types of resource allocations. */
5285 	}
5286 #endif
5287 
5288 #ifdef NEW_PCIB
5289 	/*
5290 	 * PCI-PCI bridge I/O window resources are not BARs.  For
5291 	 * those allocations just pass the request up the tree.
5292 	 */
5293 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
5294 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
5295 		switch (rid) {
5296 		case PCIR_IOBASEL_1:
5297 		case PCIR_MEMBASE_1:
5298 		case PCIR_PMBASEL_1:
5299 			return (bus_generic_release_resource(dev, child, type,
5300 			    rid, r));
5301 		}
5302 	}
5303 #endif
5304 
5305 	rl = &dinfo->resources;
5306 	return (resource_list_release(rl, dev, child, type, rid, r));
5307 }
5308 
5309 int
5310 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5311     struct resource *r)
5312 {
5313 	struct pci_devinfo *dinfo;
5314 	int error;
5315 
5316 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5317 	if (error)
5318 		return (error);
5319 
5320 	/* Enable decoding in the command register when activating BARs. */
5321 	if (device_get_parent(child) == dev) {
5322 		/* Device ROMs need their decoding explicitly enabled. */
5323 		dinfo = device_get_ivars(child);
5324 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5325 			pci_write_bar(child, pci_find_bar(child, rid),
5326 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5327 		switch (type) {
5328 		case SYS_RES_IOPORT:
5329 		case SYS_RES_MEMORY:
5330 			error = PCI_ENABLE_IO(dev, child, type);
5331 			break;
5332 		}
5333 	}
5334 	return (error);
5335 }
5336 
5337 int
5338 pci_deactivate_resource(device_t dev, device_t child, int type,
5339     int rid, struct resource *r)
5340 {
5341 	struct pci_devinfo *dinfo;
5342 	int error;
5343 
5344 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5345 	if (error)
5346 		return (error);
5347 
5348 	/* Disable decoding for device ROMs. */
5349 	if (device_get_parent(child) == dev) {
5350 		dinfo = device_get_ivars(child);
5351 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5352 			pci_write_bar(child, pci_find_bar(child, rid),
5353 			    rman_get_start(r));
5354 	}
5355 	return (0);
5356 }
5357 
5358 void
5359 pci_child_deleted(device_t dev, device_t child)
5360 {
5361 	struct resource_list_entry *rle;
5362 	struct resource_list *rl;
5363 	struct pci_devinfo *dinfo;
5364 
5365 	dinfo = device_get_ivars(child);
5366 	rl = &dinfo->resources;
5367 
5368 	EVENTHANDLER_INVOKE(pci_delete_device, child);
5369 
5370 	/* Turn off access to resources we're about to free */
5371 	if (bus_child_present(child) != 0) {
5372 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5373 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5374 
5375 		pci_disable_busmaster(child);
5376 	}
5377 
5378 	/* Free all allocated resources */
5379 	STAILQ_FOREACH(rle, rl, link) {
5380 		if (rle->res) {
5381 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5382 			    resource_list_busy(rl, rle->type, rle->rid)) {
5383 				pci_printf(&dinfo->cfg,
5384 				    "Resource still owned, oops. "
5385 				    "(type=%d, rid=%d, addr=%lx)\n",
5386 				    rle->type, rle->rid,
5387 				    rman_get_start(rle->res));
5388 				bus_release_resource(child, rle->type, rle->rid,
5389 				    rle->res);
5390 			}
5391 			resource_list_unreserve(rl, dev, child, rle->type,
5392 			    rle->rid);
5393 		}
5394 	}
5395 	resource_list_free(rl);
5396 
5397 	pci_freecfg(dinfo);
5398 }
5399 
5400 void
5401 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5402 {
5403 	struct pci_devinfo *dinfo;
5404 	struct resource_list *rl;
5405 	struct resource_list_entry *rle;
5406 
5407 	if (device_get_parent(child) != dev)
5408 		return;
5409 
5410 	dinfo = device_get_ivars(child);
5411 	rl = &dinfo->resources;
5412 	rle = resource_list_find(rl, type, rid);
5413 	if (rle == NULL)
5414 		return;
5415 
5416 	if (rle->res) {
5417 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5418 		    resource_list_busy(rl, type, rid)) {
5419 			device_printf(dev, "delete_resource: "
5420 			    "Resource still owned by child, oops. "
5421 			    "(type=%d, rid=%d, addr=%jx)\n",
5422 			    type, rid, rman_get_start(rle->res));
5423 			return;
5424 		}
5425 		resource_list_unreserve(rl, dev, child, type, rid);
5426 	}
5427 	resource_list_delete(rl, type, rid);
5428 }
5429 
5430 struct resource_list *
5431 pci_get_resource_list (device_t dev, device_t child)
5432 {
5433 	struct pci_devinfo *dinfo = device_get_ivars(child);
5434 
5435 	return (&dinfo->resources);
5436 }
5437 
5438 bus_dma_tag_t
5439 pci_get_dma_tag(device_t bus, device_t dev)
5440 {
5441 	struct pci_softc *sc = device_get_softc(bus);
5442 
5443 	return (sc->sc_dma_tag);
5444 }
5445 
5446 uint32_t
5447 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5448 {
5449 	struct pci_devinfo *dinfo = device_get_ivars(child);
5450 	pcicfgregs *cfg = &dinfo->cfg;
5451 
5452 #ifdef PCI_IOV
5453 	/*
5454 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5455 	 * emulate them here.
5456 	 */
5457 	if (cfg->flags & PCICFG_VF) {
5458 		if (reg == PCIR_VENDOR) {
5459 			switch (width) {
5460 			case 4:
5461 				return (cfg->device << 16 | cfg->vendor);
5462 			case 2:
5463 				return (cfg->vendor);
5464 			case 1:
5465 				return (cfg->vendor & 0xff);
5466 			default:
5467 				return (0xffffffff);
5468 			}
5469 		} else if (reg == PCIR_DEVICE) {
5470 			switch (width) {
5471 			/* Note that an unaligned 4-byte read is an error. */
5472 			case 2:
5473 				return (cfg->device);
5474 			case 1:
5475 				return (cfg->device & 0xff);
5476 			default:
5477 				return (0xffffffff);
5478 			}
5479 		}
5480 	}
5481 #endif
5482 
5483 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5484 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5485 }
5486 
5487 void
5488 pci_write_config_method(device_t dev, device_t child, int reg,
5489     uint32_t val, int width)
5490 {
5491 	struct pci_devinfo *dinfo = device_get_ivars(child);
5492 	pcicfgregs *cfg = &dinfo->cfg;
5493 
5494 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5495 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5496 }
5497 
5498 int
5499 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5500     size_t buflen)
5501 {
5502 
5503 	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5504 	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5505 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5506 	return (0);
5507 }
5508 
5509 int
5510 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5511     size_t buflen)
5512 {
5513 	struct pci_devinfo *dinfo;
5514 	pcicfgregs *cfg;
5515 
5516 	dinfo = device_get_ivars(child);
5517 	cfg = &dinfo->cfg;
5518 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5519 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5520 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5521 	    cfg->progif);
5522 	return (0);
5523 }
5524 
5525 int
5526 pci_assign_interrupt_method(device_t dev, device_t child)
5527 {
5528 	struct pci_devinfo *dinfo = device_get_ivars(child);
5529 	pcicfgregs *cfg = &dinfo->cfg;
5530 
5531 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5532 	    cfg->intpin));
5533 }
5534 
5535 static void
5536 pci_lookup(void *arg, const char *name, device_t *dev)
5537 {
5538 	long val;
5539 	char *end;
5540 	int domain, bus, slot, func;
5541 
5542 	if (*dev != NULL)
5543 		return;
5544 
5545 	/*
5546 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5547 	 * pciB:S:F.  In the latter case, the domain is assumed to
5548 	 * be zero.
5549 	 */
5550 	if (strncmp(name, "pci", 3) != 0)
5551 		return;
5552 	val = strtol(name + 3, &end, 10);
5553 	if (val < 0 || val > INT_MAX || *end != ':')
5554 		return;
5555 	domain = val;
5556 	val = strtol(end + 1, &end, 10);
5557 	if (val < 0 || val > INT_MAX || *end != ':')
5558 		return;
5559 	bus = val;
5560 	val = strtol(end + 1, &end, 10);
5561 	if (val < 0 || val > INT_MAX)
5562 		return;
5563 	slot = val;
5564 	if (*end == ':') {
5565 		val = strtol(end + 1, &end, 10);
5566 		if (val < 0 || val > INT_MAX || *end != '\0')
5567 			return;
5568 		func = val;
5569 	} else if (*end == '\0') {
5570 		func = slot;
5571 		slot = bus;
5572 		bus = domain;
5573 		domain = 0;
5574 	} else
5575 		return;
5576 
5577 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5578 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5579 		return;
5580 
5581 	*dev = pci_find_dbsf(domain, bus, slot, func);
5582 }
5583 
5584 static int
5585 pci_modevent(module_t mod, int what, void *arg)
5586 {
5587 	static struct cdev *pci_cdev;
5588 	static eventhandler_tag tag;
5589 
5590 	switch (what) {
5591 	case MOD_LOAD:
5592 		STAILQ_INIT(&pci_devq);
5593 		pci_generation = 0;
5594 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5595 		    "pci");
5596 		pci_load_vendor_data();
5597 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5598 		    1000);
5599 		break;
5600 
5601 	case MOD_UNLOAD:
5602 		if (tag != NULL)
5603 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5604 		destroy_dev(pci_cdev);
5605 		break;
5606 	}
5607 
5608 	return (0);
5609 }
5610 
5611 static void
5612 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5613 {
5614 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5615 	struct pcicfg_pcie *cfg;
5616 	int version, pos;
5617 
5618 	cfg = &dinfo->cfg.pcie;
5619 	pos = cfg->pcie_location;
5620 
5621 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5622 
5623 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5624 
5625 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5626 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5627 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5628 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5629 
5630 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5631 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5632 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5633 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5634 
5635 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5636 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5637 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5638 
5639 	if (version > 1) {
5640 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5641 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5642 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5643 	}
5644 #undef WREG
5645 }
5646 
5647 static void
5648 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5649 {
5650 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5651 	    dinfo->cfg.pcix.pcix_command,  2);
5652 }
5653 
5654 void
5655 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5656 {
5657 
5658 	/*
5659 	 * Restore the device to full power mode.  We must do this
5660 	 * before we restore the registers because moving from D3 to
5661 	 * D0 will cause the chip's BARs and some other registers to
5662 	 * be reset to some unknown power on reset values.  Cut down
5663 	 * the noise on boot by doing nothing if we are already in
5664 	 * state D0.
5665 	 */
5666 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5667 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5668 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5669 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5670 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5671 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5672 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5673 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5674 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5675 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5676 	case PCIM_HDRTYPE_NORMAL:
5677 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5678 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5679 		break;
5680 	case PCIM_HDRTYPE_BRIDGE:
5681 		pci_write_config(dev, PCIR_SECLAT_1,
5682 		    dinfo->cfg.bridge.br_seclat, 1);
5683 		pci_write_config(dev, PCIR_SUBBUS_1,
5684 		    dinfo->cfg.bridge.br_subbus, 1);
5685 		pci_write_config(dev, PCIR_SECBUS_1,
5686 		    dinfo->cfg.bridge.br_secbus, 1);
5687 		pci_write_config(dev, PCIR_PRIBUS_1,
5688 		    dinfo->cfg.bridge.br_pribus, 1);
5689 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5690 		    dinfo->cfg.bridge.br_control, 2);
5691 		break;
5692 	case PCIM_HDRTYPE_CARDBUS:
5693 		pci_write_config(dev, PCIR_SECLAT_2,
5694 		    dinfo->cfg.bridge.br_seclat, 1);
5695 		pci_write_config(dev, PCIR_SUBBUS_2,
5696 		    dinfo->cfg.bridge.br_subbus, 1);
5697 		pci_write_config(dev, PCIR_SECBUS_2,
5698 		    dinfo->cfg.bridge.br_secbus, 1);
5699 		pci_write_config(dev, PCIR_PRIBUS_2,
5700 		    dinfo->cfg.bridge.br_pribus, 1);
5701 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5702 		    dinfo->cfg.bridge.br_control, 2);
5703 		break;
5704 	}
5705 	pci_restore_bars(dev);
5706 
5707 	/*
5708 	 * Restore extended capabilities for PCI-Express and PCI-X
5709 	 */
5710 	if (dinfo->cfg.pcie.pcie_location != 0)
5711 		pci_cfg_restore_pcie(dev, dinfo);
5712 	if (dinfo->cfg.pcix.pcix_location != 0)
5713 		pci_cfg_restore_pcix(dev, dinfo);
5714 
5715 	/* Restore MSI and MSI-X configurations if they are present. */
5716 	if (dinfo->cfg.msi.msi_location != 0)
5717 		pci_resume_msi(dev);
5718 	if (dinfo->cfg.msix.msix_location != 0)
5719 		pci_resume_msix(dev);
5720 
5721 #ifdef PCI_IOV
5722 	if (dinfo->cfg.iov != NULL)
5723 		pci_iov_cfg_restore(dev, dinfo);
5724 #endif
5725 }
5726 
5727 static void
5728 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5729 {
5730 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5731 	struct pcicfg_pcie *cfg;
5732 	int version, pos;
5733 
5734 	cfg = &dinfo->cfg.pcie;
5735 	pos = cfg->pcie_location;
5736 
5737 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5738 
5739 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5740 
5741 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5742 
5743 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5744 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5745 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5746 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5747 
5748 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5749 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5750 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5751 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5752 
5753 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5754 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5755 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5756 
5757 	if (version > 1) {
5758 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5759 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5760 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5761 	}
5762 #undef RREG
5763 }
5764 
5765 static void
5766 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5767 {
5768 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5769 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5770 }
5771 
5772 void
5773 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5774 {
5775 	uint32_t cls;
5776 	int ps;
5777 
5778 	/*
5779 	 * Some drivers apparently write to these registers w/o updating our
5780 	 * cached copy.  No harm happens if we update the copy, so do so here
5781 	 * so we can restore them.  The COMMAND register is modified by the
5782 	 * bus w/o updating the cache.  This should represent the normally
5783 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5784 	 */
5785 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5786 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5787 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5788 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5789 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5790 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5791 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5792 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5793 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5794 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5795 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5796 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5797 	case PCIM_HDRTYPE_NORMAL:
5798 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5799 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5800 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5801 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5802 		break;
5803 	case PCIM_HDRTYPE_BRIDGE:
5804 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5805 		    PCIR_SECLAT_1, 1);
5806 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5807 		    PCIR_SUBBUS_1, 1);
5808 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5809 		    PCIR_SECBUS_1, 1);
5810 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5811 		    PCIR_PRIBUS_1, 1);
5812 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5813 		    PCIR_BRIDGECTL_1, 2);
5814 		break;
5815 	case PCIM_HDRTYPE_CARDBUS:
5816 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5817 		    PCIR_SECLAT_2, 1);
5818 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5819 		    PCIR_SUBBUS_2, 1);
5820 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5821 		    PCIR_SECBUS_2, 1);
5822 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5823 		    PCIR_PRIBUS_2, 1);
5824 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5825 		    PCIR_BRIDGECTL_2, 2);
5826 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5827 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5828 		break;
5829 	}
5830 
5831 	if (dinfo->cfg.pcie.pcie_location != 0)
5832 		pci_cfg_save_pcie(dev, dinfo);
5833 
5834 	if (dinfo->cfg.pcix.pcix_location != 0)
5835 		pci_cfg_save_pcix(dev, dinfo);
5836 
5837 #ifdef PCI_IOV
5838 	if (dinfo->cfg.iov != NULL)
5839 		pci_iov_cfg_save(dev, dinfo);
5840 #endif
5841 
5842 	/*
5843 	 * don't set the state for display devices, base peripherals and
5844 	 * memory devices since bad things happen when they are powered down.
5845 	 * We should (a) have drivers that can easily detach and (b) use
5846 	 * generic drivers for these devices so that some device actually
5847 	 * attaches.  We need to make sure that when we implement (a) we don't
5848 	 * power the device down on a reattach.
5849 	 */
5850 	cls = pci_get_class(dev);
5851 	if (!setstate)
5852 		return;
5853 	switch (pci_do_power_nodriver)
5854 	{
5855 		case 0:		/* NO powerdown at all */
5856 			return;
5857 		case 1:		/* Conservative about what to power down */
5858 			if (cls == PCIC_STORAGE)
5859 				return;
5860 			/*FALLTHROUGH*/
5861 		case 2:		/* Aggressive about what to power down */
5862 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5863 			    cls == PCIC_BASEPERIPH)
5864 				return;
5865 			/*FALLTHROUGH*/
5866 		case 3:		/* Power down everything */
5867 			break;
5868 	}
5869 	/*
5870 	 * PCI spec says we can only go into D3 state from D0 state.
5871 	 * Transition from D[12] into D0 before going to D3 state.
5872 	 */
5873 	ps = pci_get_powerstate(dev);
5874 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5875 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5876 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5877 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5878 }
5879 
5880 /* Wrapper APIs suitable for device driver use. */
5881 void
5882 pci_save_state(device_t dev)
5883 {
5884 	struct pci_devinfo *dinfo;
5885 
5886 	dinfo = device_get_ivars(dev);
5887 	pci_cfg_save(dev, dinfo, 0);
5888 }
5889 
5890 void
5891 pci_restore_state(device_t dev)
5892 {
5893 	struct pci_devinfo *dinfo;
5894 
5895 	dinfo = device_get_ivars(dev);
5896 	pci_cfg_restore(dev, dinfo);
5897 }
5898 
5899 static int
5900 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5901     uintptr_t *id)
5902 {
5903 
5904 	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5905 }
5906 
5907 /* Find the upstream port of a given PCI device in a root complex. */
5908 device_t
5909 pci_find_pcie_root_port(device_t dev)
5910 {
5911 	struct pci_devinfo *dinfo;
5912 	devclass_t pci_class;
5913 	device_t pcib, bus;
5914 
5915 	pci_class = devclass_find("pci");
5916 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5917 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5918 
5919 	/*
5920 	 * Walk the bridge hierarchy until we find a PCI-e root
5921 	 * port or a non-PCI device.
5922 	 */
5923 	for (;;) {
5924 		bus = device_get_parent(dev);
5925 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5926 		    device_get_nameunit(dev)));
5927 
5928 		pcib = device_get_parent(bus);
5929 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5930 		    device_get_nameunit(bus)));
5931 
5932 		/*
5933 		 * pcib's parent must be a PCI bus for this to be a
5934 		 * PCI-PCI bridge.
5935 		 */
5936 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5937 			return (NULL);
5938 
5939 		dinfo = device_get_ivars(pcib);
5940 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5941 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5942 			return (pcib);
5943 
5944 		dev = pcib;
5945 	}
5946 }
5947 
5948 /*
5949  * Wait for pending transactions to complete on a PCI-express function.
5950  *
5951  * The maximum delay is specified in milliseconds in max_delay.  Note
5952  * that this function may sleep.
5953  *
5954  * Returns true if the function is idle and false if the timeout is
5955  * exceeded.  If dev is not a PCI-express function, this returns true.
5956  */
5957 bool
5958 pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5959 {
5960 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5961 	uint16_t sta;
5962 	int cap;
5963 
5964 	cap = dinfo->cfg.pcie.pcie_location;
5965 	if (cap == 0)
5966 		return (true);
5967 
5968 	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5969 	while (sta & PCIEM_STA_TRANSACTION_PND) {
5970 		if (max_delay == 0)
5971 			return (false);
5972 
5973 		/* Poll once every 100 milliseconds up to the timeout. */
5974 		if (max_delay > 100) {
5975 			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5976 			max_delay -= 100;
5977 		} else {
5978 			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5979 			    C_HARDCLOCK);
5980 			max_delay = 0;
5981 		}
5982 		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5983 	}
5984 
5985 	return (true);
5986 }
5987 
5988 /*
5989  * Determine the maximum Completion Timeout in microseconds.
5990  *
5991  * For non-PCI-express functions this returns 0.
5992  */
5993 int
5994 pcie_get_max_completion_timeout(device_t dev)
5995 {
5996 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5997 	int cap;
5998 
5999 	cap = dinfo->cfg.pcie.pcie_location;
6000 	if (cap == 0)
6001 		return (0);
6002 
6003 	/*
6004 	 * Functions using the 1.x spec use the default timeout range of
6005 	 * 50 microseconds to 50 milliseconds.  Functions that do not
6006 	 * support programmable timeouts also use this range.
6007 	 */
6008 	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
6009 	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
6010 	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
6011 		return (50 * 1000);
6012 
6013 	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
6014 	    PCIEM_CTL2_COMP_TIMO_VAL) {
6015 	case PCIEM_CTL2_COMP_TIMO_100US:
6016 		return (100);
6017 	case PCIEM_CTL2_COMP_TIMO_10MS:
6018 		return (10 * 1000);
6019 	case PCIEM_CTL2_COMP_TIMO_55MS:
6020 		return (55 * 1000);
6021 	case PCIEM_CTL2_COMP_TIMO_210MS:
6022 		return (210 * 1000);
6023 	case PCIEM_CTL2_COMP_TIMO_900MS:
6024 		return (900 * 1000);
6025 	case PCIEM_CTL2_COMP_TIMO_3500MS:
6026 		return (3500 * 1000);
6027 	case PCIEM_CTL2_COMP_TIMO_13S:
6028 		return (13 * 1000 * 1000);
6029 	case PCIEM_CTL2_COMP_TIMO_64S:
6030 		return (64 * 1000 * 1000);
6031 	default:
6032 		return (50 * 1000);
6033 	}
6034 }
6035 
6036 /*
6037  * Perform a Function Level Reset (FLR) on a device.
6038  *
6039  * This function first waits for any pending transactions to complete
6040  * within the timeout specified by max_delay.  If transactions are
6041  * still pending, the function will return false without attempting a
6042  * reset.
6043  *
6044  * If dev is not a PCI-express function or does not support FLR, this
6045  * function returns false.
6046  *
6047  * Note that no registers are saved or restored.  The caller is
6048  * responsible for saving and restoring any registers including
6049  * PCI-standard registers via pci_save_state() and
6050  * pci_restore_state().
6051  */
6052 bool
6053 pcie_flr(device_t dev, u_int max_delay, bool force)
6054 {
6055 	struct pci_devinfo *dinfo = device_get_ivars(dev);
6056 	uint16_t cmd, ctl;
6057 	int compl_delay;
6058 	int cap;
6059 
6060 	cap = dinfo->cfg.pcie.pcie_location;
6061 	if (cap == 0)
6062 		return (false);
6063 
6064 	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
6065 		return (false);
6066 
6067 	/*
6068 	 * Disable busmastering to prevent generation of new
6069 	 * transactions while waiting for the device to go idle.  If
6070 	 * the idle timeout fails, the command register is restored
6071 	 * which will re-enable busmastering.
6072 	 */
6073 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
6074 	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
6075 	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
6076 		if (!force) {
6077 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
6078 			return (false);
6079 		}
6080 		pci_printf(&dinfo->cfg,
6081 		    "Resetting with transactions pending after %d ms\n",
6082 		    max_delay);
6083 
6084 		/*
6085 		 * Extend the post-FLR delay to cover the maximum
6086 		 * Completion Timeout delay of anything in flight
6087 		 * during the FLR delay.  Enforce a minimum delay of
6088 		 * at least 10ms.
6089 		 */
6090 		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
6091 		if (compl_delay < 10)
6092 			compl_delay = 10;
6093 	} else
6094 		compl_delay = 0;
6095 
6096 	/* Initiate the reset. */
6097 	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
6098 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
6099 	    PCIEM_CTL_INITIATE_FLR, 2);
6100 
6101 	/* Wait for 100ms. */
6102 	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
6103 
6104 	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
6105 	    PCIEM_STA_TRANSACTION_PND)
6106 		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
6107 	return (true);
6108 }
6109