xref: /freebsd/sys/dev/pci/pci.c (revision cc349066556bcdeed0d6cc72aad340d0f383e35c)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #ifdef PCI_IOV
67 #include <sys/nv.h>
68 #include <dev/pci/pci_iov_private.h>
69 #endif
70 
71 #include <dev/usb/controller/xhcireg.h>
72 #include <dev/usb/controller/ehcireg.h>
73 #include <dev/usb/controller/ohcireg.h>
74 #include <dev/usb/controller/uhcireg.h>
75 
76 #include "pcib_if.h"
77 #include "pci_if.h"
78 
/*
 * Evaluates to true when config register 'reg' is the BIOS (device ROM)
 * register for the header type recorded in 'cfg': PCIR_BIOS for a normal
 * header, PCIR_BIOS_1 for a bridge header.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (e.g. "r & 0xff") is compared as a whole rather than being re-associated
 * with the '==' operator inside the macro.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
82 
/*
 * Prototypes for the file-local (static) helpers used by the pci bus
 * driver.  They are grouped roughly by purpose: BAR/ROM decoding helpers,
 * resource and interrupt setup, device interface entry points, vendor
 * database handling, capability/VPD parsing and MSI/MSI-X support.
 */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static int		pci_detach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
/* The VPD write helper is compiled out, so its prototype is as well. */
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static int		pci_get_id_method(device_t dev, device_t child,
			    enum pci_id_type type, uintptr_t *rid);

static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
    int b, int s, int f, uint16_t vid, uint16_t did);
130 
/*
 * Method table for the pci bus driver.  Each DEVMETHOD() entry binds one
 * method of the device, bus or PCI interface to the function implementing
 * it; the table is terminated by DEVMETHOD_END.  The SR-IOV entries are
 * only present when the kernel is built with PCI_IOV.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
205 
/* Declare the "pci" driver class; its softc is sized as struct pci_softc. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
/* Register pci_driver with "pcib" as parent and pci_modevent as handler. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * NOTE(review): presumably the in-memory vendor/device description
 * database and its length, as filled in by pci_load_vendor_data() --
 * confirm against that function, which is outside this excerpt.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
214 
/*
 * One entry of the pci_quirks table: a device identifier, the kind of
 * quirk that applies to it and up to two quirk-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	/* Quirk kind; one of the PCI_QUIRK_* values defined below. */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	/*
	 * Quirk-specific arguments.  The MAP_REG and UNMAP_REG entries in
	 * pci_quirks store a config register offset in arg1; arg2 is zero
	 * in every entry of that table.
	 */
	int	arg1;
	int	arg2;
};
227 
/*
 * Table of known device quirks, searched linearly by pci_has_quirk().
 * The final all-zero entry terminates the table (the search stops at the
 * first entry whose devid is 0).
 * NOTE(review): devid looks like (device ID << 16) | vendor ID, e.g. the
 * Intel entries end in 8086 -- confirm against the callers that build it.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200/E2400 Ethernet controllers have a
	 * bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit
	 * of the command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* Terminator: pci_has_quirk() stops at the first zero devid. */
	{ 0 }
};
306 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/*
 * Global list of every device's pci_devinfo.  pci_fill_devinfo() appends
 * to it and the pci_find_*() lookups walk it.
 */
struct devlist pci_devq;
/* Both counters are incremented each time pci_fill_devinfo() adds a device. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_cap() when a PCI-express capability (pcie_chipset)
 * or a PCI-X capability on a PCI-PCI bridge (pcix_chipset) is seen.
 */
static int pcie_chipset, pcix_chipset;
316 
/*
 * sysctl vars
 *
 * Everything below hangs off the hw.pci node.  Entries declared with
 * CTLFLAG_RWTUN are writable and tunable; CTLFLAG_RDTUN entries are
 * read-only tunables.  Each backing variable's initializer is
 * its default value.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* Not static: visible to other compilation units. */
int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* Not static: visible to other compilation units. */
int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_msix_rewrite_table = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, msix_rewrite_table, CTLFLAG_RWTUN,
    &pci_msix_rewrite_table, 0,
    "Rewrite entire MSI-X table when updating MSI-X entries");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* Early USB takeover defaults to on for i386/amd64 only. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

/* Only present when both NEW_PCIB and PCI_RES_BUS are defined. */
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
391 
392 static int
393 pci_has_quirk(uint32_t devid, int quirk)
394 {
395 	const struct pci_quirk *q;
396 
397 	for (q = &pci_quirks[0]; q->devid; q++) {
398 		if (q->devid == devid && q->type == quirk)
399 			return (1);
400 	}
401 	return (0);
402 }
403 
404 /* Find a device_t by bus/slot/function in domain 0 */
405 
406 device_t
407 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
408 {
409 
410 	return (pci_find_dbsf(0, bus, slot, func));
411 }
412 
413 /* Find a device_t by domain/bus/slot/function */
414 
415 device_t
416 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
417 {
418 	struct pci_devinfo *dinfo;
419 
420 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
421 		if ((dinfo->cfg.domain == domain) &&
422 		    (dinfo->cfg.bus == bus) &&
423 		    (dinfo->cfg.slot == slot) &&
424 		    (dinfo->cfg.func == func)) {
425 			return (dinfo->cfg.dev);
426 		}
427 	}
428 
429 	return (NULL);
430 }
431 
432 /* Find a device_t by vendor/device ID */
433 
434 device_t
435 pci_find_device(uint16_t vendor, uint16_t device)
436 {
437 	struct pci_devinfo *dinfo;
438 
439 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
440 		if ((dinfo->cfg.vendor == vendor) &&
441 		    (dinfo->cfg.device == device)) {
442 			return (dinfo->cfg.dev);
443 		}
444 	}
445 
446 	return (NULL);
447 }
448 
449 device_t
450 pci_find_class(uint8_t class, uint8_t subclass)
451 {
452 	struct pci_devinfo *dinfo;
453 
454 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
455 		if (dinfo->cfg.baseclass == class &&
456 		    dinfo->cfg.subclass == subclass) {
457 			return (dinfo->cfg.dev);
458 		}
459 	}
460 
461 	return (NULL);
462 }
463 
464 static int
465 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
466 {
467 	va_list ap;
468 	int retval;
469 
470 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
471 	    cfg->func);
472 	va_start(ap, fmt);
473 	retval += vprintf(fmt, ap);
474 	va_end(ap);
475 	return (retval);
476 }
477 
478 /* return base address of memory or port map */
479 
480 static pci_addr_t
481 pci_mapbase(uint64_t mapreg)
482 {
483 
484 	if (PCI_BAR_MEM(mapreg))
485 		return (mapreg & PCIM_BAR_MEM_BASE);
486 	else
487 		return (mapreg & PCIM_BAR_IO_BASE);
488 }
489 
490 /* return map type of memory or port map */
491 
492 static const char *
493 pci_maptype(uint64_t mapreg)
494 {
495 
496 	if (PCI_BAR_IO(mapreg))
497 		return ("I/O Port");
498 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
499 		return ("Prefetchable Memory");
500 	return ("Memory");
501 }
502 
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * 'testval' is reduced to its base-address bits with pci_mapbase(); the
 * result is the index of the lowest set bit of that value, or 0 when no
 * base-address bit is set.
 */
int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count the trailing zero bits. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
521 
522 /* return base address of device ROM */
523 
524 static pci_addr_t
525 pci_rombase(uint64_t mapreg)
526 {
527 
528 	return (mapreg & PCIM_BIOS_ADDR_MASK);
529 }
530 
/*
 * Return log2 of the size decoded for the device ROM.
 *
 * 'testval' is reduced to its address bits with pci_rombase(); the result
 * is the index of the lowest set bit of that value, or 0 when no address
 * bit is set.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);

	/* Count the trailing zero bits. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
549 
550 /* return log2 of address range supported by map register */
551 
552 static int
553 pci_maprange(uint64_t mapreg)
554 {
555 	int ln2range = 0;
556 
557 	if (PCI_BAR_IO(mapreg))
558 		ln2range = 32;
559 	else
560 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
561 		case PCIM_BAR_MEM_32:
562 			ln2range = 32;
563 			break;
564 		case PCIM_BAR_MEM_1MB:
565 			ln2range = 20;
566 			break;
567 		case PCIM_BAR_MEM_64:
568 			ln2range = 64;
569 			break;
570 		}
571 	return (ln2range);
572 }
573 
574 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
575 
576 static void
577 pci_fixancient(pcicfgregs *cfg)
578 {
579 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
580 		return;
581 
582 	/* PCI to PCI bridges use header type 1 */
583 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
584 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
585 }
586 
587 /* extract header type specific config data */
588 
589 static void
590 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
591 {
592 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
593 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
594 	case PCIM_HDRTYPE_NORMAL:
595 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
596 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
597 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
598 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
599 		cfg->nummaps	    = PCI_MAXMAPS_0;
600 		break;
601 	case PCIM_HDRTYPE_BRIDGE:
602 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
603 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
604 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
605 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
606 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
607 		cfg->nummaps	    = PCI_MAXMAPS_1;
608 		break;
609 	case PCIM_HDRTYPE_CARDBUS:
610 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
611 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
612 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
613 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
614 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
615 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
616 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
617 		cfg->nummaps	    = PCI_MAXMAPS_2;
618 		break;
619 	}
620 #undef REG
621 }
622 
623 /* read configuration header into pcicfgregs structure */
624 struct pci_devinfo *
625 pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
626 {
627 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
628 	uint16_t vid, did;
629 
630 	vid = REG(PCIR_VENDOR, 2);
631 	did = REG(PCIR_DEVICE, 2);
632 	if (vid != 0xffff)
633 		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));
634 
635 	return (NULL);
636 }
637 
638 struct pci_devinfo *
639 pci_alloc_devinfo_method(device_t dev)
640 {
641 
642 	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
643 	    M_WAITOK | M_ZERO));
644 }
645 
646 static struct pci_devinfo *
647 pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
648     uint16_t vid, uint16_t did)
649 {
650 	struct pci_devinfo *devlist_entry;
651 	pcicfgregs *cfg;
652 
653 	devlist_entry = PCI_ALLOC_DEVINFO(bus);
654 
655 	cfg = &devlist_entry->cfg;
656 
657 	cfg->domain		= d;
658 	cfg->bus		= b;
659 	cfg->slot		= s;
660 	cfg->func		= f;
661 	cfg->vendor		= vid;
662 	cfg->device		= did;
663 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
664 	cfg->statreg		= REG(PCIR_STATUS, 2);
665 	cfg->baseclass		= REG(PCIR_CLASS, 1);
666 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
667 	cfg->progif		= REG(PCIR_PROGIF, 1);
668 	cfg->revid		= REG(PCIR_REVID, 1);
669 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
670 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
671 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
672 	cfg->intpin		= REG(PCIR_INTPIN, 1);
673 	cfg->intline		= REG(PCIR_INTLINE, 1);
674 
675 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
676 	cfg->hdrtype		&= ~PCIM_MFDEV;
677 	STAILQ_INIT(&cfg->maps);
678 
679 	cfg->iov		= NULL;
680 
681 	pci_fixancient(cfg);
682 	pci_hdrtypedata(pcib, b, s, f, cfg);
683 
684 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
685 		pci_read_cap(pcib, cfg);
686 
687 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
688 
689 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
690 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
691 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
692 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
693 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
694 
695 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
696 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
697 	devlist_entry->conf.pc_vendor = cfg->vendor;
698 	devlist_entry->conf.pc_device = cfg->device;
699 
700 	devlist_entry->conf.pc_class = cfg->baseclass;
701 	devlist_entry->conf.pc_subclass = cfg->subclass;
702 	devlist_entry->conf.pc_progif = cfg->progif;
703 	devlist_entry->conf.pc_revid = cfg->revid;
704 
705 	pci_numdevs++;
706 	pci_generation++;
707 
708 	return (devlist_entry);
709 }
710 #undef REG
711 
712 static void
713 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
714 {
715 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
716     cfg->ea.ea_location + (n), w)
717 	int num_ent;
718 	int ptr;
719 	int a, b;
720 	uint32_t val;
721 	int ent_size;
722 	uint32_t dw[4];
723 	uint64_t base, max_offset;
724 	struct pci_ea_entry *eae;
725 
726 	if (cfg->ea.ea_location == 0)
727 		return;
728 
729 	STAILQ_INIT(&cfg->ea.ea_entries);
730 
731 	/* Determine the number of entries */
732 	num_ent = REG(PCIR_EA_NUM_ENT, 2);
733 	num_ent &= PCIM_EA_NUM_ENT_MASK;
734 
735 	/* Find the first entry to care of */
736 	ptr = PCIR_EA_FIRST_ENT;
737 
738 	/* Skip DWORD 2 for type 1 functions */
739 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
740 		ptr += 4;
741 
742 	for (a = 0; a < num_ent; a++) {
743 
744 		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
745 		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
746 
747 		/* Read a number of dwords in the entry */
748 		val = REG(ptr, 4);
749 		ptr += 4;
750 		ent_size = (val & PCIM_EA_ES);
751 
752 		for (b = 0; b < ent_size; b++) {
753 			dw[b] = REG(ptr, 4);
754 			ptr += 4;
755 		}
756 
757 		eae->eae_flags = val;
758 		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
759 
760 		base = dw[0] & PCIM_EA_FIELD_MASK;
761 		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
762 		b = 2;
763 		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
764 			base |= (uint64_t)dw[b] << 32UL;
765 			b++;
766 		}
767 		if (((dw[1] & PCIM_EA_IS_64) != 0)
768 		    && (b < ent_size)) {
769 			max_offset |= (uint64_t)dw[b] << 32UL;
770 			b++;
771 		}
772 
773 		eae->eae_base = base;
774 		eae->eae_max_offset = max_offset;
775 
776 		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
777 
778 		if (bootverbose) {
779 			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
780 			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
781 			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
782 		}
783 	}
784 }
785 #undef REG
786 
787 static void
788 pci_read_cap(device_t pcib, pcicfgregs *cfg)
789 {
790 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
791 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
792 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
793 	uint64_t addr;
794 #endif
795 	uint32_t val;
796 	int	ptr, nextptr, ptrptr;
797 
798 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
799 	case PCIM_HDRTYPE_NORMAL:
800 	case PCIM_HDRTYPE_BRIDGE:
801 		ptrptr = PCIR_CAP_PTR;
802 		break;
803 	case PCIM_HDRTYPE_CARDBUS:
804 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
805 		break;
806 	default:
807 		return;		/* no extended capabilities support */
808 	}
809 	nextptr = REG(ptrptr, 1);	/* sanity check? */
810 
811 	/*
812 	 * Read capability entries.
813 	 */
814 	while (nextptr != 0) {
815 		/* Sanity check */
816 		if (nextptr > 255) {
817 			printf("illegal PCI extended capability offset %d\n",
818 			    nextptr);
819 			return;
820 		}
821 		/* Find the next entry */
822 		ptr = nextptr;
823 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
824 
825 		/* Process this entry */
826 		switch (REG(ptr + PCICAP_ID, 1)) {
827 		case PCIY_PMG:		/* PCI power management */
828 			if (cfg->pp.pp_cap == 0) {
829 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
830 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
831 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
832 				if ((nextptr - ptr) > PCIR_POWER_DATA)
833 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
834 			}
835 			break;
836 		case PCIY_HT:		/* HyperTransport */
837 			/* Determine HT-specific capability type. */
838 			val = REG(ptr + PCIR_HT_COMMAND, 2);
839 
840 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
841 				cfg->ht.ht_slave = ptr;
842 
843 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
844 			switch (val & PCIM_HTCMD_CAP_MASK) {
845 			case PCIM_HTCAP_MSI_MAPPING:
846 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
847 					/* Sanity check the mapping window. */
848 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
849 					    4);
850 					addr <<= 32;
851 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
852 					    4);
853 					if (addr != MSI_INTEL_ADDR_BASE)
854 						device_printf(pcib,
855 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
856 						    cfg->domain, cfg->bus,
857 						    cfg->slot, cfg->func,
858 						    (long long)addr);
859 				} else
860 					addr = MSI_INTEL_ADDR_BASE;
861 
862 				cfg->ht.ht_msimap = ptr;
863 				cfg->ht.ht_msictrl = val;
864 				cfg->ht.ht_msiaddr = addr;
865 				break;
866 			}
867 #endif
868 			break;
869 		case PCIY_MSI:		/* PCI MSI */
870 			cfg->msi.msi_location = ptr;
871 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
872 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
873 						     PCIM_MSICTRL_MMC_MASK)>>1);
874 			break;
875 		case PCIY_MSIX:		/* PCI MSI-X */
876 			cfg->msix.msix_location = ptr;
877 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
878 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
879 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
880 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
881 			cfg->msix.msix_table_bar = PCIR_BAR(val &
882 			    PCIM_MSIX_BIR_MASK);
883 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
884 			val = REG(ptr + PCIR_MSIX_PBA, 4);
885 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
886 			    PCIM_MSIX_BIR_MASK);
887 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
888 			break;
889 		case PCIY_VPD:		/* PCI Vital Product Data */
890 			cfg->vpd.vpd_reg = ptr;
891 			break;
892 		case PCIY_SUBVENDOR:
893 			/* Should always be true. */
894 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
895 			    PCIM_HDRTYPE_BRIDGE) {
896 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
897 				cfg->subvendor = val & 0xffff;
898 				cfg->subdevice = val >> 16;
899 			}
900 			break;
901 		case PCIY_PCIX:		/* PCI-X */
902 			/*
903 			 * Assume we have a PCI-X chipset if we have
904 			 * at least one PCI-PCI bridge with a PCI-X
905 			 * capability.  Note that some systems with
906 			 * PCI-express or HT chipsets might match on
907 			 * this check as well.
908 			 */
909 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
910 			    PCIM_HDRTYPE_BRIDGE)
911 				pcix_chipset = 1;
912 			cfg->pcix.pcix_location = ptr;
913 			break;
914 		case PCIY_EXPRESS:	/* PCI-express */
915 			/*
916 			 * Assume we have a PCI-express chipset if we have
917 			 * at least one PCI-express device.
918 			 */
919 			pcie_chipset = 1;
920 			cfg->pcie.pcie_location = ptr;
921 			val = REG(ptr + PCIER_FLAGS, 2);
922 			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
923 			break;
924 		case PCIY_EA:		/* Enhanced Allocation */
925 			cfg->ea.ea_location = ptr;
926 			pci_ea_fill_info(pcib, cfg);
927 			break;
928 		default:
929 			break;
930 		}
931 	}
932 
933 #if defined(__powerpc__)
934 	/*
935 	 * Enable the MSI mapping window for all HyperTransport
936 	 * slaves.  PCI-PCI bridges have their windows enabled via
937 	 * PCIB_MAP_MSI().
938 	 */
939 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
940 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
941 		device_printf(pcib,
942 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
943 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
944 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
945 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
946 		     2);
947 	}
948 #endif
949 /* REG and WREG use carry through to next functions */
950 }
951 
952 /*
953  * PCI Vital Product Data
954  */
955 
956 #define	PCI_VPD_TIMEOUT		1000000
957 
958 static int
959 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
960 {
961 	int count = PCI_VPD_TIMEOUT;
962 
963 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
964 
965 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
966 
967 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
968 		if (--count < 0)
969 			return (ENXIO);
970 		DELAY(1);	/* limit looping */
971 	}
972 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
973 
974 	return (0);
975 }
976 
#if 0
/*
 * Write the 32-bit word 'data' to VPD offset 'reg' (compiled out).
 *
 * The data register is loaded first, then the offset is written to the
 * address register with bit 15 (0x8000) set.  The address register is
 * polled, with a 1us delay between polls, until bit 15 reads back clear.
 *
 * Returns 0 on success, or ENXIO if the bit is still set after
 * PCI_VPD_TIMEOUT delays.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int tries;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	tries = PCI_VPD_TIMEOUT;
	for (;;) {
		if ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) !=
		    0x8000)
			break;
		if (tries-- <= 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
998 
999 struct vpd_readstate {
1000 	device_t	pcib;
1001 	pcicfgregs	*cfg;
1002 	uint32_t	val;
1003 	int		bytesinval;
1004 	int		off;
1005 	uint8_t		cksum;
1006 };
1007 
1008 static int
1009 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1010 {
1011 	uint32_t reg;
1012 	uint8_t byte;
1013 
1014 	if (vrs->bytesinval == 0) {
1015 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1016 			return (ENXIO);
1017 		vrs->val = le32toh(reg);
1018 		vrs->off += 4;
1019 		byte = vrs->val & 0xff;
1020 		vrs->bytesinval = 3;
1021 	} else {
1022 		vrs->val = vrs->val >> 8;
1023 		byte = vrs->val & 0xff;
1024 		vrs->bytesinval--;
1025 	}
1026 
1027 	vrs->cksum += byte;
1028 	*data = byte;
1029 	return (0);
1030 }
1031 
1032 static void
1033 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1034 {
1035 	struct vpd_readstate vrs;
1036 	int state;
1037 	int name;
1038 	int remain;
1039 	int i;
1040 	int alloc, off;		/* alloc/off for RO/W arrays */
1041 	int cksumvalid;
1042 	int dflen;
1043 	uint8_t byte;
1044 	uint8_t byte2;
1045 
1046 	/* init vpd reader */
1047 	vrs.bytesinval = 0;
1048 	vrs.off = 0;
1049 	vrs.pcib = pcib;
1050 	vrs.cfg = cfg;
1051 	vrs.cksum = 0;
1052 
1053 	state = 0;
1054 	name = remain = i = 0;	/* shut up stupid gcc */
1055 	alloc = off = 0;	/* shut up stupid gcc */
1056 	dflen = 0;		/* shut up stupid gcc */
1057 	cksumvalid = -1;
1058 	while (state >= 0) {
1059 		if (vpd_nextbyte(&vrs, &byte)) {
1060 			state = -2;
1061 			break;
1062 		}
1063 #if 0
1064 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1065 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1066 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1067 #endif
1068 		switch (state) {
1069 		case 0:		/* item name */
1070 			if (byte & 0x80) {
1071 				if (vpd_nextbyte(&vrs, &byte2)) {
1072 					state = -2;
1073 					break;
1074 				}
1075 				remain = byte2;
1076 				if (vpd_nextbyte(&vrs, &byte2)) {
1077 					state = -2;
1078 					break;
1079 				}
1080 				remain |= byte2 << 8;
1081 				if (remain > (0x7f*4 - vrs.off)) {
1082 					state = -1;
1083 					pci_printf(cfg,
1084 					    "invalid VPD data, remain %#x\n",
1085 					    remain);
1086 				}
1087 				name = byte & 0x7f;
1088 			} else {
1089 				remain = byte & 0x7;
1090 				name = (byte >> 3) & 0xf;
1091 			}
1092 			switch (name) {
1093 			case 0x2:	/* String */
1094 				cfg->vpd.vpd_ident = malloc(remain + 1,
1095 				    M_DEVBUF, M_WAITOK);
1096 				i = 0;
1097 				state = 1;
1098 				break;
1099 			case 0xf:	/* End */
1100 				state = -1;
1101 				break;
1102 			case 0x10:	/* VPD-R */
1103 				alloc = 8;
1104 				off = 0;
1105 				cfg->vpd.vpd_ros = malloc(alloc *
1106 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1107 				    M_WAITOK | M_ZERO);
1108 				state = 2;
1109 				break;
1110 			case 0x11:	/* VPD-W */
1111 				alloc = 8;
1112 				off = 0;
1113 				cfg->vpd.vpd_w = malloc(alloc *
1114 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1115 				    M_WAITOK | M_ZERO);
1116 				state = 5;
1117 				break;
1118 			default:	/* Invalid data, abort */
1119 				state = -1;
1120 				break;
1121 			}
1122 			break;
1123 
1124 		case 1:	/* Identifier String */
1125 			cfg->vpd.vpd_ident[i++] = byte;
1126 			remain--;
1127 			if (remain == 0)  {
1128 				cfg->vpd.vpd_ident[i] = '\0';
1129 				state = 0;
1130 			}
1131 			break;
1132 
1133 		case 2:	/* VPD-R Keyword Header */
1134 			if (off == alloc) {
1135 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1136 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1137 				    M_DEVBUF, M_WAITOK | M_ZERO);
1138 			}
1139 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1140 			if (vpd_nextbyte(&vrs, &byte2)) {
1141 				state = -2;
1142 				break;
1143 			}
1144 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1145 			if (vpd_nextbyte(&vrs, &byte2)) {
1146 				state = -2;
1147 				break;
1148 			}
1149 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1150 			if (dflen == 0 &&
1151 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1152 			    2) == 0) {
1153 				/*
1154 				 * if this happens, we can't trust the rest
1155 				 * of the VPD.
1156 				 */
1157 				pci_printf(cfg, "bad keyword length: %d\n",
1158 				    dflen);
1159 				cksumvalid = 0;
1160 				state = -1;
1161 				break;
1162 			} else if (dflen == 0) {
1163 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1164 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1165 				    M_DEVBUF, M_WAITOK);
1166 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1167 			} else
1168 				cfg->vpd.vpd_ros[off].value = malloc(
1169 				    (dflen + 1) *
1170 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1171 				    M_DEVBUF, M_WAITOK);
1172 			remain -= 3;
1173 			i = 0;
1174 			/* keep in sync w/ state 3's transistions */
1175 			if (dflen == 0 && remain == 0)
1176 				state = 0;
1177 			else if (dflen == 0)
1178 				state = 2;
1179 			else
1180 				state = 3;
1181 			break;
1182 
1183 		case 3:	/* VPD-R Keyword Value */
1184 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1185 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1186 			    "RV", 2) == 0 && cksumvalid == -1) {
1187 				if (vrs.cksum == 0)
1188 					cksumvalid = 1;
1189 				else {
1190 					if (bootverbose)
1191 						pci_printf(cfg,
1192 					    "bad VPD cksum, remain %hhu\n",
1193 						    vrs.cksum);
1194 					cksumvalid = 0;
1195 					state = -1;
1196 					break;
1197 				}
1198 			}
1199 			dflen--;
1200 			remain--;
1201 			/* keep in sync w/ state 2's transistions */
1202 			if (dflen == 0)
1203 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1204 			if (dflen == 0 && remain == 0) {
1205 				cfg->vpd.vpd_rocnt = off;
1206 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1207 				    off * sizeof(*cfg->vpd.vpd_ros),
1208 				    M_DEVBUF, M_WAITOK | M_ZERO);
1209 				state = 0;
1210 			} else if (dflen == 0)
1211 				state = 2;
1212 			break;
1213 
1214 		case 4:
1215 			remain--;
1216 			if (remain == 0)
1217 				state = 0;
1218 			break;
1219 
1220 		case 5:	/* VPD-W Keyword Header */
1221 			if (off == alloc) {
1222 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1223 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1224 				    M_DEVBUF, M_WAITOK | M_ZERO);
1225 			}
1226 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1227 			if (vpd_nextbyte(&vrs, &byte2)) {
1228 				state = -2;
1229 				break;
1230 			}
1231 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1232 			if (vpd_nextbyte(&vrs, &byte2)) {
1233 				state = -2;
1234 				break;
1235 			}
1236 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1237 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1238 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1239 			    sizeof(*cfg->vpd.vpd_w[off].value),
1240 			    M_DEVBUF, M_WAITOK);
1241 			remain -= 3;
1242 			i = 0;
1243 			/* keep in sync w/ state 6's transistions */
1244 			if (dflen == 0 && remain == 0)
1245 				state = 0;
1246 			else if (dflen == 0)
1247 				state = 5;
1248 			else
1249 				state = 6;
1250 			break;
1251 
1252 		case 6:	/* VPD-W Keyword Value */
1253 			cfg->vpd.vpd_w[off].value[i++] = byte;
1254 			dflen--;
1255 			remain--;
1256 			/* keep in sync w/ state 5's transistions */
1257 			if (dflen == 0)
1258 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1259 			if (dflen == 0 && remain == 0) {
1260 				cfg->vpd.vpd_wcnt = off;
1261 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1262 				    off * sizeof(*cfg->vpd.vpd_w),
1263 				    M_DEVBUF, M_WAITOK | M_ZERO);
1264 				state = 0;
1265 			} else if (dflen == 0)
1266 				state = 5;
1267 			break;
1268 
1269 		default:
1270 			pci_printf(cfg, "invalid state: %d\n", state);
1271 			state = -1;
1272 			break;
1273 		}
1274 	}
1275 
1276 	if (cksumvalid == 0 || state < -1) {
1277 		/* read-only data bad, clean up */
1278 		if (cfg->vpd.vpd_ros != NULL) {
1279 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1280 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1281 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1282 			cfg->vpd.vpd_ros = NULL;
1283 		}
1284 	}
1285 	if (state < -1) {
1286 		/* I/O error, clean up */
1287 		pci_printf(cfg, "failed to read VPD data.\n");
1288 		if (cfg->vpd.vpd_ident != NULL) {
1289 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1290 			cfg->vpd.vpd_ident = NULL;
1291 		}
1292 		if (cfg->vpd.vpd_w != NULL) {
1293 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1294 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1295 			free(cfg->vpd.vpd_w, M_DEVBUF);
1296 			cfg->vpd.vpd_w = NULL;
1297 		}
1298 	}
1299 	cfg->vpd.vpd_cached = 1;
1300 #undef REG
1301 #undef WREG
1302 }
1303 
1304 int
1305 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1306 {
1307 	struct pci_devinfo *dinfo = device_get_ivars(child);
1308 	pcicfgregs *cfg = &dinfo->cfg;
1309 
1310 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1311 		pci_read_vpd(device_get_parent(dev), cfg);
1312 
1313 	*identptr = cfg->vpd.vpd_ident;
1314 
1315 	if (*identptr == NULL)
1316 		return (ENXIO);
1317 
1318 	return (0);
1319 }
1320 
1321 int
1322 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1323 	const char **vptr)
1324 {
1325 	struct pci_devinfo *dinfo = device_get_ivars(child);
1326 	pcicfgregs *cfg = &dinfo->cfg;
1327 	int i;
1328 
1329 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1330 		pci_read_vpd(device_get_parent(dev), cfg);
1331 
1332 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1333 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1334 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1335 			*vptr = cfg->vpd.vpd_ros[i].value;
1336 			return (0);
1337 		}
1338 
1339 	*vptr = NULL;
1340 	return (ENXIO);
1341 }
1342 
1343 struct pcicfg_vpd *
1344 pci_fetch_vpd_list(device_t dev)
1345 {
1346 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1347 	pcicfgregs *cfg = &dinfo->cfg;
1348 
1349 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1350 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1351 	return (&cfg->vpd);
1352 }
1353 
1354 /*
1355  * Find the requested HyperTransport capability and return the offset
1356  * in configuration space via the pointer provided.  The function
1357  * returns 0 on success and an error code otherwise.
1358  */
1359 int
1360 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1361 {
1362 	int ptr, error;
1363 	uint16_t val;
1364 
1365 	error = pci_find_cap(child, PCIY_HT, &ptr);
1366 	if (error)
1367 		return (error);
1368 
1369 	/*
1370 	 * Traverse the capabilities list checking each HT capability
1371 	 * to see if it matches the requested HT capability.
1372 	 */
1373 	while (ptr != 0) {
1374 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1375 		if (capability == PCIM_HTCAP_SLAVE ||
1376 		    capability == PCIM_HTCAP_HOST)
1377 			val &= 0xe000;
1378 		else
1379 			val &= PCIM_HTCMD_CAP_MASK;
1380 		if (val == capability) {
1381 			if (capreg != NULL)
1382 				*capreg = ptr;
1383 			return (0);
1384 		}
1385 
1386 		/* Skip to the next HT capability. */
1387 		while (ptr != 0) {
1388 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1389 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1390 			    PCIY_HT)
1391 				break;
1392 		}
1393 	}
1394 	return (ENOENT);
1395 }
1396 
1397 /*
1398  * Find the requested capability and return the offset in
1399  * configuration space via the pointer provided.  The function returns
1400  * 0 on success and an error code otherwise.
1401  */
1402 int
1403 pci_find_cap_method(device_t dev, device_t child, int capability,
1404     int *capreg)
1405 {
1406 	struct pci_devinfo *dinfo = device_get_ivars(child);
1407 	pcicfgregs *cfg = &dinfo->cfg;
1408 	u_int32_t status;
1409 	u_int8_t ptr;
1410 
1411 	/*
1412 	 * Check the CAP_LIST bit of the PCI status register first.
1413 	 */
1414 	status = pci_read_config(child, PCIR_STATUS, 2);
1415 	if (!(status & PCIM_STATUS_CAPPRESENT))
1416 		return (ENXIO);
1417 
1418 	/*
1419 	 * Determine the start pointer of the capabilities list.
1420 	 */
1421 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1422 	case PCIM_HDRTYPE_NORMAL:
1423 	case PCIM_HDRTYPE_BRIDGE:
1424 		ptr = PCIR_CAP_PTR;
1425 		break;
1426 	case PCIM_HDRTYPE_CARDBUS:
1427 		ptr = PCIR_CAP_PTR_2;
1428 		break;
1429 	default:
1430 		/* XXX: panic? */
1431 		return (ENXIO);		/* no extended capabilities support */
1432 	}
1433 	ptr = pci_read_config(child, ptr, 1);
1434 
1435 	/*
1436 	 * Traverse the capabilities list.
1437 	 */
1438 	while (ptr != 0) {
1439 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1440 			if (capreg != NULL)
1441 				*capreg = ptr;
1442 			return (0);
1443 		}
1444 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1445 	}
1446 
1447 	return (ENOENT);
1448 }
1449 
1450 /*
1451  * Find the requested extended capability and return the offset in
1452  * configuration space via the pointer provided.  The function returns
1453  * 0 on success and an error code otherwise.
1454  */
1455 int
1456 pci_find_extcap_method(device_t dev, device_t child, int capability,
1457     int *capreg)
1458 {
1459 	struct pci_devinfo *dinfo = device_get_ivars(child);
1460 	pcicfgregs *cfg = &dinfo->cfg;
1461 	uint32_t ecap;
1462 	uint16_t ptr;
1463 
1464 	/* Only supported for PCI-express devices. */
1465 	if (cfg->pcie.pcie_location == 0)
1466 		return (ENXIO);
1467 
1468 	ptr = PCIR_EXTCAP;
1469 	ecap = pci_read_config(child, ptr, 4);
1470 	if (ecap == 0xffffffff || ecap == 0)
1471 		return (ENOENT);
1472 	for (;;) {
1473 		if (PCI_EXTCAP_ID(ecap) == capability) {
1474 			if (capreg != NULL)
1475 				*capreg = ptr;
1476 			return (0);
1477 		}
1478 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1479 		if (ptr == 0)
1480 			break;
1481 		ecap = pci_read_config(child, ptr, 4);
1482 	}
1483 
1484 	return (ENOENT);
1485 }
1486 
1487 /*
1488  * Support for MSI-X message interrupts.
1489  */
1490 static void
1491 pci_write_msix_entry(device_t dev, u_int index, uint64_t address, uint32_t data)
1492 {
1493 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1494 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1495 	uint32_t offset;
1496 
1497 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1498 	offset = msix->msix_table_offset + index * 16;
1499 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1500 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1501 	bus_write_4(msix->msix_table_res, offset + 8, data);
1502 }
1503 
1504 void
1505 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1506     uint64_t address, uint32_t data)
1507 {
1508 
1509 	if (pci_msix_rewrite_table) {
1510 		struct pci_devinfo *dinfo = device_get_ivars(child);
1511 		struct pcicfg_msix *msix = &dinfo->cfg.msix;
1512 
1513 		/*
1514 		 * Some VM hosts require MSIX to be disabled in the
1515 		 * control register before updating the MSIX table
1516 		 * entries are allowed. It is not enough to only
1517 		 * disable MSIX while updating a single entry. MSIX
1518 		 * must be disabled while updating all entries in the
1519 		 * table.
1520 		 */
1521 		pci_write_config(child,
1522 		    msix->msix_location + PCIR_MSIX_CTRL,
1523 		    msix->msix_ctrl & ~PCIM_MSIXCTRL_MSIX_ENABLE, 2);
1524 		pci_resume_msix(child);
1525 	} else
1526 		pci_write_msix_entry(child, index, address, data);
1527 
1528 	/* Enable MSI -> HT mapping. */
1529 	pci_ht_map_msi(child, address);
1530 }
1531 
1532 void
1533 pci_mask_msix(device_t dev, u_int index)
1534 {
1535 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1536 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1537 	uint32_t offset, val;
1538 
1539 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1540 	offset = msix->msix_table_offset + index * 16 + 12;
1541 	val = bus_read_4(msix->msix_table_res, offset);
1542 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1543 		val |= PCIM_MSIX_VCTRL_MASK;
1544 		bus_write_4(msix->msix_table_res, offset, val);
1545 	}
1546 }
1547 
1548 void
1549 pci_unmask_msix(device_t dev, u_int index)
1550 {
1551 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1552 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1553 	uint32_t offset, val;
1554 
1555 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1556 	offset = msix->msix_table_offset + index * 16 + 12;
1557 	val = bus_read_4(msix->msix_table_res, offset);
1558 	if (val & PCIM_MSIX_VCTRL_MASK) {
1559 		val &= ~PCIM_MSIX_VCTRL_MASK;
1560 		bus_write_4(msix->msix_table_res, offset, val);
1561 	}
1562 }
1563 
1564 int
1565 pci_pending_msix(device_t dev, u_int index)
1566 {
1567 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1568 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1569 	uint32_t offset, bit;
1570 
1571 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1572 	offset = msix->msix_pba_offset + (index / 32) * 4;
1573 	bit = 1 << index % 32;
1574 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1575 }
1576 
1577 /*
1578  * Restore MSI-X registers and table during resume.  If MSI-X is
1579  * enabled then walk the virtual table to restore the actual MSI-X
1580  * table.
1581  */
1582 static void
1583 pci_resume_msix(device_t dev)
1584 {
1585 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1586 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1587 	struct msix_table_entry *mte;
1588 	struct msix_vector *mv;
1589 	int i;
1590 
1591 	if (msix->msix_alloc > 0) {
1592 		/* First, mask all vectors. */
1593 		for (i = 0; i < msix->msix_msgnum; i++)
1594 			pci_mask_msix(dev, i);
1595 
1596 		/* Second, program any messages with at least one handler. */
1597 		for (i = 0; i < msix->msix_table_len; i++) {
1598 			mte = &msix->msix_table[i];
1599 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1600 				continue;
1601 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1602 			pci_write_msix_entry(dev, i, mv->mv_address,
1603 			    mv->mv_data);
1604 			pci_unmask_msix(dev, i);
1605 		}
1606 	}
1607 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1608 	    msix->msix_ctrl, 2);
1609 }
1610 
1611 /*
1612  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1613  * returned in *count.  After this function returns, each message will be
1614  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1615  */
1616 int
1617 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1618 {
1619 	struct pci_devinfo *dinfo = device_get_ivars(child);
1620 	pcicfgregs *cfg = &dinfo->cfg;
1621 	struct resource_list_entry *rle;
1622 	int actual, error, i, irq, max;
1623 
1624 	/* Don't let count == 0 get us into trouble. */
1625 	if (*count == 0)
1626 		return (EINVAL);
1627 
1628 	/* If rid 0 is allocated, then fail. */
1629 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1630 	if (rle != NULL && rle->res != NULL)
1631 		return (ENXIO);
1632 
1633 	/* Already have allocated messages? */
1634 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1635 		return (ENXIO);
1636 
1637 	/* If MSI-X is blacklisted for this system, fail. */
1638 	if (pci_msix_blacklisted())
1639 		return (ENXIO);
1640 
1641 	/* MSI-X capability present? */
1642 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1643 		return (ENODEV);
1644 
1645 	/* Make sure the appropriate BARs are mapped. */
1646 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1647 	    cfg->msix.msix_table_bar);
1648 	if (rle == NULL || rle->res == NULL ||
1649 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1650 		return (ENXIO);
1651 	cfg->msix.msix_table_res = rle->res;
1652 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1653 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1654 		    cfg->msix.msix_pba_bar);
1655 		if (rle == NULL || rle->res == NULL ||
1656 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1657 			return (ENXIO);
1658 	}
1659 	cfg->msix.msix_pba_res = rle->res;
1660 
1661 	if (bootverbose)
1662 		device_printf(child,
1663 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1664 		    *count, cfg->msix.msix_msgnum);
1665 	max = min(*count, cfg->msix.msix_msgnum);
1666 	for (i = 0; i < max; i++) {
1667 		/* Allocate a message. */
1668 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1669 		if (error) {
1670 			if (i == 0)
1671 				return (error);
1672 			break;
1673 		}
1674 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1675 		    irq, 1);
1676 	}
1677 	actual = i;
1678 
1679 	if (bootverbose) {
1680 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1681 		if (actual == 1)
1682 			device_printf(child, "using IRQ %ju for MSI-X\n",
1683 			    rle->start);
1684 		else {
1685 			int run;
1686 
1687 			/*
1688 			 * Be fancy and try to print contiguous runs of
1689 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1690 			 * 'run' is true if we are in a range.
1691 			 */
1692 			device_printf(child, "using IRQs %ju", rle->start);
1693 			irq = rle->start;
1694 			run = 0;
1695 			for (i = 1; i < actual; i++) {
1696 				rle = resource_list_find(&dinfo->resources,
1697 				    SYS_RES_IRQ, i + 1);
1698 
1699 				/* Still in a run? */
1700 				if (rle->start == irq + 1) {
1701 					run = 1;
1702 					irq++;
1703 					continue;
1704 				}
1705 
1706 				/* Finish previous range. */
1707 				if (run) {
1708 					printf("-%d", irq);
1709 					run = 0;
1710 				}
1711 
1712 				/* Start new range. */
1713 				printf(",%ju", rle->start);
1714 				irq = rle->start;
1715 			}
1716 
1717 			/* Unfinished range? */
1718 			if (run)
1719 				printf("-%d", irq);
1720 			printf(" for MSI-X\n");
1721 		}
1722 	}
1723 
1724 	/* Mask all vectors. */
1725 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1726 		pci_mask_msix(child, i);
1727 
1728 	/* Allocate and initialize vector data and virtual table. */
1729 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1730 	    M_DEVBUF, M_WAITOK | M_ZERO);
1731 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1732 	    M_DEVBUF, M_WAITOK | M_ZERO);
1733 	for (i = 0; i < actual; i++) {
1734 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1735 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1736 		cfg->msix.msix_table[i].mte_vector = i + 1;
1737 	}
1738 
1739 	/* Update control register to enable MSI-X. */
1740 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1741 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1742 	    cfg->msix.msix_ctrl, 2);
1743 
1744 	/* Update counts of alloc'd messages. */
1745 	cfg->msix.msix_alloc = actual;
1746 	cfg->msix.msix_table_len = actual;
1747 	*count = actual;
1748 	return (0);
1749 }
1750 
1751 /*
1752  * By default, pci_alloc_msix() will assign the allocated IRQ
1753  * resources consecutively to the first N messages in the MSI-X table.
1754  * However, device drivers may want to use different layouts if they
1755  * either receive fewer messages than they asked for, or they wish to
1756  * populate the MSI-X table sparsely.  This method allows the driver
1757  * to specify what layout it wants.  It must be called after a
1758  * successful pci_alloc_msix() but before any of the associated
1759  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1760  *
1761  * The 'vectors' array contains 'count' message vectors.  The array
1762  * maps directly to the MSI-X table in that index 0 in the array
1763  * specifies the vector for the first message in the MSI-X table, etc.
1764  * The vector value in each array index can either be 0 to indicate
1765  * that no vector should be assigned to a message slot, or it can be a
1766  * number from 1 to N (where N is the count returned from a
1767  * succcessful call to pci_alloc_msix()) to indicate which message
1768  * vector (IRQ) to be used for the corresponding message.
1769  *
1770  * On successful return, each message with a non-zero vector will have
1771  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1772  * 1.  Additionally, if any of the IRQs allocated via the previous
1773  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1774  * will be freed back to the system automatically.
1775  *
1776  * For example, suppose a driver has a MSI-X table with 6 messages and
1777  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1778  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1779  * C.  After the call to pci_alloc_msix(), the device will be setup to
1780  * have an MSI-X table of ABC--- (where - means no vector assigned).
1781  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1782  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1783  * be freed back to the system.  This device will also have valid
1784  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1785  *
1786  * In any case, the SYS_RES_IRQ rid X will always map to the message
1787  * at MSI-X table index X - 1 and will only be valid if a vector is
1788  * assigned to that table entry.
1789  */
1790 int
1791 pci_remap_msix_method(device_t dev, device_t child, int count,
1792     const u_int *vectors)
1793 {
1794 	struct pci_devinfo *dinfo = device_get_ivars(child);
1795 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1796 	struct resource_list_entry *rle;
1797 	int i, irq, j, *used;
1798 
1799 	/*
1800 	 * Have to have at least one message in the table but the
1801 	 * table can't be bigger than the actual MSI-X table in the
1802 	 * device.
1803 	 */
1804 	if (count == 0 || count > msix->msix_msgnum)
1805 		return (EINVAL);
1806 
1807 	/* Sanity check the vectors. */
1808 	for (i = 0; i < count; i++)
1809 		if (vectors[i] > msix->msix_alloc)
1810 			return (EINVAL);
1811 
1812 	/*
1813 	 * Make sure there aren't any holes in the vectors to be used.
1814 	 * It's a big pain to support it, and it doesn't really make
1815 	 * sense anyway.  Also, at least one vector must be used.
1816 	 */
1817 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1818 	    M_ZERO);
1819 	for (i = 0; i < count; i++)
1820 		if (vectors[i] != 0)
1821 			used[vectors[i] - 1] = 1;
1822 	for (i = 0; i < msix->msix_alloc - 1; i++)
1823 		if (used[i] == 0 && used[i + 1] == 1) {
1824 			free(used, M_DEVBUF);
1825 			return (EINVAL);
1826 		}
1827 	if (used[0] != 1) {
1828 		free(used, M_DEVBUF);
1829 		return (EINVAL);
1830 	}
1831 
1832 	/* Make sure none of the resources are allocated. */
1833 	for (i = 0; i < msix->msix_table_len; i++) {
1834 		if (msix->msix_table[i].mte_vector == 0)
1835 			continue;
1836 		if (msix->msix_table[i].mte_handlers > 0) {
1837 			free(used, M_DEVBUF);
1838 			return (EBUSY);
1839 		}
1840 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1841 		KASSERT(rle != NULL, ("missing resource"));
1842 		if (rle->res != NULL) {
1843 			free(used, M_DEVBUF);
1844 			return (EBUSY);
1845 		}
1846 	}
1847 
1848 	/* Free the existing resource list entries. */
1849 	for (i = 0; i < msix->msix_table_len; i++) {
1850 		if (msix->msix_table[i].mte_vector == 0)
1851 			continue;
1852 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1853 	}
1854 
1855 	/*
1856 	 * Build the new virtual table keeping track of which vectors are
1857 	 * used.
1858 	 */
1859 	free(msix->msix_table, M_DEVBUF);
1860 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1861 	    M_DEVBUF, M_WAITOK | M_ZERO);
1862 	for (i = 0; i < count; i++)
1863 		msix->msix_table[i].mte_vector = vectors[i];
1864 	msix->msix_table_len = count;
1865 
1866 	/* Free any unused IRQs and resize the vectors array if necessary. */
1867 	j = msix->msix_alloc - 1;
1868 	if (used[j] == 0) {
1869 		struct msix_vector *vec;
1870 
1871 		while (used[j] == 0) {
1872 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1873 			    msix->msix_vectors[j].mv_irq);
1874 			j--;
1875 		}
1876 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1877 		    M_WAITOK);
1878 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1879 		    (j + 1));
1880 		free(msix->msix_vectors, M_DEVBUF);
1881 		msix->msix_vectors = vec;
1882 		msix->msix_alloc = j + 1;
1883 	}
1884 	free(used, M_DEVBUF);
1885 
1886 	/* Map the IRQs onto the rids. */
1887 	for (i = 0; i < count; i++) {
1888 		if (vectors[i] == 0)
1889 			continue;
1890 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1891 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1892 		    irq, 1);
1893 	}
1894 
1895 	if (bootverbose) {
1896 		device_printf(child, "Remapped MSI-X IRQs as: ");
1897 		for (i = 0; i < count; i++) {
1898 			if (i != 0)
1899 				printf(", ");
1900 			if (vectors[i] == 0)
1901 				printf("---");
1902 			else
1903 				printf("%d",
1904 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1905 		}
1906 		printf("\n");
1907 	}
1908 
1909 	return (0);
1910 }
1911 
1912 static int
1913 pci_release_msix(device_t dev, device_t child)
1914 {
1915 	struct pci_devinfo *dinfo = device_get_ivars(child);
1916 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1917 	struct resource_list_entry *rle;
1918 	int i;
1919 
1920 	/* Do we have any messages to release? */
1921 	if (msix->msix_alloc == 0)
1922 		return (ENODEV);
1923 
1924 	/* Make sure none of the resources are allocated. */
1925 	for (i = 0; i < msix->msix_table_len; i++) {
1926 		if (msix->msix_table[i].mte_vector == 0)
1927 			continue;
1928 		if (msix->msix_table[i].mte_handlers > 0)
1929 			return (EBUSY);
1930 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1931 		KASSERT(rle != NULL, ("missing resource"));
1932 		if (rle->res != NULL)
1933 			return (EBUSY);
1934 	}
1935 
1936 	/* Update control register to disable MSI-X. */
1937 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1938 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1939 	    msix->msix_ctrl, 2);
1940 
1941 	/* Free the resource list entries. */
1942 	for (i = 0; i < msix->msix_table_len; i++) {
1943 		if (msix->msix_table[i].mte_vector == 0)
1944 			continue;
1945 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1946 	}
1947 	free(msix->msix_table, M_DEVBUF);
1948 	msix->msix_table_len = 0;
1949 
1950 	/* Release the IRQs. */
1951 	for (i = 0; i < msix->msix_alloc; i++)
1952 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1953 		    msix->msix_vectors[i].mv_irq);
1954 	free(msix->msix_vectors, M_DEVBUF);
1955 	msix->msix_alloc = 0;
1956 	return (0);
1957 }
1958 
1959 /*
1960  * Return the max supported MSI-X messages this device supports.
1961  * Basically, assuming the MD code can alloc messages, this function
1962  * should return the maximum value that pci_alloc_msix() can return.
1963  * Thus, it is subject to the tunables, etc.
1964  */
1965 int
1966 pci_msix_count_method(device_t dev, device_t child)
1967 {
1968 	struct pci_devinfo *dinfo = device_get_ivars(child);
1969 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1970 
1971 	if (pci_do_msix && msix->msix_location != 0)
1972 		return (msix->msix_msgnum);
1973 	return (0);
1974 }
1975 
1976 int
1977 pci_msix_pba_bar_method(device_t dev, device_t child)
1978 {
1979 	struct pci_devinfo *dinfo = device_get_ivars(child);
1980 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1981 
1982 	if (pci_do_msix && msix->msix_location != 0)
1983 		return (msix->msix_pba_bar);
1984 	return (-1);
1985 }
1986 
1987 int
1988 pci_msix_table_bar_method(device_t dev, device_t child)
1989 {
1990 	struct pci_devinfo *dinfo = device_get_ivars(child);
1991 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1992 
1993 	if (pci_do_msix && msix->msix_location != 0)
1994 		return (msix->msix_table_bar);
1995 	return (-1);
1996 }
1997 
1998 /*
1999  * HyperTransport MSI mapping control
2000  */
2001 void
2002 pci_ht_map_msi(device_t dev, uint64_t addr)
2003 {
2004 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2005 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
2006 
2007 	if (!ht->ht_msimap)
2008 		return;
2009 
2010 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
2011 	    ht->ht_msiaddr >> 20 == addr >> 20) {
2012 		/* Enable MSI -> HT mapping. */
2013 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
2014 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2015 		    ht->ht_msictrl, 2);
2016 	}
2017 
2018 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
2019 		/* Disable MSI -> HT mapping. */
2020 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
2021 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2022 		    ht->ht_msictrl, 2);
2023 	}
2024 }
2025 
2026 int
2027 pci_get_max_payload(device_t dev)
2028 {
2029 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2030 	int cap;
2031 	uint16_t val;
2032 
2033 	cap = dinfo->cfg.pcie.pcie_location;
2034 	if (cap == 0)
2035 		return (0);
2036 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2037 	val &= PCIEM_CTL_MAX_PAYLOAD;
2038 	val >>= 5;
2039 	return (1 << (val + 7));
2040 }
2041 
2042 int
2043 pci_get_max_read_req(device_t dev)
2044 {
2045 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2046 	int cap;
2047 	uint16_t val;
2048 
2049 	cap = dinfo->cfg.pcie.pcie_location;
2050 	if (cap == 0)
2051 		return (0);
2052 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2053 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2054 	val >>= 12;
2055 	return (1 << (val + 7));
2056 }
2057 
2058 int
2059 pci_set_max_read_req(device_t dev, int size)
2060 {
2061 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2062 	int cap;
2063 	uint16_t val;
2064 
2065 	cap = dinfo->cfg.pcie.pcie_location;
2066 	if (cap == 0)
2067 		return (0);
2068 	if (size < 128)
2069 		size = 128;
2070 	if (size > 4096)
2071 		size = 4096;
2072 	size = (1 << (fls(size) - 1));
2073 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2074 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2075 	val |= (fls(size) - 8) << 12;
2076 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2077 	return (size);
2078 }
2079 
2080 uint32_t
2081 pcie_read_config(device_t dev, int reg, int width)
2082 {
2083 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2084 	int cap;
2085 
2086 	cap = dinfo->cfg.pcie.pcie_location;
2087 	if (cap == 0) {
2088 		if (width == 2)
2089 			return (0xffff);
2090 		return (0xffffffff);
2091 	}
2092 
2093 	return (pci_read_config(dev, cap + reg, width));
2094 }
2095 
2096 void
2097 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2098 {
2099 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2100 	int cap;
2101 
2102 	cap = dinfo->cfg.pcie.pcie_location;
2103 	if (cap == 0)
2104 		return;
2105 	pci_write_config(dev, cap + reg, value, width);
2106 }
2107 
2108 /*
2109  * Adjusts a PCI-e capability register by clearing the bits in mask
2110  * and setting the bits in (value & mask).  Bits not set in mask are
2111  * not adjusted.
2112  *
2113  * Returns the old value on success or all ones on failure.
2114  */
2115 uint32_t
2116 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2117     int width)
2118 {
2119 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2120 	uint32_t old, new;
2121 	int cap;
2122 
2123 	cap = dinfo->cfg.pcie.pcie_location;
2124 	if (cap == 0) {
2125 		if (width == 2)
2126 			return (0xffff);
2127 		return (0xffffffff);
2128 	}
2129 
2130 	old = pci_read_config(dev, cap + reg, width);
2131 	new = old & ~mask;
2132 	new |= (value & mask);
2133 	pci_write_config(dev, cap + reg, new, width);
2134 	return (old);
2135 }
2136 
2137 /*
2138  * Support for MSI message signalled interrupts.
2139  */
2140 void
2141 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2142     uint16_t data)
2143 {
2144 	struct pci_devinfo *dinfo = device_get_ivars(child);
2145 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2146 
2147 	/* Write data and address values. */
2148 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2149 	    address & 0xffffffff, 4);
2150 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2151 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2152 		    address >> 32, 4);
2153 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2154 		    data, 2);
2155 	} else
2156 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2157 		    2);
2158 
2159 	/* Enable MSI in the control register. */
2160 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2161 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2162 	    msi->msi_ctrl, 2);
2163 
2164 	/* Enable MSI -> HT mapping. */
2165 	pci_ht_map_msi(child, address);
2166 }
2167 
2168 void
2169 pci_disable_msi_method(device_t dev, device_t child)
2170 {
2171 	struct pci_devinfo *dinfo = device_get_ivars(child);
2172 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2173 
2174 	/* Disable MSI -> HT mapping. */
2175 	pci_ht_map_msi(child, 0);
2176 
2177 	/* Disable MSI in the control register. */
2178 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2179 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2180 	    msi->msi_ctrl, 2);
2181 }
2182 
2183 /*
2184  * Restore MSI registers during resume.  If MSI is enabled then
2185  * restore the data and address registers in addition to the control
2186  * register.
2187  */
2188 static void
2189 pci_resume_msi(device_t dev)
2190 {
2191 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2192 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2193 	uint64_t address;
2194 	uint16_t data;
2195 
2196 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2197 		address = msi->msi_addr;
2198 		data = msi->msi_data;
2199 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2200 		    address & 0xffffffff, 4);
2201 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2202 			pci_write_config(dev, msi->msi_location +
2203 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2204 			pci_write_config(dev, msi->msi_location +
2205 			    PCIR_MSI_DATA_64BIT, data, 2);
2206 		} else
2207 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2208 			    data, 2);
2209 	}
2210 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2211 	    2);
2212 }
2213 
2214 static int
2215 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2216 {
2217 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2218 	pcicfgregs *cfg = &dinfo->cfg;
2219 	struct resource_list_entry *rle;
2220 	struct msix_table_entry *mte;
2221 	struct msix_vector *mv;
2222 	uint64_t addr;
2223 	uint32_t data;
2224 	int error, i, j;
2225 
2226 	/*
2227 	 * Handle MSI first.  We try to find this IRQ among our list
2228 	 * of MSI IRQs.  If we find it, we request updated address and
2229 	 * data registers and apply the results.
2230 	 */
2231 	if (cfg->msi.msi_alloc > 0) {
2232 
2233 		/* If we don't have any active handlers, nothing to do. */
2234 		if (cfg->msi.msi_handlers == 0)
2235 			return (0);
2236 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2237 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2238 			    i + 1);
2239 			if (rle->start == irq) {
2240 				error = PCIB_MAP_MSI(device_get_parent(bus),
2241 				    dev, irq, &addr, &data);
2242 				if (error)
2243 					return (error);
2244 				pci_disable_msi(dev);
2245 				dinfo->cfg.msi.msi_addr = addr;
2246 				dinfo->cfg.msi.msi_data = data;
2247 				pci_enable_msi(dev, addr, data);
2248 				return (0);
2249 			}
2250 		}
2251 		return (ENOENT);
2252 	}
2253 
2254 	/*
2255 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2256 	 * we request the updated mapping info.  If that works, we go
2257 	 * through all the slots that use this IRQ and update them.
2258 	 */
2259 	if (cfg->msix.msix_alloc > 0) {
2260 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2261 			mv = &cfg->msix.msix_vectors[i];
2262 			if (mv->mv_irq == irq) {
2263 				error = PCIB_MAP_MSI(device_get_parent(bus),
2264 				    dev, irq, &addr, &data);
2265 				if (error)
2266 					return (error);
2267 				mv->mv_address = addr;
2268 				mv->mv_data = data;
2269 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2270 					mte = &cfg->msix.msix_table[j];
2271 					if (mte->mte_vector != i + 1)
2272 						continue;
2273 					if (mte->mte_handlers == 0)
2274 						continue;
2275 					pci_mask_msix(dev, j);
2276 					pci_enable_msix(dev, j, addr, data);
2277 					pci_unmask_msix(dev, j);
2278 				}
2279 			}
2280 		}
2281 		return (ENOENT);
2282 	}
2283 
2284 	return (ENOENT);
2285 }
2286 
2287 /*
2288  * Returns true if the specified device is blacklisted because MSI
2289  * doesn't work.
2290  */
2291 int
2292 pci_msi_device_blacklisted(device_t dev)
2293 {
2294 
2295 	if (!pci_honor_msi_blacklist)
2296 		return (0);
2297 
2298 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2299 }
2300 
2301 /*
2302  * Determine if MSI is blacklisted globally on this system.  Currently,
2303  * we just check for blacklisted chipsets as represented by the
2304  * host-PCI bridge at device 0:0:0.  In the future, it may become
2305  * necessary to check other system attributes, such as the kenv values
2306  * that give the motherboard manufacturer and model number.
2307  */
2308 static int
2309 pci_msi_blacklisted(void)
2310 {
2311 	device_t dev;
2312 
2313 	if (!pci_honor_msi_blacklist)
2314 		return (0);
2315 
2316 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2317 	if (!(pcie_chipset || pcix_chipset)) {
2318 		if (vm_guest != VM_GUEST_NO) {
2319 			/*
2320 			 * Whitelist older chipsets in virtual
2321 			 * machines known to support MSI.
2322 			 */
2323 			dev = pci_find_bsf(0, 0, 0);
2324 			if (dev != NULL)
2325 				return (!pci_has_quirk(pci_get_devid(dev),
2326 					PCI_QUIRK_ENABLE_MSI_VM));
2327 		}
2328 		return (1);
2329 	}
2330 
2331 	dev = pci_find_bsf(0, 0, 0);
2332 	if (dev != NULL)
2333 		return (pci_msi_device_blacklisted(dev));
2334 	return (0);
2335 }
2336 
2337 /*
2338  * Returns true if the specified device is blacklisted because MSI-X
2339  * doesn't work.  Note that this assumes that if MSI doesn't work,
2340  * MSI-X doesn't either.
2341  */
2342 int
2343 pci_msix_device_blacklisted(device_t dev)
2344 {
2345 
2346 	if (!pci_honor_msi_blacklist)
2347 		return (0);
2348 
2349 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2350 		return (1);
2351 
2352 	return (pci_msi_device_blacklisted(dev));
2353 }
2354 
2355 /*
2356  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2357  * is blacklisted, assume that MSI-X is as well.  Check for additional
2358  * chipsets where MSI works but MSI-X does not.
2359  */
2360 static int
2361 pci_msix_blacklisted(void)
2362 {
2363 	device_t dev;
2364 
2365 	if (!pci_honor_msi_blacklist)
2366 		return (0);
2367 
2368 	dev = pci_find_bsf(0, 0, 0);
2369 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2370 	    PCI_QUIRK_DISABLE_MSIX))
2371 		return (1);
2372 
2373 	return (pci_msi_blacklisted());
2374 }
2375 
2376 /*
2377  * Attempt to allocate *count MSI messages.  The actual number allocated is
2378  * returned in *count.  After this function returns, each message will be
2379  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2380  */
2381 int
2382 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2383 {
2384 	struct pci_devinfo *dinfo = device_get_ivars(child);
2385 	pcicfgregs *cfg = &dinfo->cfg;
2386 	struct resource_list_entry *rle;
2387 	int actual, error, i, irqs[32];
2388 	uint16_t ctrl;
2389 
2390 	/* Don't let count == 0 get us into trouble. */
2391 	if (*count == 0)
2392 		return (EINVAL);
2393 
2394 	/* If rid 0 is allocated, then fail. */
2395 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2396 	if (rle != NULL && rle->res != NULL)
2397 		return (ENXIO);
2398 
2399 	/* Already have allocated messages? */
2400 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2401 		return (ENXIO);
2402 
2403 	/* If MSI is blacklisted for this system, fail. */
2404 	if (pci_msi_blacklisted())
2405 		return (ENXIO);
2406 
2407 	/* MSI capability present? */
2408 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2409 		return (ENODEV);
2410 
2411 	if (bootverbose)
2412 		device_printf(child,
2413 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2414 		    *count, cfg->msi.msi_msgnum);
2415 
2416 	/* Don't ask for more than the device supports. */
2417 	actual = min(*count, cfg->msi.msi_msgnum);
2418 
2419 	/* Don't ask for more than 32 messages. */
2420 	actual = min(actual, 32);
2421 
2422 	/* MSI requires power of 2 number of messages. */
2423 	if (!powerof2(actual))
2424 		return (EINVAL);
2425 
2426 	for (;;) {
2427 		/* Try to allocate N messages. */
2428 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2429 		    actual, irqs);
2430 		if (error == 0)
2431 			break;
2432 		if (actual == 1)
2433 			return (error);
2434 
2435 		/* Try N / 2. */
2436 		actual >>= 1;
2437 	}
2438 
2439 	/*
2440 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2441 	 * resources in the irqs[] array, so add new resources
2442 	 * starting at rid 1.
2443 	 */
2444 	for (i = 0; i < actual; i++)
2445 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2446 		    irqs[i], irqs[i], 1);
2447 
2448 	if (bootverbose) {
2449 		if (actual == 1)
2450 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2451 		else {
2452 			int run;
2453 
2454 			/*
2455 			 * Be fancy and try to print contiguous runs
2456 			 * of IRQ values as ranges.  'run' is true if
2457 			 * we are in a range.
2458 			 */
2459 			device_printf(child, "using IRQs %d", irqs[0]);
2460 			run = 0;
2461 			for (i = 1; i < actual; i++) {
2462 
2463 				/* Still in a run? */
2464 				if (irqs[i] == irqs[i - 1] + 1) {
2465 					run = 1;
2466 					continue;
2467 				}
2468 
2469 				/* Finish previous range. */
2470 				if (run) {
2471 					printf("-%d", irqs[i - 1]);
2472 					run = 0;
2473 				}
2474 
2475 				/* Start new range. */
2476 				printf(",%d", irqs[i]);
2477 			}
2478 
2479 			/* Unfinished range? */
2480 			if (run)
2481 				printf("-%d", irqs[actual - 1]);
2482 			printf(" for MSI\n");
2483 		}
2484 	}
2485 
2486 	/* Update control register with actual count. */
2487 	ctrl = cfg->msi.msi_ctrl;
2488 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2489 	ctrl |= (ffs(actual) - 1) << 4;
2490 	cfg->msi.msi_ctrl = ctrl;
2491 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2492 
2493 	/* Update counts of alloc'd messages. */
2494 	cfg->msi.msi_alloc = actual;
2495 	cfg->msi.msi_handlers = 0;
2496 	*count = actual;
2497 	return (0);
2498 }
2499 
2500 /* Release the MSI messages associated with this device. */
2501 int
2502 pci_release_msi_method(device_t dev, device_t child)
2503 {
2504 	struct pci_devinfo *dinfo = device_get_ivars(child);
2505 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2506 	struct resource_list_entry *rle;
2507 	int error, i, irqs[32];
2508 
2509 	/* Try MSI-X first. */
2510 	error = pci_release_msix(dev, child);
2511 	if (error != ENODEV)
2512 		return (error);
2513 
2514 	/* Do we have any messages to release? */
2515 	if (msi->msi_alloc == 0)
2516 		return (ENODEV);
2517 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2518 
2519 	/* Make sure none of the resources are allocated. */
2520 	if (msi->msi_handlers > 0)
2521 		return (EBUSY);
2522 	for (i = 0; i < msi->msi_alloc; i++) {
2523 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2524 		KASSERT(rle != NULL, ("missing MSI resource"));
2525 		if (rle->res != NULL)
2526 			return (EBUSY);
2527 		irqs[i] = rle->start;
2528 	}
2529 
2530 	/* Update control register with 0 count. */
2531 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2532 	    ("%s: MSI still enabled", __func__));
2533 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2534 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2535 	    msi->msi_ctrl, 2);
2536 
2537 	/* Release the messages. */
2538 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2539 	for (i = 0; i < msi->msi_alloc; i++)
2540 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2541 
2542 	/* Update alloc count. */
2543 	msi->msi_alloc = 0;
2544 	msi->msi_addr = 0;
2545 	msi->msi_data = 0;
2546 	return (0);
2547 }
2548 
2549 /*
2550  * Return the max supported MSI messages this device supports.
2551  * Basically, assuming the MD code can alloc messages, this function
2552  * should return the maximum value that pci_alloc_msi() can return.
2553  * Thus, it is subject to the tunables, etc.
2554  */
2555 int
2556 pci_msi_count_method(device_t dev, device_t child)
2557 {
2558 	struct pci_devinfo *dinfo = device_get_ivars(child);
2559 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2560 
2561 	if (pci_do_msi && msi->msi_location != 0)
2562 		return (msi->msi_msgnum);
2563 	return (0);
2564 }
2565 
2566 /* free pcicfgregs structure and all depending data structures */
2567 
2568 int
2569 pci_freecfg(struct pci_devinfo *dinfo)
2570 {
2571 	struct devlist *devlist_head;
2572 	struct pci_map *pm, *next;
2573 	int i;
2574 
2575 	devlist_head = &pci_devq;
2576 
2577 	if (dinfo->cfg.vpd.vpd_reg) {
2578 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2579 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2580 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2581 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2582 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2583 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2584 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2585 	}
2586 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2587 		free(pm, M_DEVBUF);
2588 	}
2589 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2590 	free(dinfo, M_DEVBUF);
2591 
2592 	/* increment the generation count */
2593 	pci_generation++;
2594 
2595 	/* we're losing one device */
2596 	pci_numdevs--;
2597 	return (0);
2598 }
2599 
2600 /*
 * PCI power management
2602  */
2603 int
2604 pci_set_powerstate_method(device_t dev, device_t child, int state)
2605 {
2606 	struct pci_devinfo *dinfo = device_get_ivars(child);
2607 	pcicfgregs *cfg = &dinfo->cfg;
2608 	uint16_t status;
2609 	int oldstate, highest, delay;
2610 
2611 	if (cfg->pp.pp_cap == 0)
2612 		return (EOPNOTSUPP);
2613 
2614 	/*
2615 	 * Optimize a no state change request away.  While it would be OK to
2616 	 * write to the hardware in theory, some devices have shown odd
2617 	 * behavior when going from D3 -> D3.
2618 	 */
2619 	oldstate = pci_get_powerstate(child);
2620 	if (oldstate == state)
2621 		return (0);
2622 
2623 	/*
2624 	 * The PCI power management specification states that after a state
2625 	 * transition between PCI power states, system software must
2626 	 * guarantee a minimal delay before the function accesses the device.
2627 	 * Compute the worst case delay that we need to guarantee before we
2628 	 * access the device.  Many devices will be responsive much more
2629 	 * quickly than this delay, but there are some that don't respond
2630 	 * instantly to state changes.  Transitions to/from D3 state require
2631 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2632 	 * is done below with DELAY rather than a sleeper function because
2633 	 * this function can be called from contexts where we cannot sleep.
2634 	 */
2635 	highest = (oldstate > state) ? oldstate : state;
2636 	if (highest == PCI_POWERSTATE_D3)
2637 	    delay = 10000;
2638 	else if (highest == PCI_POWERSTATE_D2)
2639 	    delay = 200;
2640 	else
2641 	    delay = 0;
2642 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2643 	    & ~PCIM_PSTAT_DMASK;
2644 	switch (state) {
2645 	case PCI_POWERSTATE_D0:
2646 		status |= PCIM_PSTAT_D0;
2647 		break;
2648 	case PCI_POWERSTATE_D1:
2649 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2650 			return (EOPNOTSUPP);
2651 		status |= PCIM_PSTAT_D1;
2652 		break;
2653 	case PCI_POWERSTATE_D2:
2654 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2655 			return (EOPNOTSUPP);
2656 		status |= PCIM_PSTAT_D2;
2657 		break;
2658 	case PCI_POWERSTATE_D3:
2659 		status |= PCIM_PSTAT_D3;
2660 		break;
2661 	default:
2662 		return (EINVAL);
2663 	}
2664 
2665 	if (bootverbose)
2666 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2667 		    state);
2668 
2669 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2670 	if (delay)
2671 		DELAY(delay);
2672 	return (0);
2673 }
2674 
2675 int
2676 pci_get_powerstate_method(device_t dev, device_t child)
2677 {
2678 	struct pci_devinfo *dinfo = device_get_ivars(child);
2679 	pcicfgregs *cfg = &dinfo->cfg;
2680 	uint16_t status;
2681 	int result;
2682 
2683 	if (cfg->pp.pp_cap != 0) {
2684 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2685 		switch (status & PCIM_PSTAT_DMASK) {
2686 		case PCIM_PSTAT_D0:
2687 			result = PCI_POWERSTATE_D0;
2688 			break;
2689 		case PCIM_PSTAT_D1:
2690 			result = PCI_POWERSTATE_D1;
2691 			break;
2692 		case PCIM_PSTAT_D2:
2693 			result = PCI_POWERSTATE_D2;
2694 			break;
2695 		case PCIM_PSTAT_D3:
2696 			result = PCI_POWERSTATE_D3;
2697 			break;
2698 		default:
2699 			result = PCI_POWERSTATE_UNKNOWN;
2700 			break;
2701 		}
2702 	} else {
2703 		/* No support, device is always at D0 */
2704 		result = PCI_POWERSTATE_D0;
2705 	}
2706 	return (result);
2707 }
2708 
2709 /*
2710  * Some convenience functions for PCI device drivers.
2711  */
2712 
2713 static __inline void
2714 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2715 {
2716 	uint16_t	command;
2717 
2718 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2719 	command |= bit;
2720 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2721 }
2722 
2723 static __inline void
2724 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2725 {
2726 	uint16_t	command;
2727 
2728 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2729 	command &= ~bit;
2730 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2731 }
2732 
2733 int
2734 pci_enable_busmaster_method(device_t dev, device_t child)
2735 {
2736 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2737 	return (0);
2738 }
2739 
2740 int
2741 pci_disable_busmaster_method(device_t dev, device_t child)
2742 {
2743 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2744 	return (0);
2745 }
2746 
2747 int
2748 pci_enable_io_method(device_t dev, device_t child, int space)
2749 {
2750 	uint16_t bit;
2751 
2752 	switch(space) {
2753 	case SYS_RES_IOPORT:
2754 		bit = PCIM_CMD_PORTEN;
2755 		break;
2756 	case SYS_RES_MEMORY:
2757 		bit = PCIM_CMD_MEMEN;
2758 		break;
2759 	default:
2760 		return (EINVAL);
2761 	}
2762 	pci_set_command_bit(dev, child, bit);
2763 	return (0);
2764 }
2765 
2766 int
2767 pci_disable_io_method(device_t dev, device_t child, int space)
2768 {
2769 	uint16_t bit;
2770 
2771 	switch(space) {
2772 	case SYS_RES_IOPORT:
2773 		bit = PCIM_CMD_PORTEN;
2774 		break;
2775 	case SYS_RES_MEMORY:
2776 		bit = PCIM_CMD_MEMEN;
2777 		break;
2778 	default:
2779 		return (EINVAL);
2780 	}
2781 	pci_clear_command_bit(dev, child, bit);
2782 	return (0);
2783 }
2784 
2785 /*
2786  * New style pci driver.  Parent device is either a pci-host-bridge or a
2787  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2788  */
2789 
2790 void
2791 pci_print_verbose(struct pci_devinfo *dinfo)
2792 {
2793 
2794 	if (bootverbose) {
2795 		pcicfgregs *cfg = &dinfo->cfg;
2796 
2797 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2798 		    cfg->vendor, cfg->device, cfg->revid);
2799 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2800 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2801 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2802 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2803 		    cfg->mfdev);
2804 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2805 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2806 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2807 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2808 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2809 		if (cfg->intpin > 0)
2810 			printf("\tintpin=%c, irq=%d\n",
2811 			    cfg->intpin +'a' -1, cfg->intline);
2812 		if (cfg->pp.pp_cap) {
2813 			uint16_t status;
2814 
2815 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2816 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2817 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2818 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2819 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2820 			    status & PCIM_PSTAT_DMASK);
2821 		}
2822 		if (cfg->msi.msi_location) {
2823 			int ctrl;
2824 
2825 			ctrl = cfg->msi.msi_ctrl;
2826 			printf("\tMSI supports %d message%s%s%s\n",
2827 			    cfg->msi.msi_msgnum,
2828 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2829 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2830 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2831 		}
2832 		if (cfg->msix.msix_location) {
2833 			printf("\tMSI-X supports %d message%s ",
2834 			    cfg->msix.msix_msgnum,
2835 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2836 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2837 				printf("in map 0x%x\n",
2838 				    cfg->msix.msix_table_bar);
2839 			else
2840 				printf("in maps 0x%x and 0x%x\n",
2841 				    cfg->msix.msix_table_bar,
2842 				    cfg->msix.msix_pba_bar);
2843 		}
2844 	}
2845 }
2846 
2847 static int
2848 pci_porten(device_t dev)
2849 {
2850 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2851 }
2852 
2853 static int
2854 pci_memen(device_t dev)
2855 {
2856 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2857 }
2858 
2859 void
2860 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2861     int *bar64)
2862 {
2863 	struct pci_devinfo *dinfo;
2864 	pci_addr_t map, testval;
2865 	int ln2range;
2866 	uint16_t cmd;
2867 
2868 	/*
2869 	 * The device ROM BAR is special.  It is always a 32-bit
2870 	 * memory BAR.  Bit 0 is special and should not be set when
2871 	 * sizing the BAR.
2872 	 */
2873 	dinfo = device_get_ivars(dev);
2874 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2875 		map = pci_read_config(dev, reg, 4);
2876 		pci_write_config(dev, reg, 0xfffffffe, 4);
2877 		testval = pci_read_config(dev, reg, 4);
2878 		pci_write_config(dev, reg, map, 4);
2879 		*mapp = map;
2880 		*testvalp = testval;
2881 		if (bar64 != NULL)
2882 			*bar64 = 0;
2883 		return;
2884 	}
2885 
2886 	map = pci_read_config(dev, reg, 4);
2887 	ln2range = pci_maprange(map);
2888 	if (ln2range == 64)
2889 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2890 
2891 	/*
2892 	 * Disable decoding via the command register before
2893 	 * determining the BAR's length since we will be placing it in
2894 	 * a weird state.
2895 	 */
2896 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2897 	pci_write_config(dev, PCIR_COMMAND,
2898 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2899 
2900 	/*
2901 	 * Determine the BAR's length by writing all 1's.  The bottom
2902 	 * log_2(size) bits of the BAR will stick as 0 when we read
2903 	 * the value back.
2904 	 */
2905 	pci_write_config(dev, reg, 0xffffffff, 4);
2906 	testval = pci_read_config(dev, reg, 4);
2907 	if (ln2range == 64) {
2908 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2909 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2910 	}
2911 
2912 	/*
2913 	 * Restore the original value of the BAR.  We may have reprogrammed
2914 	 * the BAR of the low-level console device and when booting verbose,
2915 	 * we need the console device addressable.
2916 	 */
2917 	pci_write_config(dev, reg, map, 4);
2918 	if (ln2range == 64)
2919 		pci_write_config(dev, reg + 4, map >> 32, 4);
2920 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2921 
2922 	*mapp = map;
2923 	*testvalp = testval;
2924 	if (bar64 != NULL)
2925 		*bar64 = (ln2range == 64);
2926 }
2927 
2928 static void
2929 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2930 {
2931 	struct pci_devinfo *dinfo;
2932 	int ln2range;
2933 
2934 	/* The device ROM BAR is always a 32-bit memory BAR. */
2935 	dinfo = device_get_ivars(dev);
2936 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2937 		ln2range = 32;
2938 	else
2939 		ln2range = pci_maprange(pm->pm_value);
2940 	pci_write_config(dev, pm->pm_reg, base, 4);
2941 	if (ln2range == 64)
2942 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2943 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2944 	if (ln2range == 64)
2945 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2946 		    pm->pm_reg + 4, 4) << 32;
2947 }
2948 
2949 struct pci_map *
2950 pci_find_bar(device_t dev, int reg)
2951 {
2952 	struct pci_devinfo *dinfo;
2953 	struct pci_map *pm;
2954 
2955 	dinfo = device_get_ivars(dev);
2956 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2957 		if (pm->pm_reg == reg)
2958 			return (pm);
2959 	}
2960 	return (NULL);
2961 }
2962 
2963 int
2964 pci_bar_enabled(device_t dev, struct pci_map *pm)
2965 {
2966 	struct pci_devinfo *dinfo;
2967 	uint16_t cmd;
2968 
2969 	dinfo = device_get_ivars(dev);
2970 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2971 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2972 		return (0);
2973 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2974 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2975 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2976 	else
2977 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2978 }
2979 
2980 struct pci_map *
2981 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2982 {
2983 	struct pci_devinfo *dinfo;
2984 	struct pci_map *pm, *prev;
2985 
2986 	dinfo = device_get_ivars(dev);
2987 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2988 	pm->pm_reg = reg;
2989 	pm->pm_value = value;
2990 	pm->pm_size = size;
2991 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2992 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2993 		    reg));
2994 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2995 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2996 			break;
2997 	}
2998 	if (prev != NULL)
2999 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
3000 	else
3001 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
3002 	return (pm);
3003 }
3004 
3005 static void
3006 pci_restore_bars(device_t dev)
3007 {
3008 	struct pci_devinfo *dinfo;
3009 	struct pci_map *pm;
3010 	int ln2range;
3011 
3012 	dinfo = device_get_ivars(dev);
3013 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
3014 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
3015 			ln2range = 32;
3016 		else
3017 			ln2range = pci_maprange(pm->pm_value);
3018 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
3019 		if (ln2range == 64)
3020 			pci_write_config(dev, pm->pm_reg + 4,
3021 			    pm->pm_value >> 32, 4);
3022 	}
3023 }
3024 
3025 /*
3026  * Add a resource based on a pci map register. Return 1 if the map
3027  * register is a 32bit map register or 2 if it is a 64bit register.
3028  */
3029 static int
3030 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3031     int force, int prefetch)
3032 {
3033 	struct pci_map *pm;
3034 	pci_addr_t base, map, testval;
3035 	pci_addr_t start, end, count;
3036 	int barlen, basezero, flags, maprange, mapsize, type;
3037 	uint16_t cmd;
3038 	struct resource *res;
3039 
3040 	/*
3041 	 * The BAR may already exist if the device is a CardBus card
3042 	 * whose CIS is stored in this BAR.
3043 	 */
3044 	pm = pci_find_bar(dev, reg);
3045 	if (pm != NULL) {
3046 		maprange = pci_maprange(pm->pm_value);
3047 		barlen = maprange == 64 ? 2 : 1;
3048 		return (barlen);
3049 	}
3050 
3051 	pci_read_bar(dev, reg, &map, &testval, NULL);
3052 	if (PCI_BAR_MEM(map)) {
3053 		type = SYS_RES_MEMORY;
3054 		if (map & PCIM_BAR_MEM_PREFETCH)
3055 			prefetch = 1;
3056 	} else
3057 		type = SYS_RES_IOPORT;
3058 	mapsize = pci_mapsize(testval);
3059 	base = pci_mapbase(map);
3060 #ifdef __PCI_BAR_ZERO_VALID
3061 	basezero = 0;
3062 #else
3063 	basezero = base == 0;
3064 #endif
3065 	maprange = pci_maprange(map);
3066 	barlen = maprange == 64 ? 2 : 1;
3067 
3068 	/*
3069 	 * For I/O registers, if bottom bit is set, and the next bit up
3070 	 * isn't clear, we know we have a BAR that doesn't conform to the
3071 	 * spec, so ignore it.  Also, sanity check the size of the data
3072 	 * areas to the type of memory involved.  Memory must be at least
3073 	 * 16 bytes in size, while I/O ranges must be at least 4.
3074 	 */
3075 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3076 		return (barlen);
3077 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3078 	    (type == SYS_RES_IOPORT && mapsize < 2))
3079 		return (barlen);
3080 
3081 	/* Save a record of this BAR. */
3082 	pm = pci_add_bar(dev, reg, map, mapsize);
3083 	if (bootverbose) {
3084 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3085 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3086 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3087 			printf(", port disabled\n");
3088 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3089 			printf(", memory disabled\n");
3090 		else
3091 			printf(", enabled\n");
3092 	}
3093 
3094 	/*
3095 	 * If base is 0, then we have problems if this architecture does
3096 	 * not allow that.  It is best to ignore such entries for the
3097 	 * moment.  These will be allocated later if the driver specifically
3098 	 * requests them.  However, some removable busses look better when
3099 	 * all resources are allocated, so allow '0' to be overriden.
3100 	 *
3101 	 * Similarly treat maps whose values is the same as the test value
3102 	 * read back.  These maps have had all f's written to them by the
3103 	 * BIOS in an attempt to disable the resources.
3104 	 */
3105 	if (!force && (basezero || map == testval))
3106 		return (barlen);
3107 	if ((u_long)base != base) {
3108 		device_printf(bus,
3109 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3110 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3111 		    pci_get_function(dev), reg);
3112 		return (barlen);
3113 	}
3114 
3115 	/*
3116 	 * This code theoretically does the right thing, but has
3117 	 * undesirable side effects in some cases where peripherals
3118 	 * respond oddly to having these bits enabled.  Let the user
3119 	 * be able to turn them off (since pci_enable_io_modes is 1 by
3120 	 * default).
3121 	 */
3122 	if (pci_enable_io_modes) {
3123 		/* Turn on resources that have been left off by a lazy BIOS */
3124 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3125 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3126 			cmd |= PCIM_CMD_PORTEN;
3127 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3128 		}
3129 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3130 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3131 			cmd |= PCIM_CMD_MEMEN;
3132 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3133 		}
3134 	} else {
3135 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3136 			return (barlen);
3137 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3138 			return (barlen);
3139 	}
3140 
3141 	count = (pci_addr_t)1 << mapsize;
3142 	flags = RF_ALIGNMENT_LOG2(mapsize);
3143 	if (prefetch)
3144 		flags |= RF_PREFETCHABLE;
3145 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3146 		start = 0;	/* Let the parent decide. */
3147 		end = ~0;
3148 	} else {
3149 		start = base;
3150 		end = base + count - 1;
3151 	}
3152 	resource_list_add(rl, type, reg, start, end, count);
3153 
3154 	/*
3155 	 * Try to allocate the resource for this BAR from our parent
3156 	 * so that this resource range is already reserved.  The
3157 	 * driver for this device will later inherit this resource in
3158 	 * pci_alloc_resource().
3159 	 */
3160 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3161 	    flags);
3162 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3163 		/*
3164 		 * If the allocation fails, try to allocate a resource for
3165 		 * this BAR using any available range.  The firmware felt
3166 		 * it was important enough to assign a resource, so don't
3167 		 * disable decoding if we can help it.
3168 		 */
3169 		resource_list_delete(rl, type, reg);
3170 		resource_list_add(rl, type, reg, 0, ~0, count);
3171 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3172 		    count, flags);
3173 	}
3174 	if (res == NULL) {
3175 		/*
3176 		 * If the allocation fails, delete the resource list entry
3177 		 * and disable decoding for this device.
3178 		 *
3179 		 * If the driver requests this resource in the future,
3180 		 * pci_reserve_map() will try to allocate a fresh
3181 		 * resource range.
3182 		 */
3183 		resource_list_delete(rl, type, reg);
3184 		pci_disable_io(dev, type);
3185 		if (bootverbose)
3186 			device_printf(bus,
3187 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3188 			    pci_get_domain(dev), pci_get_bus(dev),
3189 			    pci_get_slot(dev), pci_get_function(dev), reg);
3190 	} else {
3191 		start = rman_get_start(res);
3192 		pci_write_bar(dev, pm, start);
3193 	}
3194 	return (barlen);
3195 }
3196 
3197 /*
3198  * For ATA devices we need to decide early what addressing mode to use.
3199  * Legacy demands that the primary and secondary ATA ports sits on the
3200  * same addresses that old ISA hardware did. This dictates that we use
3201  * those addresses and ignore the BAR's if we cannot set PCI native
3202  * addressing mode.
3203  */
3204 static void
3205 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3206     uint32_t prefetchmask)
3207 {
3208 	int rid, type, progif;
3209 #if 0
3210 	/* if this device supports PCI native addressing use it */
3211 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3212 	if ((progif & 0x8a) == 0x8a) {
3213 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3214 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3215 			printf("Trying ATA native PCI addressing mode\n");
3216 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3217 		}
3218 	}
3219 #endif
3220 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3221 	type = SYS_RES_IOPORT;
3222 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3223 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3224 		    prefetchmask & (1 << 0));
3225 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3226 		    prefetchmask & (1 << 1));
3227 	} else {
3228 		rid = PCIR_BAR(0);
3229 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3230 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3231 		    0x1f7, 8, 0);
3232 		rid = PCIR_BAR(1);
3233 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3234 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3235 		    0x3f6, 1, 0);
3236 	}
3237 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3238 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3239 		    prefetchmask & (1 << 2));
3240 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3241 		    prefetchmask & (1 << 3));
3242 	} else {
3243 		rid = PCIR_BAR(2);
3244 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3245 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3246 		    0x177, 8, 0);
3247 		rid = PCIR_BAR(3);
3248 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3249 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3250 		    0x376, 1, 0);
3251 	}
3252 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3253 	    prefetchmask & (1 << 4));
3254 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3255 	    prefetchmask & (1 << 5));
3256 }
3257 
3258 static void
3259 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3260 {
3261 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3262 	pcicfgregs *cfg = &dinfo->cfg;
3263 	char tunable_name[64];
3264 	int irq;
3265 
3266 	/* Has to have an intpin to have an interrupt. */
3267 	if (cfg->intpin == 0)
3268 		return;
3269 
3270 	/* Let the user override the IRQ with a tunable. */
3271 	irq = PCI_INVALID_IRQ;
3272 	snprintf(tunable_name, sizeof(tunable_name),
3273 	    "hw.pci%d.%d.%d.INT%c.irq",
3274 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3275 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3276 		irq = PCI_INVALID_IRQ;
3277 
3278 	/*
3279 	 * If we didn't get an IRQ via the tunable, then we either use the
3280 	 * IRQ value in the intline register or we ask the bus to route an
3281 	 * interrupt for us.  If force_route is true, then we only use the
3282 	 * value in the intline register if the bus was unable to assign an
3283 	 * IRQ.
3284 	 */
3285 	if (!PCI_INTERRUPT_VALID(irq)) {
3286 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3287 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3288 		if (!PCI_INTERRUPT_VALID(irq))
3289 			irq = cfg->intline;
3290 	}
3291 
3292 	/* If after all that we don't have an IRQ, just bail. */
3293 	if (!PCI_INTERRUPT_VALID(irq))
3294 		return;
3295 
3296 	/* Update the config register if it changed. */
3297 	if (irq != cfg->intline) {
3298 		cfg->intline = irq;
3299 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3300 	}
3301 
3302 	/* Add this IRQ as rid 0 interrupt resource. */
3303 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3304 }
3305 
3306 /* Perform early OHCI takeover from SMM. */
3307 static void
3308 ohci_early_takeover(device_t self)
3309 {
3310 	struct resource *res;
3311 	uint32_t ctl;
3312 	int rid;
3313 	int i;
3314 
3315 	rid = PCIR_BAR(0);
3316 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3317 	if (res == NULL)
3318 		return;
3319 
3320 	ctl = bus_read_4(res, OHCI_CONTROL);
3321 	if (ctl & OHCI_IR) {
3322 		if (bootverbose)
3323 			printf("ohci early: "
3324 			    "SMM active, request owner change\n");
3325 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3326 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3327 			DELAY(1000);
3328 			ctl = bus_read_4(res, OHCI_CONTROL);
3329 		}
3330 		if (ctl & OHCI_IR) {
3331 			if (bootverbose)
3332 				printf("ohci early: "
3333 				    "SMM does not respond, resetting\n");
3334 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3335 		}
3336 		/* Disable interrupts */
3337 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3338 	}
3339 
3340 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3341 }
3342 
3343 /* Perform early UHCI takeover from SMM. */
3344 static void
3345 uhci_early_takeover(device_t self)
3346 {
3347 	struct resource *res;
3348 	int rid;
3349 
3350 	/*
3351 	 * Set the PIRQD enable bit and switch off all the others. We don't
3352 	 * want legacy support to interfere with us XXX Does this also mean
3353 	 * that the BIOS won't touch the keyboard anymore if it is connected
3354 	 * to the ports of the root hub?
3355 	 */
3356 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3357 
3358 	/* Disable interrupts */
3359 	rid = PCI_UHCI_BASE_REG;
3360 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3361 	if (res != NULL) {
3362 		bus_write_2(res, UHCI_INTR, 0);
3363 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3364 	}
3365 }
3366 
3367 /* Perform early EHCI takeover from SMM. */
3368 static void
3369 ehci_early_takeover(device_t self)
3370 {
3371 	struct resource *res;
3372 	uint32_t cparams;
3373 	uint32_t eec;
3374 	uint8_t eecp;
3375 	uint8_t bios_sem;
3376 	uint8_t offs;
3377 	int rid;
3378 	int i;
3379 
3380 	rid = PCIR_BAR(0);
3381 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3382 	if (res == NULL)
3383 		return;
3384 
3385 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3386 
3387 	/* Synchronise with the BIOS if it owns the controller. */
3388 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3389 	    eecp = EHCI_EECP_NEXT(eec)) {
3390 		eec = pci_read_config(self, eecp, 4);
3391 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3392 			continue;
3393 		}
3394 		bios_sem = pci_read_config(self, eecp +
3395 		    EHCI_LEGSUP_BIOS_SEM, 1);
3396 		if (bios_sem == 0) {
3397 			continue;
3398 		}
3399 		if (bootverbose)
3400 			printf("ehci early: "
3401 			    "SMM active, request owner change\n");
3402 
3403 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3404 
3405 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3406 			DELAY(1000);
3407 			bios_sem = pci_read_config(self, eecp +
3408 			    EHCI_LEGSUP_BIOS_SEM, 1);
3409 		}
3410 
3411 		if (bios_sem != 0) {
3412 			if (bootverbose)
3413 				printf("ehci early: "
3414 				    "SMM does not respond\n");
3415 		}
3416 		/* Disable interrupts */
3417 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3418 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3419 	}
3420 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3421 }
3422 
3423 /* Perform early XHCI takeover from SMM. */
3424 static void
3425 xhci_early_takeover(device_t self)
3426 {
3427 	struct resource *res;
3428 	uint32_t cparams;
3429 	uint32_t eec;
3430 	uint8_t eecp;
3431 	uint8_t bios_sem;
3432 	uint8_t offs;
3433 	int rid;
3434 	int i;
3435 
3436 	rid = PCIR_BAR(0);
3437 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3438 	if (res == NULL)
3439 		return;
3440 
3441 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3442 
3443 	eec = -1;
3444 
3445 	/* Synchronise with the BIOS if it owns the controller. */
3446 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3447 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3448 		eec = bus_read_4(res, eecp);
3449 
3450 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3451 			continue;
3452 
3453 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3454 		if (bios_sem == 0)
3455 			continue;
3456 
3457 		if (bootverbose)
3458 			printf("xhci early: "
3459 			    "SMM active, request owner change\n");
3460 
3461 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3462 
3463 		/* wait a maximum of 5 second */
3464 
3465 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3466 			DELAY(1000);
3467 			bios_sem = bus_read_1(res, eecp +
3468 			    XHCI_XECP_BIOS_SEM);
3469 		}
3470 
3471 		if (bios_sem != 0) {
3472 			if (bootverbose)
3473 				printf("xhci early: "
3474 				    "SMM does not respond\n");
3475 		}
3476 
3477 		/* Disable interrupts */
3478 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3479 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3480 		bus_read_4(res, offs + XHCI_USBSTS);
3481 	}
3482 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3483 }
3484 
3485 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3486 static void
3487 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3488     struct resource_list *rl)
3489 {
3490 	struct resource *res;
3491 	char *cp;
3492 	rman_res_t start, end, count;
3493 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3494 
3495 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3496 	case PCIM_HDRTYPE_BRIDGE:
3497 		sec_reg = PCIR_SECBUS_1;
3498 		sub_reg = PCIR_SUBBUS_1;
3499 		break;
3500 	case PCIM_HDRTYPE_CARDBUS:
3501 		sec_reg = PCIR_SECBUS_2;
3502 		sub_reg = PCIR_SUBBUS_2;
3503 		break;
3504 	default:
3505 		return;
3506 	}
3507 
3508 	/*
3509 	 * If the existing bus range is valid, attempt to reserve it
3510 	 * from our parent.  If this fails for any reason, clear the
3511 	 * secbus and subbus registers.
3512 	 *
3513 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3514 	 * This would at least preserve the existing sec_bus if it is
3515 	 * valid.
3516 	 */
3517 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3518 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3519 
3520 	/* Quirk handling. */
3521 	switch (pci_get_devid(dev)) {
3522 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3523 		sup_bus = pci_read_config(dev, 0x41, 1);
3524 		if (sup_bus != 0xff) {
3525 			sec_bus = sup_bus + 1;
3526 			sub_bus = sup_bus + 1;
3527 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3528 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3529 		}
3530 		break;
3531 
3532 	case 0x00dd10de:
3533 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3534 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3535 			break;
3536 		if (strncmp(cp, "Compal", 6) != 0) {
3537 			freeenv(cp);
3538 			break;
3539 		}
3540 		freeenv(cp);
3541 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3542 			break;
3543 		if (strncmp(cp, "08A0", 4) != 0) {
3544 			freeenv(cp);
3545 			break;
3546 		}
3547 		freeenv(cp);
3548 		if (sub_bus < 0xa) {
3549 			sub_bus = 0xa;
3550 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3551 		}
3552 		break;
3553 	}
3554 
3555 	if (bootverbose)
3556 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3557 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3558 		start = sec_bus;
3559 		end = sub_bus;
3560 		count = end - start + 1;
3561 
3562 		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
3563 
3564 		/*
3565 		 * If requested, clear secondary bus registers in
3566 		 * bridge devices to force a complete renumbering
3567 		 * rather than reserving the existing range.  However,
3568 		 * preserve the existing size.
3569 		 */
3570 		if (pci_clear_buses)
3571 			goto clear;
3572 
3573 		rid = 0;
3574 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3575 		    start, end, count, 0);
3576 		if (res != NULL)
3577 			return;
3578 
3579 		if (bootverbose)
3580 			device_printf(bus,
3581 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3582 			    pci_get_domain(dev), pci_get_bus(dev),
3583 			    pci_get_slot(dev), pci_get_function(dev));
3584 	}
3585 
3586 clear:
3587 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3588 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3589 }
3590 
3591 static struct resource *
3592 pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
3593     rman_res_t end, rman_res_t count, u_int flags)
3594 {
3595 	struct pci_devinfo *dinfo;
3596 	pcicfgregs *cfg;
3597 	struct resource_list *rl;
3598 	struct resource *res;
3599 	int sec_reg, sub_reg;
3600 
3601 	dinfo = device_get_ivars(child);
3602 	cfg = &dinfo->cfg;
3603 	rl = &dinfo->resources;
3604 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3605 	case PCIM_HDRTYPE_BRIDGE:
3606 		sec_reg = PCIR_SECBUS_1;
3607 		sub_reg = PCIR_SUBBUS_1;
3608 		break;
3609 	case PCIM_HDRTYPE_CARDBUS:
3610 		sec_reg = PCIR_SECBUS_2;
3611 		sub_reg = PCIR_SUBBUS_2;
3612 		break;
3613 	default:
3614 		return (NULL);
3615 	}
3616 
3617 	if (*rid != 0)
3618 		return (NULL);
3619 
3620 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3621 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3622 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3623 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3624 		    start, end, count, flags & ~RF_ACTIVE);
3625 		if (res == NULL) {
3626 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3627 			device_printf(child, "allocating %ju bus%s failed\n",
3628 			    count, count == 1 ? "" : "es");
3629 			return (NULL);
3630 		}
3631 		if (bootverbose)
3632 			device_printf(child,
3633 			    "Lazy allocation of %ju bus%s at %ju\n", count,
3634 			    count == 1 ? "" : "es", rman_get_start(res));
3635 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3636 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3637 	}
3638 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3639 	    end, count, flags));
3640 }
3641 #endif
3642 
3643 static int
3644 pci_ea_bei_to_rid(device_t dev, int bei)
3645 {
3646 #ifdef PCI_IOV
3647 	struct pci_devinfo *dinfo;
3648 	int iov_pos;
3649 	struct pcicfg_iov *iov;
3650 
3651 	dinfo = device_get_ivars(dev);
3652 	iov = dinfo->cfg.iov;
3653 	if (iov != NULL)
3654 		iov_pos = iov->iov_pos;
3655 	else
3656 		iov_pos = 0;
3657 #endif
3658 
3659 	/* Check if matches BAR */
3660 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3661 	    (bei <= PCIM_EA_BEI_BAR_5))
3662 		return (PCIR_BAR(bei));
3663 
3664 	/* Check ROM */
3665 	if (bei == PCIM_EA_BEI_ROM)
3666 		return (PCIR_BIOS);
3667 
3668 #ifdef PCI_IOV
3669 	/* Check if matches VF_BAR */
3670 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3671 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3672 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3673 		    iov_pos);
3674 #endif
3675 
3676 	return (-1);
3677 }
3678 
3679 int
3680 pci_ea_is_enabled(device_t dev, int rid)
3681 {
3682 	struct pci_ea_entry *ea;
3683 	struct pci_devinfo *dinfo;
3684 
3685 	dinfo = device_get_ivars(dev);
3686 
3687 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3688 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3689 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3690 	}
3691 
3692 	return (0);
3693 }
3694 
3695 void
3696 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3697 {
3698 	struct pci_ea_entry *ea;
3699 	struct pci_devinfo *dinfo;
3700 	pci_addr_t start, end, count;
3701 	struct resource_list *rl;
3702 	int type, flags, rid;
3703 	struct resource *res;
3704 	uint32_t tmp;
3705 #ifdef PCI_IOV
3706 	struct pcicfg_iov *iov;
3707 #endif
3708 
3709 	dinfo = device_get_ivars(dev);
3710 	rl = &dinfo->resources;
3711 	flags = 0;
3712 
3713 #ifdef PCI_IOV
3714 	iov = dinfo->cfg.iov;
3715 #endif
3716 
3717 	if (dinfo->cfg.ea.ea_location == 0)
3718 		return;
3719 
3720 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3721 
3722 		/*
3723 		 * TODO: Ignore EA-BAR if is not enabled.
3724 		 *   Currently the EA implementation supports
3725 		 *   only situation, where EA structure contains
3726 		 *   predefined entries. In case they are not enabled
3727 		 *   leave them unallocated and proceed with
3728 		 *   a legacy-BAR mechanism.
3729 		 */
3730 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3731 			continue;
3732 
3733 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3734 		case PCIM_EA_P_MEM_PREFETCH:
3735 		case PCIM_EA_P_VF_MEM_PREFETCH:
3736 			flags = RF_PREFETCHABLE;
3737 			/* FALLTHROUGH */
3738 		case PCIM_EA_P_VF_MEM:
3739 		case PCIM_EA_P_MEM:
3740 			type = SYS_RES_MEMORY;
3741 			break;
3742 		case PCIM_EA_P_IO:
3743 			type = SYS_RES_IOPORT;
3744 			break;
3745 		default:
3746 			continue;
3747 		}
3748 
3749 		if (alloc_iov != 0) {
3750 #ifdef PCI_IOV
3751 			/* Allocating IOV, confirm BEI matches */
3752 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3753 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3754 				continue;
3755 #else
3756 			continue;
3757 #endif
3758 		} else {
3759 			/* Allocating BAR, confirm BEI matches */
3760 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3761 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3762 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3763 				continue;
3764 		}
3765 
3766 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3767 		if (rid < 0)
3768 			continue;
3769 
3770 		/* Skip resources already allocated by EA */
3771 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3772 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3773 			continue;
3774 
3775 		start = ea->eae_base;
3776 		count = ea->eae_max_offset + 1;
3777 #ifdef PCI_IOV
3778 		if (iov != NULL)
3779 			count = count * iov->iov_num_vfs;
3780 #endif
3781 		end = start + count - 1;
3782 		if (count == 0)
3783 			continue;
3784 
3785 		resource_list_add(rl, type, rid, start, end, count);
3786 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3787 		    flags);
3788 		if (res == NULL) {
3789 			resource_list_delete(rl, type, rid);
3790 
3791 			/*
3792 			 * Failed to allocate using EA, disable entry.
3793 			 * Another attempt to allocation will be performed
3794 			 * further, but this time using legacy BAR registers
3795 			 */
3796 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3797 			tmp &= ~PCIM_EA_ENABLE;
3798 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3799 
3800 			/*
3801 			 * Disabling entry might fail in case it is hardwired.
3802 			 * Read flags again to match current status.
3803 			 */
3804 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3805 
3806 			continue;
3807 		}
3808 
3809 		/* As per specification, fill BAR with zeros */
3810 		pci_write_config(dev, rid, 0, 4);
3811 	}
3812 }
3813 
3814 void
3815 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3816 {
3817 	struct pci_devinfo *dinfo;
3818 	pcicfgregs *cfg;
3819 	struct resource_list *rl;
3820 	const struct pci_quirk *q;
3821 	uint32_t devid;
3822 	int i;
3823 
3824 	dinfo = device_get_ivars(dev);
3825 	cfg = &dinfo->cfg;
3826 	rl = &dinfo->resources;
3827 	devid = (cfg->device << 16) | cfg->vendor;
3828 
3829 	/* Allocate resources using Enhanced Allocation */
3830 	pci_add_resources_ea(bus, dev, 0);
3831 
3832 	/* ATA devices needs special map treatment */
3833 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3834 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3835 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3836 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3837 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3838 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3839 	else
3840 		for (i = 0; i < cfg->nummaps;) {
3841 			/* Skip resources already managed by EA */
3842 			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
3843 			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
3844 			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
3845 				i++;
3846 				continue;
3847 			}
3848 
3849 			/*
3850 			 * Skip quirked resources.
3851 			 */
3852 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3853 				if (q->devid == devid &&
3854 				    q->type == PCI_QUIRK_UNMAP_REG &&
3855 				    q->arg1 == PCIR_BAR(i))
3856 					break;
3857 			if (q->devid != 0) {
3858 				i++;
3859 				continue;
3860 			}
3861 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3862 			    prefetchmask & (1 << i));
3863 		}
3864 
3865 	/*
3866 	 * Add additional, quirked resources.
3867 	 */
3868 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3869 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3870 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3871 
3872 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3873 #ifdef __PCI_REROUTE_INTERRUPT
3874 		/*
3875 		 * Try to re-route interrupts. Sometimes the BIOS or
3876 		 * firmware may leave bogus values in these registers.
3877 		 * If the re-route fails, then just stick with what we
3878 		 * have.
3879 		 */
3880 		pci_assign_interrupt(bus, dev, 1);
3881 #else
3882 		pci_assign_interrupt(bus, dev, 0);
3883 #endif
3884 	}
3885 
3886 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3887 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3888 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3889 			xhci_early_takeover(dev);
3890 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3891 			ehci_early_takeover(dev);
3892 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3893 			ohci_early_takeover(dev);
3894 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3895 			uhci_early_takeover(dev);
3896 	}
3897 
3898 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3899 	/*
3900 	 * Reserve resources for secondary bus ranges behind bridge
3901 	 * devices.
3902 	 */
3903 	pci_reserve_secbus(bus, dev, cfg, rl);
3904 #endif
3905 }
3906 
3907 static struct pci_devinfo *
3908 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3909     int slot, int func)
3910 {
3911 	struct pci_devinfo *dinfo;
3912 
3913 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3914 	if (dinfo != NULL)
3915 		pci_add_child(dev, dinfo);
3916 
3917 	return (dinfo);
3918 }
3919 
3920 void
3921 pci_add_children(device_t dev, int domain, int busno)
3922 {
3923 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3924 	device_t pcib = device_get_parent(dev);
3925 	struct pci_devinfo *dinfo;
3926 	int maxslots;
3927 	int s, f, pcifunchigh;
3928 	uint8_t hdrtype;
3929 	int first_func;
3930 
3931 	/*
3932 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3933 	 * enable ARI.  We must enable ARI before detecting the rest of the
3934 	 * functions on this bus as ARI changes the set of slots and functions
3935 	 * that are legal on this bus.
3936 	 */
3937 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
3938 	if (dinfo != NULL && pci_enable_ari)
3939 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3940 
3941 	/*
3942 	 * Start looking for new devices on slot 0 at function 1 because we
3943 	 * just identified the device at slot 0, function 0.
3944 	 */
3945 	first_func = 1;
3946 
3947 	maxslots = PCIB_MAXSLOTS(pcib);
3948 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3949 		pcifunchigh = 0;
3950 		f = 0;
3951 		DELAY(1);
3952 		hdrtype = REG(PCIR_HDRTYPE, 1);
3953 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3954 			continue;
3955 		if (hdrtype & PCIM_MFDEV)
3956 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3957 		for (f = first_func; f <= pcifunchigh; f++)
3958 			pci_identify_function(pcib, dev, domain, busno, s, f);
3959 	}
3960 #undef REG
3961 }
3962 
3963 int
3964 pci_rescan_method(device_t dev)
3965 {
3966 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3967 	device_t pcib = device_get_parent(dev);
3968 	struct pci_softc *sc;
3969 	device_t child, *devlist, *unchanged;
3970 	int devcount, error, i, j, maxslots, oldcount;
3971 	int busno, domain, s, f, pcifunchigh;
3972 	uint8_t hdrtype;
3973 
3974 	/* No need to check for ARI on a rescan. */
3975 	error = device_get_children(dev, &devlist, &devcount);
3976 	if (error)
3977 		return (error);
3978 	if (devcount != 0) {
3979 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3980 		    M_NOWAIT | M_ZERO);
3981 		if (unchanged == NULL) {
3982 			free(devlist, M_TEMP);
3983 			return (ENOMEM);
3984 		}
3985 	} else
3986 		unchanged = NULL;
3987 
3988 	sc = device_get_softc(dev);
3989 	domain = pcib_get_domain(dev);
3990 	busno = pcib_get_bus(dev);
3991 	maxslots = PCIB_MAXSLOTS(pcib);
3992 	for (s = 0; s <= maxslots; s++) {
3993 		/* If function 0 is not present, skip to the next slot. */
3994 		f = 0;
3995 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3996 			continue;
3997 		pcifunchigh = 0;
3998 		hdrtype = REG(PCIR_HDRTYPE, 1);
3999 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
4000 			continue;
4001 		if (hdrtype & PCIM_MFDEV)
4002 			pcifunchigh = PCIB_MAXFUNCS(pcib);
4003 		for (f = 0; f <= pcifunchigh; f++) {
4004 			if (REG(PCIR_VENDOR, 2) == 0xffff)
4005 				continue;
4006 
4007 			/*
4008 			 * Found a valid function.  Check if a
4009 			 * device_t for this device already exists.
4010 			 */
4011 			for (i = 0; i < devcount; i++) {
4012 				child = devlist[i];
4013 				if (child == NULL)
4014 					continue;
4015 				if (pci_get_slot(child) == s &&
4016 				    pci_get_function(child) == f) {
4017 					unchanged[i] = child;
4018 					goto next_func;
4019 				}
4020 			}
4021 
4022 			pci_identify_function(pcib, dev, domain, busno, s, f);
4023 		next_func:;
4024 		}
4025 	}
4026 
4027 	/* Remove devices that are no longer present. */
4028 	for (i = 0; i < devcount; i++) {
4029 		if (unchanged[i] != NULL)
4030 			continue;
4031 		device_delete_child(dev, devlist[i]);
4032 	}
4033 
4034 	free(devlist, M_TEMP);
4035 	oldcount = devcount;
4036 
4037 	/* Try to attach the devices just added. */
4038 	error = device_get_children(dev, &devlist, &devcount);
4039 	if (error) {
4040 		free(unchanged, M_TEMP);
4041 		return (error);
4042 	}
4043 
4044 	for (i = 0; i < devcount; i++) {
4045 		for (j = 0; j < oldcount; j++) {
4046 			if (devlist[i] == unchanged[j])
4047 				goto next_device;
4048 		}
4049 
4050 		device_probe_and_attach(devlist[i]);
4051 	next_device:;
4052 	}
4053 
4054 	free(unchanged, M_TEMP);
4055 	free(devlist, M_TEMP);
4056 	return (0);
4057 #undef REG
4058 }
4059 
4060 #ifdef PCI_IOV
4061 device_t
4062 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4063     uint16_t did)
4064 {
4065 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4066 	device_t pcib;
4067 	int busno, slot, func;
4068 
4069 	pf_dinfo = device_get_ivars(pf);
4070 
4071 	pcib = device_get_parent(bus);
4072 
4073 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4074 
4075 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4076 	    slot, func, vid, did);
4077 
4078 	vf_dinfo->cfg.flags |= PCICFG_VF;
4079 	pci_add_child(bus, vf_dinfo);
4080 
4081 	return (vf_dinfo->cfg.dev);
4082 }
4083 
4084 device_t
4085 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
4086     uint16_t vid, uint16_t did)
4087 {
4088 
4089 	return (pci_add_iov_child(bus, pf, rid, vid, did));
4090 }
4091 #endif
4092 
4093 void
4094 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4095 {
4096 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4097 	device_set_ivars(dinfo->cfg.dev, dinfo);
4098 	resource_list_init(&dinfo->resources);
4099 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4100 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4101 	pci_print_verbose(dinfo);
4102 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4103 	pci_child_added(dinfo->cfg.dev);
4104 	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
4105 }
4106 
/*
 * Child-added method for the PCI bus.  This implementation performs no
 * action; the body is empty.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4112 
4113 static int
4114 pci_probe(device_t dev)
4115 {
4116 
4117 	device_set_desc(dev, "PCI bus");
4118 
4119 	/* Allow other subclasses to override this driver. */
4120 	return (BUS_PROBE_GENERIC);
4121 }
4122 
/*
 * Attach work for a PCI bus device: optionally reserve the bus number
 * (PCI_RES_BUS), report the domain/bus when booting verbosely, and
 * select the DMA tag stored in the softc.
 *
 * Returns 0 on success, or ENXIO when the bus number resource cannot be
 * allocated.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve exactly our own bus number. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a boundary-restricted DMA tag only when our grandparent
	 * is not itself of the "pci" devclass.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/*
	 * Note: this "if" guards the assignment after the #endif; without
	 * PCI_DMA_BOUNDARY the assignment below is unconditional.
	 */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4169 
4170 static int
4171 pci_attach(device_t dev)
4172 {
4173 	int busno, domain, error;
4174 
4175 	error = pci_attach_common(dev);
4176 	if (error)
4177 		return (error);
4178 
4179 	/*
4180 	 * Since there can be multiple independently numbered PCI
4181 	 * busses on systems with multiple PCI domains, we can't use
4182 	 * the unit number to decide which bus we are probing. We ask
4183 	 * the parent pcib what our domain and bus numbers are.
4184 	 */
4185 	domain = pcib_get_domain(dev);
4186 	busno = pcib_get_bus(dev);
4187 	pci_add_children(dev, domain, busno);
4188 	return (bus_generic_attach(dev));
4189 }
4190 
4191 static int
4192 pci_detach(device_t dev)
4193 {
4194 #ifdef PCI_RES_BUS
4195 	struct pci_softc *sc;
4196 #endif
4197 	int error;
4198 
4199 	error = bus_generic_detach(dev);
4200 	if (error)
4201 		return (error);
4202 #ifdef PCI_RES_BUS
4203 	sc = device_get_softc(dev);
4204 	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
4205 	if (error)
4206 		return (error);
4207 #endif
4208 	return (device_delete_children(dev));
4209 }
4210 
4211 static void
4212 pci_set_power_child(device_t dev, device_t child, int state)
4213 {
4214 	device_t pcib;
4215 	int dstate;
4216 
4217 	/*
4218 	 * Set the device to the given state.  If the firmware suggests
4219 	 * a different power state, use it instead.  If power management
4220 	 * is not present, the firmware is responsible for managing
4221 	 * device power.  Skip children who aren't attached since they
4222 	 * are handled separately.
4223 	 */
4224 	pcib = device_get_parent(dev);
4225 	dstate = state;
4226 	if (device_is_attached(child) &&
4227 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4228 		pci_set_powerstate(child, dstate);
4229 }
4230 
4231 int
4232 pci_suspend_child(device_t dev, device_t child)
4233 {
4234 	struct pci_devinfo *dinfo;
4235 	int error;
4236 
4237 	dinfo = device_get_ivars(child);
4238 
4239 	/*
4240 	 * Save the PCI configuration space for the child and set the
4241 	 * device in the appropriate power state for this sleep state.
4242 	 */
4243 	pci_cfg_save(child, dinfo, 0);
4244 
4245 	/* Suspend devices before potentially powering them down. */
4246 	error = bus_generic_suspend_child(dev, child);
4247 
4248 	if (error)
4249 		return (error);
4250 
4251 	if (pci_do_power_suspend)
4252 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4253 
4254 	return (0);
4255 }
4256 
4257 int
4258 pci_resume_child(device_t dev, device_t child)
4259 {
4260 	struct pci_devinfo *dinfo;
4261 
4262 	if (pci_do_power_resume)
4263 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4264 
4265 	dinfo = device_get_ivars(child);
4266 	pci_cfg_restore(child, dinfo);
4267 	if (!device_is_attached(child))
4268 		pci_cfg_save(child, dinfo, 1);
4269 
4270 	bus_generic_resume_child(dev, child);
4271 
4272 	return (0);
4273 }
4274 
4275 int
4276 pci_resume(device_t dev)
4277 {
4278 	device_t child, *devlist;
4279 	int error, i, numdevs;
4280 
4281 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4282 		return (error);
4283 
4284 	/*
4285 	 * Resume critical devices first, then everything else later.
4286 	 */
4287 	for (i = 0; i < numdevs; i++) {
4288 		child = devlist[i];
4289 		switch (pci_get_class(child)) {
4290 		case PCIC_DISPLAY:
4291 		case PCIC_MEMORY:
4292 		case PCIC_BRIDGE:
4293 		case PCIC_BASEPERIPH:
4294 			BUS_RESUME_CHILD(dev, child);
4295 			break;
4296 		}
4297 	}
4298 	for (i = 0; i < numdevs; i++) {
4299 		child = devlist[i];
4300 		switch (pci_get_class(child)) {
4301 		case PCIC_DISPLAY:
4302 		case PCIC_MEMORY:
4303 		case PCIC_BRIDGE:
4304 		case PCIC_BASEPERIPH:
4305 			break;
4306 		default:
4307 			BUS_RESUME_CHILD(dev, child);
4308 		}
4309 	}
4310 	free(devlist, M_TEMP);
4311 	return (0);
4312 }
4313 
4314 static void
4315 pci_load_vendor_data(void)
4316 {
4317 	caddr_t data;
4318 	void *ptr;
4319 	size_t sz;
4320 
4321 	data = preload_search_by_type("pci_vendor_data");
4322 	if (data != NULL) {
4323 		ptr = preload_fetch_addr(data);
4324 		sz = preload_fetch_size(data);
4325 		if (ptr != NULL && sz != 0) {
4326 			pci_vendordata = ptr;
4327 			pci_vendordata_size = sz;
4328 			/* terminate the database */
4329 			pci_vendordata[pci_vendordata_size] = '\n';
4330 		}
4331 	}
4332 }
4333 
4334 void
4335 pci_driver_added(device_t dev, driver_t *driver)
4336 {
4337 	int numdevs;
4338 	device_t *devlist;
4339 	device_t child;
4340 	struct pci_devinfo *dinfo;
4341 	int i;
4342 
4343 	if (bootverbose)
4344 		device_printf(dev, "driver added\n");
4345 	DEVICE_IDENTIFY(driver, dev);
4346 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4347 		return;
4348 	for (i = 0; i < numdevs; i++) {
4349 		child = devlist[i];
4350 		if (device_get_state(child) != DS_NOTPRESENT)
4351 			continue;
4352 		dinfo = device_get_ivars(child);
4353 		pci_print_verbose(dinfo);
4354 		if (bootverbose)
4355 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4356 		pci_cfg_restore(child, dinfo);
4357 		if (device_probe_and_attach(child) != 0)
4358 			pci_child_detached(dev, child);
4359 	}
4360 	free(devlist, M_TEMP);
4361 }
4362 
/*
 * Set up an interrupt handler for (child) on (irq).
 *
 * The handler is first installed through bus_generic_setup_intr().  For
 * direct children the interrupt source is then enabled on the device:
 * rid 0 is treated as INTx, any other rid as MSI or MSI-X, for which
 * the parent bridge is asked (PCIB_MAP_MSI) for the address/data pair
 * the first time a message is used.  If that mapping fails, the handler
 * installed above is torn down again and the error is returned.
 *
 * On success the handler cookie is stored in *cookiep and 0 is returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map once, enable on the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: table entries are indexed by rid - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}

			/*
			 * The MSIX table entry must be made valid by
			 * incrementing the mte_handlers before
			 * calling pci_enable_msix() and
			 * pci_resume_msix(). Else the MSIX rewrite
			 * table quirk will not work as expected.
			 */
			mte->mte_handlers++;
			if (mte->mte_handlers == 1) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * Reached on both the success path (error == 0) and from
		 * the "goto bad" failures above.
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4470 
4471 int
4472 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4473     void *cookie)
4474 {
4475 	struct msix_table_entry *mte;
4476 	struct resource_list_entry *rle;
4477 	struct pci_devinfo *dinfo;
4478 	int error, rid;
4479 
4480 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4481 		return (EINVAL);
4482 
4483 	/* If this isn't a direct child, just bail out */
4484 	if (device_get_parent(child) != dev)
4485 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4486 
4487 	rid = rman_get_rid(irq);
4488 	if (rid == 0) {
4489 		/* Mask INTx */
4490 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4491 	} else {
4492 		/*
4493 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4494 		 * decrement the appropriate handlers count and mask the
4495 		 * MSI-X message, or disable MSI messages if the count
4496 		 * drops to 0.
4497 		 */
4498 		dinfo = device_get_ivars(child);
4499 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4500 		if (rle->res != irq)
4501 			return (EINVAL);
4502 		if (dinfo->cfg.msi.msi_alloc > 0) {
4503 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4504 			    ("MSI-X index too high"));
4505 			if (dinfo->cfg.msi.msi_handlers == 0)
4506 				return (EINVAL);
4507 			dinfo->cfg.msi.msi_handlers--;
4508 			if (dinfo->cfg.msi.msi_handlers == 0)
4509 				pci_disable_msi(child);
4510 		} else {
4511 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4512 			    ("No MSI or MSI-X interrupts allocated"));
4513 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4514 			    ("MSI-X index too high"));
4515 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4516 			if (mte->mte_handlers == 0)
4517 				return (EINVAL);
4518 			mte->mte_handlers--;
4519 			if (mte->mte_handlers == 0)
4520 				pci_mask_msix(child, rid - 1);
4521 		}
4522 	}
4523 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4524 	if (rid > 0)
4525 		KASSERT(error == 0,
4526 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4527 	return (error);
4528 }
4529 
4530 int
4531 pci_print_child(device_t dev, device_t child)
4532 {
4533 	struct pci_devinfo *dinfo;
4534 	struct resource_list *rl;
4535 	int retval = 0;
4536 
4537 	dinfo = device_get_ivars(child);
4538 	rl = &dinfo->resources;
4539 
4540 	retval += bus_print_child_header(dev, child);
4541 
4542 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4543 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4544 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4545 	if (device_get_flags(dev))
4546 		retval += printf(" flags %#x", device_get_flags(dev));
4547 
4548 	retval += printf(" at device %d.%d", pci_get_slot(child),
4549 	    pci_get_function(child));
4550 
4551 	retval += bus_print_child_domain(dev, child);
4552 	retval += bus_print_child_footer(dev, child);
4553 
4554 	return (retval);
4555 }
4556 
/*
 * Class/subclass descriptions used by pci_probe_nomatch() for devices
 * that no driver claimed.  An entry with subclass -1 describes the
 * whole class; other entries describe one subclass of that class.  The
 * table is terminated by an entry whose desc is NULL.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{PCIC_DASP,		PCIS_DASP_PERFCNTRS,	1, "performance counters"},
	{PCIC_DASP,		PCIS_DASP_COMM_SYNC,	1, "communication synchronizer"},
	{PCIC_DASP,		PCIS_DASP_MGMT_CARD,	1, "signal processing management"},
	/* Terminator: desc == NULL ends the scan. */
	{0, 0, 0,		NULL}
};
4654 
4655 void
4656 pci_probe_nomatch(device_t dev, device_t child)
4657 {
4658 	int i, report;
4659 	const char *cp, *scp;
4660 	char *device;
4661 
4662 	/*
4663 	 * Look for a listing for this device in a loaded device database.
4664 	 */
4665 	report = 1;
4666 	if ((device = pci_describe_device(child)) != NULL) {
4667 		device_printf(dev, "<%s>", device);
4668 		free(device, M_DEVBUF);
4669 	} else {
4670 		/*
4671 		 * Scan the class/subclass descriptions for a general
4672 		 * description.
4673 		 */
4674 		cp = "unknown";
4675 		scp = NULL;
4676 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4677 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4678 				if (pci_nomatch_tab[i].subclass == -1) {
4679 					cp = pci_nomatch_tab[i].desc;
4680 					report = pci_nomatch_tab[i].report;
4681 				} else if (pci_nomatch_tab[i].subclass ==
4682 				    pci_get_subclass(child)) {
4683 					scp = pci_nomatch_tab[i].desc;
4684 					report = pci_nomatch_tab[i].report;
4685 				}
4686 			}
4687 		}
4688 		if (report || bootverbose) {
4689 			device_printf(dev, "<%s%s%s>",
4690 			    cp ? cp : "",
4691 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4692 			    scp ? scp : "");
4693 		}
4694 	}
4695 	if (report || bootverbose) {
4696 		printf(" at device %d.%d (no driver attached)\n",
4697 		    pci_get_slot(child), pci_get_function(child));
4698 	}
4699 	pci_cfg_save(child, device_get_ivars(child), 1);
4700 }
4701 
4702 void
4703 pci_child_detached(device_t dev, device_t child)
4704 {
4705 	struct pci_devinfo *dinfo;
4706 	struct resource_list *rl;
4707 
4708 	dinfo = device_get_ivars(child);
4709 	rl = &dinfo->resources;
4710 
4711 	/*
4712 	 * Have to deallocate IRQs before releasing any MSI messages and
4713 	 * have to release MSI messages before deallocating any memory
4714 	 * BARs.
4715 	 */
4716 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4717 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4718 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4719 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4720 		(void)pci_release_msi(child);
4721 	}
4722 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4723 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4724 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4725 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4726 #ifdef PCI_RES_BUS
4727 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4728 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4729 #endif
4730 
4731 	pci_cfg_save(child, dinfo, 1);
4732 }
4733 
4734 /*
4735  * Parse the PCI device database, if loaded, and return a pointer to a
4736  * description of the device.
4737  *
4738  * The database is flat text formatted as follows:
4739  *
4740  * Any line not in a valid format is ignored.
4741  * Lines are terminated with newline '\n' characters.
4742  *
4743  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4744  * the vendor name.
4745  *
4746  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4747  * - devices cannot be listed without a corresponding VENDOR line.
4748  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4749  * another TAB, then the device name.
4750  */
4751 
4752 /*
4753  * Assuming (ptr) points to the beginning of a line in the database,
4754  * return the vendor or device and description of the next entry.
4755  * The value of (vendor) or (device) inappropriate for the entry type
4756  * is set to -1.  Returns nonzero at the end of the database.
4757  *
4758  * Note that this is slightly unrobust in the face of corrupt data;
4759  * we attempt to safeguard against this by spamming the end of the
4760  * database with a newline when we initialise.
4761  */
4762 static int
4763 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4764 {
4765 	char	*cp = *ptr;
4766 	int	left;
4767 
4768 	*device = -1;
4769 	*vendor = -1;
4770 	**desc = '\0';
4771 	for (;;) {
4772 		left = pci_vendordata_size - (cp - pci_vendordata);
4773 		if (left <= 0) {
4774 			*ptr = cp;
4775 			return(1);
4776 		}
4777 
4778 		/* vendor entry? */
4779 		if (*cp != '\t' &&
4780 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4781 			break;
4782 		/* device entry? */
4783 		if (*cp == '\t' &&
4784 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4785 			break;
4786 
4787 		/* skip to next line */
4788 		while (*cp != '\n' && left > 0) {
4789 			cp++;
4790 			left--;
4791 		}
4792 		if (*cp == '\n') {
4793 			cp++;
4794 			left--;
4795 		}
4796 	}
4797 	/* skip to next line */
4798 	while (*cp != '\n' && left > 0) {
4799 		cp++;
4800 		left--;
4801 	}
4802 	if (*cp == '\n' && left > 0)
4803 		cp++;
4804 	*ptr = cp;
4805 	return(0);
4806 }
4807 
4808 static char *
4809 pci_describe_device(device_t dev)
4810 {
4811 	int	vendor, device;
4812 	char	*desc, *vp, *dp, *line;
4813 
4814 	desc = vp = dp = NULL;
4815 
4816 	/*
4817 	 * If we have no vendor data, we can't do anything.
4818 	 */
4819 	if (pci_vendordata == NULL)
4820 		goto out;
4821 
4822 	/*
4823 	 * Scan the vendor data looking for this device
4824 	 */
4825 	line = pci_vendordata;
4826 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4827 		goto out;
4828 	for (;;) {
4829 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4830 			goto out;
4831 		if (vendor == pci_get_vendor(dev))
4832 			break;
4833 	}
4834 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4835 		goto out;
4836 	for (;;) {
4837 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4838 			*dp = 0;
4839 			break;
4840 		}
4841 		if (vendor != -1) {
4842 			*dp = 0;
4843 			break;
4844 		}
4845 		if (device == pci_get_device(dev))
4846 			break;
4847 	}
4848 	if (dp[0] == '\0')
4849 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4850 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4851 	    NULL)
4852 		sprintf(desc, "%s, %s", vp, dp);
4853 out:
4854 	if (vp != NULL)
4855 		free(vp, M_DEVBUF);
4856 	if (dp != NULL)
4857 		free(dp, M_DEVBUF);
4858 	return(desc);
4859 }
4860 
4861 int
4862 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4863 {
4864 	struct pci_devinfo *dinfo;
4865 	pcicfgregs *cfg;
4866 
4867 	dinfo = device_get_ivars(child);
4868 	cfg = &dinfo->cfg;
4869 
4870 	switch (which) {
4871 	case PCI_IVAR_ETHADDR:
4872 		/*
4873 		 * The generic accessor doesn't deal with failure, so
4874 		 * we set the return value, then return an error.
4875 		 */
4876 		*((uint8_t **) result) = NULL;
4877 		return (EINVAL);
4878 	case PCI_IVAR_SUBVENDOR:
4879 		*result = cfg->subvendor;
4880 		break;
4881 	case PCI_IVAR_SUBDEVICE:
4882 		*result = cfg->subdevice;
4883 		break;
4884 	case PCI_IVAR_VENDOR:
4885 		*result = cfg->vendor;
4886 		break;
4887 	case PCI_IVAR_DEVICE:
4888 		*result = cfg->device;
4889 		break;
4890 	case PCI_IVAR_DEVID:
4891 		*result = (cfg->device << 16) | cfg->vendor;
4892 		break;
4893 	case PCI_IVAR_CLASS:
4894 		*result = cfg->baseclass;
4895 		break;
4896 	case PCI_IVAR_SUBCLASS:
4897 		*result = cfg->subclass;
4898 		break;
4899 	case PCI_IVAR_PROGIF:
4900 		*result = cfg->progif;
4901 		break;
4902 	case PCI_IVAR_REVID:
4903 		*result = cfg->revid;
4904 		break;
4905 	case PCI_IVAR_INTPIN:
4906 		*result = cfg->intpin;
4907 		break;
4908 	case PCI_IVAR_IRQ:
4909 		*result = cfg->intline;
4910 		break;
4911 	case PCI_IVAR_DOMAIN:
4912 		*result = cfg->domain;
4913 		break;
4914 	case PCI_IVAR_BUS:
4915 		*result = cfg->bus;
4916 		break;
4917 	case PCI_IVAR_SLOT:
4918 		*result = cfg->slot;
4919 		break;
4920 	case PCI_IVAR_FUNCTION:
4921 		*result = cfg->func;
4922 		break;
4923 	case PCI_IVAR_CMDREG:
4924 		*result = cfg->cmdreg;
4925 		break;
4926 	case PCI_IVAR_CACHELNSZ:
4927 		*result = cfg->cachelnsz;
4928 		break;
4929 	case PCI_IVAR_MINGNT:
4930 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4931 			*result = -1;
4932 			return (EINVAL);
4933 		}
4934 		*result = cfg->mingnt;
4935 		break;
4936 	case PCI_IVAR_MAXLAT:
4937 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4938 			*result = -1;
4939 			return (EINVAL);
4940 		}
4941 		*result = cfg->maxlat;
4942 		break;
4943 	case PCI_IVAR_LATTIMER:
4944 		*result = cfg->lattimer;
4945 		break;
4946 	default:
4947 		return (ENOENT);
4948 	}
4949 	return (0);
4950 }
4951 
4952 int
4953 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4954 {
4955 	struct pci_devinfo *dinfo;
4956 
4957 	dinfo = device_get_ivars(child);
4958 
4959 	switch (which) {
4960 	case PCI_IVAR_INTPIN:
4961 		dinfo->cfg.intpin = value;
4962 		return (0);
4963 	case PCI_IVAR_ETHADDR:
4964 	case PCI_IVAR_SUBVENDOR:
4965 	case PCI_IVAR_SUBDEVICE:
4966 	case PCI_IVAR_VENDOR:
4967 	case PCI_IVAR_DEVICE:
4968 	case PCI_IVAR_DEVID:
4969 	case PCI_IVAR_CLASS:
4970 	case PCI_IVAR_SUBCLASS:
4971 	case PCI_IVAR_PROGIF:
4972 	case PCI_IVAR_REVID:
4973 	case PCI_IVAR_IRQ:
4974 	case PCI_IVAR_DOMAIN:
4975 	case PCI_IVAR_BUS:
4976 	case PCI_IVAR_SLOT:
4977 	case PCI_IVAR_FUNCTION:
4978 		return (EINVAL);	/* disallow for now */
4979 
4980 	default:
4981 		return (ENOENT);
4982 	}
4983 }
4984 
4985 #include "opt_ddb.h"
4986 #ifdef DDB
4987 #include <ddb/ddb.h>
4988 #include <sys/cons.h>
4989 
4990 /*
4991  * List resources based on pci map registers, used for within ddb
4992  */
4993 
4994 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4995 {
4996 	struct pci_devinfo *dinfo;
4997 	struct devlist *devlist_head;
4998 	struct pci_conf *p;
4999 	const char *name;
5000 	int i, error, none_count;
5001 
5002 	none_count = 0;
5003 	/* get the head of the device queue */
5004 	devlist_head = &pci_devq;
5005 
5006 	/*
5007 	 * Go through the list of devices and print out devices
5008 	 */
5009 	for (error = 0, i = 0,
5010 	     dinfo = STAILQ_FIRST(devlist_head);
5011 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
5012 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
5013 
5014 		/* Populate pd_name and pd_unit */
5015 		name = NULL;
5016 		if (dinfo->cfg.dev)
5017 			name = device_get_name(dinfo->cfg.dev);
5018 
5019 		p = &dinfo->conf;
5020 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
5021 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
5022 			(name && *name) ? name : "none",
5023 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
5024 			none_count++,
5025 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
5026 			p->pc_sel.pc_func, (p->pc_class << 16) |
5027 			(p->pc_subclass << 8) | p->pc_progif,
5028 			(p->pc_subdevice << 16) | p->pc_subvendor,
5029 			(p->pc_device << 16) | p->pc_vendor,
5030 			p->pc_revid, p->pc_hdr);
5031 	}
5032 }
5033 #endif /* DDB */
5034 
5035 static struct resource *
5036 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
5037     rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
5038     u_int flags)
5039 {
5040 	struct pci_devinfo *dinfo = device_get_ivars(child);
5041 	struct resource_list *rl = &dinfo->resources;
5042 	struct resource *res;
5043 	struct pci_map *pm;
5044 	uint16_t cmd;
5045 	pci_addr_t map, testval;
5046 	int mapsize;
5047 
5048 	res = NULL;
5049 
5050 	/* If rid is managed by EA, ignore it */
5051 	if (pci_ea_is_enabled(child, *rid))
5052 		goto out;
5053 
5054 	pm = pci_find_bar(child, *rid);
5055 	if (pm != NULL) {
5056 		/* This is a BAR that we failed to allocate earlier. */
5057 		mapsize = pm->pm_size;
5058 		map = pm->pm_value;
5059 	} else {
5060 		/*
5061 		 * Weed out the bogons, and figure out how large the
5062 		 * BAR/map is.  BARs that read back 0 here are bogus
5063 		 * and unimplemented.  Note: atapci in legacy mode are
5064 		 * special and handled elsewhere in the code.  If you
5065 		 * have a atapci device in legacy mode and it fails
5066 		 * here, that other code is broken.
5067 		 */
5068 		pci_read_bar(child, *rid, &map, &testval, NULL);
5069 
5070 		/*
5071 		 * Determine the size of the BAR and ignore BARs with a size
5072 		 * of 0.  Device ROM BARs use a different mask value.
5073 		 */
5074 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
5075 			mapsize = pci_romsize(testval);
5076 		else
5077 			mapsize = pci_mapsize(testval);
5078 		if (mapsize == 0)
5079 			goto out;
5080 		pm = pci_add_bar(child, *rid, map, mapsize);
5081 	}
5082 
5083 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
5084 		if (type != SYS_RES_MEMORY) {
5085 			if (bootverbose)
5086 				device_printf(dev,
5087 				    "child %s requested type %d for rid %#x,"
5088 				    " but the BAR says it is an memio\n",
5089 				    device_get_nameunit(child), type, *rid);
5090 			goto out;
5091 		}
5092 	} else {
5093 		if (type != SYS_RES_IOPORT) {
5094 			if (bootverbose)
5095 				device_printf(dev,
5096 				    "child %s requested type %d for rid %#x,"
5097 				    " but the BAR says it is an ioport\n",
5098 				    device_get_nameunit(child), type, *rid);
5099 			goto out;
5100 		}
5101 	}
5102 
5103 	/*
5104 	 * For real BARs, we need to override the size that
5105 	 * the driver requests, because that's what the BAR
5106 	 * actually uses and we would otherwise have a
5107 	 * situation where we might allocate the excess to
5108 	 * another driver, which won't work.
5109 	 */
5110 	count = ((pci_addr_t)1 << mapsize) * num;
5111 	if (RF_ALIGNMENT(flags) < mapsize)
5112 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
5113 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
5114 		flags |= RF_PREFETCHABLE;
5115 
5116 	/*
5117 	 * Allocate enough resource, and then write back the
5118 	 * appropriate BAR for that resource.
5119 	 */
5120 	resource_list_add(rl, type, *rid, start, end, count);
5121 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
5122 	    count, flags & ~RF_ACTIVE);
5123 	if (res == NULL) {
5124 		resource_list_delete(rl, type, *rid);
5125 		device_printf(child,
5126 		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
5127 		    count, *rid, type, start, end);
5128 		goto out;
5129 	}
5130 	if (bootverbose)
5131 		device_printf(child,
5132 		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
5133 		    count, *rid, type, rman_get_start(res));
5134 
5135 	/* Disable decoding via the CMD register before updating the BAR */
5136 	cmd = pci_read_config(child, PCIR_COMMAND, 2);
5137 	pci_write_config(child, PCIR_COMMAND,
5138 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
5139 
5140 	map = rman_get_start(res);
5141 	pci_write_bar(child, pm, map);
5142 
5143 	/* Restore the original value of the CMD register */
5144 	pci_write_config(child, PCIR_COMMAND, cmd, 2);
5145 out:
5146 	return (res);
5147 }
5148 
5149 struct resource *
5150 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
5151     rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
5152     u_int flags)
5153 {
5154 	struct pci_devinfo *dinfo;
5155 	struct resource_list *rl;
5156 	struct resource_list_entry *rle;
5157 	struct resource *res;
5158 	pcicfgregs *cfg;
5159 
5160 	/*
5161 	 * Perform lazy resource allocation
5162 	 */
5163 	dinfo = device_get_ivars(child);
5164 	rl = &dinfo->resources;
5165 	cfg = &dinfo->cfg;
5166 	switch (type) {
5167 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
5168 	case PCI_RES_BUS:
5169 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
5170 		    flags));
5171 #endif
5172 	case SYS_RES_IRQ:
5173 		/*
5174 		 * Can't alloc legacy interrupt once MSI messages have
5175 		 * been allocated.
5176 		 */
5177 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
5178 		    cfg->msix.msix_alloc > 0))
5179 			return (NULL);
5180 
5181 		/*
5182 		 * If the child device doesn't have an interrupt
5183 		 * routed and is deserving of an interrupt, try to
5184 		 * assign it one.
5185 		 */
5186 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
5187 		    (cfg->intpin != 0))
5188 			pci_assign_interrupt(dev, child, 0);
5189 		break;
5190 	case SYS_RES_IOPORT:
5191 	case SYS_RES_MEMORY:
5192 #ifdef NEW_PCIB
5193 		/*
5194 		 * PCI-PCI bridge I/O window resources are not BARs.
5195 		 * For those allocations just pass the request up the
5196 		 * tree.
5197 		 */
5198 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
5199 			switch (*rid) {
5200 			case PCIR_IOBASEL_1:
5201 			case PCIR_MEMBASE_1:
5202 			case PCIR_PMBASEL_1:
5203 				/*
5204 				 * XXX: Should we bother creating a resource
5205 				 * list entry?
5206 				 */
5207 				return (bus_generic_alloc_resource(dev, child,
5208 				    type, rid, start, end, count, flags));
5209 			}
5210 		}
5211 #endif
5212 		/* Reserve resources for this BAR if needed. */
5213 		rle = resource_list_find(rl, type, *rid);
5214 		if (rle == NULL) {
5215 			res = pci_reserve_map(dev, child, type, rid, start, end,
5216 			    count, num, flags);
5217 			if (res == NULL)
5218 				return (NULL);
5219 		}
5220 	}
5221 	return (resource_list_alloc(rl, dev, child, type, rid,
5222 	    start, end, count, flags));
5223 }
5224 
5225 struct resource *
5226 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
5227     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
5228 {
5229 #ifdef PCI_IOV
5230 	struct pci_devinfo *dinfo;
5231 #endif
5232 
5233 	if (device_get_parent(child) != dev)
5234 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
5235 		    type, rid, start, end, count, flags));
5236 
5237 #ifdef PCI_IOV
5238 	dinfo = device_get_ivars(child);
5239 	if (dinfo->cfg.flags & PCICFG_VF) {
5240 		switch (type) {
5241 		/* VFs can't have I/O BARs. */
5242 		case SYS_RES_IOPORT:
5243 			return (NULL);
5244 		case SYS_RES_MEMORY:
5245 			return (pci_vf_alloc_mem_resource(dev, child, rid,
5246 			    start, end, count, flags));
5247 		}
5248 
5249 		/* Fall through for other types of resource allocations. */
5250 	}
5251 #endif
5252 
5253 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
5254 	    count, 1, flags));
5255 }
5256 
5257 int
5258 pci_release_resource(device_t dev, device_t child, int type, int rid,
5259     struct resource *r)
5260 {
5261 	struct pci_devinfo *dinfo;
5262 	struct resource_list *rl;
5263 	pcicfgregs *cfg;
5264 
5265 	if (device_get_parent(child) != dev)
5266 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
5267 		    type, rid, r));
5268 
5269 	dinfo = device_get_ivars(child);
5270 	cfg = &dinfo->cfg;
5271 
5272 #ifdef PCI_IOV
5273 	if (dinfo->cfg.flags & PCICFG_VF) {
5274 		switch (type) {
5275 		/* VFs can't have I/O BARs. */
5276 		case SYS_RES_IOPORT:
5277 			return (EDOOFUS);
5278 		case SYS_RES_MEMORY:
5279 			return (pci_vf_release_mem_resource(dev, child, rid,
5280 			    r));
5281 		}
5282 
5283 		/* Fall through for other types of resource allocations. */
5284 	}
5285 #endif
5286 
5287 #ifdef NEW_PCIB
5288 	/*
5289 	 * PCI-PCI bridge I/O window resources are not BARs.  For
5290 	 * those allocations just pass the request up the tree.
5291 	 */
5292 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
5293 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
5294 		switch (rid) {
5295 		case PCIR_IOBASEL_1:
5296 		case PCIR_MEMBASE_1:
5297 		case PCIR_PMBASEL_1:
5298 			return (bus_generic_release_resource(dev, child, type,
5299 			    rid, r));
5300 		}
5301 	}
5302 #endif
5303 
5304 	rl = &dinfo->resources;
5305 	return (resource_list_release(rl, dev, child, type, rid, r));
5306 }
5307 
5308 int
5309 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5310     struct resource *r)
5311 {
5312 	struct pci_devinfo *dinfo;
5313 	int error;
5314 
5315 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5316 	if (error)
5317 		return (error);
5318 
5319 	/* Enable decoding in the command register when activating BARs. */
5320 	if (device_get_parent(child) == dev) {
5321 		/* Device ROMs need their decoding explicitly enabled. */
5322 		dinfo = device_get_ivars(child);
5323 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5324 			pci_write_bar(child, pci_find_bar(child, rid),
5325 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5326 		switch (type) {
5327 		case SYS_RES_IOPORT:
5328 		case SYS_RES_MEMORY:
5329 			error = PCI_ENABLE_IO(dev, child, type);
5330 			break;
5331 		}
5332 	}
5333 	return (error);
5334 }
5335 
5336 int
5337 pci_deactivate_resource(device_t dev, device_t child, int type,
5338     int rid, struct resource *r)
5339 {
5340 	struct pci_devinfo *dinfo;
5341 	int error;
5342 
5343 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5344 	if (error)
5345 		return (error);
5346 
5347 	/* Disable decoding for device ROMs. */
5348 	if (device_get_parent(child) == dev) {
5349 		dinfo = device_get_ivars(child);
5350 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5351 			pci_write_bar(child, pci_find_bar(child, rid),
5352 			    rman_get_start(r));
5353 	}
5354 	return (0);
5355 }
5356 
5357 void
5358 pci_child_deleted(device_t dev, device_t child)
5359 {
5360 	struct resource_list_entry *rle;
5361 	struct resource_list *rl;
5362 	struct pci_devinfo *dinfo;
5363 
5364 	dinfo = device_get_ivars(child);
5365 	rl = &dinfo->resources;
5366 
5367 	EVENTHANDLER_INVOKE(pci_delete_device, child);
5368 
5369 	/* Turn off access to resources we're about to free */
5370 	if (bus_child_present(child) != 0) {
5371 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5372 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5373 
5374 		pci_disable_busmaster(child);
5375 	}
5376 
5377 	/* Free all allocated resources */
5378 	STAILQ_FOREACH(rle, rl, link) {
5379 		if (rle->res) {
5380 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5381 			    resource_list_busy(rl, rle->type, rle->rid)) {
5382 				pci_printf(&dinfo->cfg,
5383 				    "Resource still owned, oops. "
5384 				    "(type=%d, rid=%d, addr=%lx)\n",
5385 				    rle->type, rle->rid,
5386 				    rman_get_start(rle->res));
5387 				bus_release_resource(child, rle->type, rle->rid,
5388 				    rle->res);
5389 			}
5390 			resource_list_unreserve(rl, dev, child, rle->type,
5391 			    rle->rid);
5392 		}
5393 	}
5394 	resource_list_free(rl);
5395 
5396 	pci_freecfg(dinfo);
5397 }
5398 
5399 void
5400 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5401 {
5402 	struct pci_devinfo *dinfo;
5403 	struct resource_list *rl;
5404 	struct resource_list_entry *rle;
5405 
5406 	if (device_get_parent(child) != dev)
5407 		return;
5408 
5409 	dinfo = device_get_ivars(child);
5410 	rl = &dinfo->resources;
5411 	rle = resource_list_find(rl, type, rid);
5412 	if (rle == NULL)
5413 		return;
5414 
5415 	if (rle->res) {
5416 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5417 		    resource_list_busy(rl, type, rid)) {
5418 			device_printf(dev, "delete_resource: "
5419 			    "Resource still owned by child, oops. "
5420 			    "(type=%d, rid=%d, addr=%jx)\n",
5421 			    type, rid, rman_get_start(rle->res));
5422 			return;
5423 		}
5424 		resource_list_unreserve(rl, dev, child, type, rid);
5425 	}
5426 	resource_list_delete(rl, type, rid);
5427 }
5428 
5429 struct resource_list *
5430 pci_get_resource_list (device_t dev, device_t child)
5431 {
5432 	struct pci_devinfo *dinfo = device_get_ivars(child);
5433 
5434 	return (&dinfo->resources);
5435 }
5436 
5437 bus_dma_tag_t
5438 pci_get_dma_tag(device_t bus, device_t dev)
5439 {
5440 	struct pci_softc *sc = device_get_softc(bus);
5441 
5442 	return (sc->sc_dma_tag);
5443 }
5444 
5445 uint32_t
5446 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5447 {
5448 	struct pci_devinfo *dinfo = device_get_ivars(child);
5449 	pcicfgregs *cfg = &dinfo->cfg;
5450 
5451 #ifdef PCI_IOV
5452 	/*
5453 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5454 	 * emulate them here.
5455 	 */
5456 	if (cfg->flags & PCICFG_VF) {
5457 		if (reg == PCIR_VENDOR) {
5458 			switch (width) {
5459 			case 4:
5460 				return (cfg->device << 16 | cfg->vendor);
5461 			case 2:
5462 				return (cfg->vendor);
5463 			case 1:
5464 				return (cfg->vendor & 0xff);
5465 			default:
5466 				return (0xffffffff);
5467 			}
5468 		} else if (reg == PCIR_DEVICE) {
5469 			switch (width) {
5470 			/* Note that an unaligned 4-byte read is an error. */
5471 			case 2:
5472 				return (cfg->device);
5473 			case 1:
5474 				return (cfg->device & 0xff);
5475 			default:
5476 				return (0xffffffff);
5477 			}
5478 		}
5479 	}
5480 #endif
5481 
5482 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5483 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5484 }
5485 
5486 void
5487 pci_write_config_method(device_t dev, device_t child, int reg,
5488     uint32_t val, int width)
5489 {
5490 	struct pci_devinfo *dinfo = device_get_ivars(child);
5491 	pcicfgregs *cfg = &dinfo->cfg;
5492 
5493 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5494 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5495 }
5496 
5497 int
5498 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5499     size_t buflen)
5500 {
5501 
5502 	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5503 	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5504 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5505 	return (0);
5506 }
5507 
5508 int
5509 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5510     size_t buflen)
5511 {
5512 	struct pci_devinfo *dinfo;
5513 	pcicfgregs *cfg;
5514 
5515 	dinfo = device_get_ivars(child);
5516 	cfg = &dinfo->cfg;
5517 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5518 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5519 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5520 	    cfg->progif);
5521 	return (0);
5522 }
5523 
5524 int
5525 pci_assign_interrupt_method(device_t dev, device_t child)
5526 {
5527 	struct pci_devinfo *dinfo = device_get_ivars(child);
5528 	pcicfgregs *cfg = &dinfo->cfg;
5529 
5530 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5531 	    cfg->intpin));
5532 }
5533 
5534 static void
5535 pci_lookup(void *arg, const char *name, device_t *dev)
5536 {
5537 	long val;
5538 	char *end;
5539 	int domain, bus, slot, func;
5540 
5541 	if (*dev != NULL)
5542 		return;
5543 
5544 	/*
5545 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5546 	 * pciB:S:F.  In the latter case, the domain is assumed to
5547 	 * be zero.
5548 	 */
5549 	if (strncmp(name, "pci", 3) != 0)
5550 		return;
5551 	val = strtol(name + 3, &end, 10);
5552 	if (val < 0 || val > INT_MAX || *end != ':')
5553 		return;
5554 	domain = val;
5555 	val = strtol(end + 1, &end, 10);
5556 	if (val < 0 || val > INT_MAX || *end != ':')
5557 		return;
5558 	bus = val;
5559 	val = strtol(end + 1, &end, 10);
5560 	if (val < 0 || val > INT_MAX)
5561 		return;
5562 	slot = val;
5563 	if (*end == ':') {
5564 		val = strtol(end + 1, &end, 10);
5565 		if (val < 0 || val > INT_MAX || *end != '\0')
5566 			return;
5567 		func = val;
5568 	} else if (*end == '\0') {
5569 		func = slot;
5570 		slot = bus;
5571 		bus = domain;
5572 		domain = 0;
5573 	} else
5574 		return;
5575 
5576 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5577 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5578 		return;
5579 
5580 	*dev = pci_find_dbsf(domain, bus, slot, func);
5581 }
5582 
5583 static int
5584 pci_modevent(module_t mod, int what, void *arg)
5585 {
5586 	static struct cdev *pci_cdev;
5587 	static eventhandler_tag tag;
5588 
5589 	switch (what) {
5590 	case MOD_LOAD:
5591 		STAILQ_INIT(&pci_devq);
5592 		pci_generation = 0;
5593 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5594 		    "pci");
5595 		pci_load_vendor_data();
5596 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5597 		    1000);
5598 		break;
5599 
5600 	case MOD_UNLOAD:
5601 		if (tag != NULL)
5602 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5603 		destroy_dev(pci_cdev);
5604 		break;
5605 	}
5606 
5607 	return (0);
5608 }
5609 
5610 static void
5611 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5612 {
5613 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5614 	struct pcicfg_pcie *cfg;
5615 	int version, pos;
5616 
5617 	cfg = &dinfo->cfg.pcie;
5618 	pos = cfg->pcie_location;
5619 
5620 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5621 
5622 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5623 
5624 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5625 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5626 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5627 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5628 
5629 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5630 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5631 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5632 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5633 
5634 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5635 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5636 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5637 
5638 	if (version > 1) {
5639 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5640 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5641 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5642 	}
5643 #undef WREG
5644 }
5645 
5646 static void
5647 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5648 {
5649 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5650 	    dinfo->cfg.pcix.pcix_command,  2);
5651 }
5652 
5653 void
5654 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5655 {
5656 
5657 	/*
5658 	 * Restore the device to full power mode.  We must do this
5659 	 * before we restore the registers because moving from D3 to
5660 	 * D0 will cause the chip's BARs and some other registers to
5661 	 * be reset to some unknown power on reset values.  Cut down
5662 	 * the noise on boot by doing nothing if we are already in
5663 	 * state D0.
5664 	 */
5665 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5666 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5667 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5668 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5669 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5670 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5671 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5672 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5673 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5674 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5675 	case PCIM_HDRTYPE_NORMAL:
5676 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5677 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5678 		break;
5679 	case PCIM_HDRTYPE_BRIDGE:
5680 		pci_write_config(dev, PCIR_SECLAT_1,
5681 		    dinfo->cfg.bridge.br_seclat, 1);
5682 		pci_write_config(dev, PCIR_SUBBUS_1,
5683 		    dinfo->cfg.bridge.br_subbus, 1);
5684 		pci_write_config(dev, PCIR_SECBUS_1,
5685 		    dinfo->cfg.bridge.br_secbus, 1);
5686 		pci_write_config(dev, PCIR_PRIBUS_1,
5687 		    dinfo->cfg.bridge.br_pribus, 1);
5688 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5689 		    dinfo->cfg.bridge.br_control, 2);
5690 		break;
5691 	case PCIM_HDRTYPE_CARDBUS:
5692 		pci_write_config(dev, PCIR_SECLAT_2,
5693 		    dinfo->cfg.bridge.br_seclat, 1);
5694 		pci_write_config(dev, PCIR_SUBBUS_2,
5695 		    dinfo->cfg.bridge.br_subbus, 1);
5696 		pci_write_config(dev, PCIR_SECBUS_2,
5697 		    dinfo->cfg.bridge.br_secbus, 1);
5698 		pci_write_config(dev, PCIR_PRIBUS_2,
5699 		    dinfo->cfg.bridge.br_pribus, 1);
5700 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5701 		    dinfo->cfg.bridge.br_control, 2);
5702 		break;
5703 	}
5704 	pci_restore_bars(dev);
5705 
5706 	/*
5707 	 * Restore extended capabilities for PCI-Express and PCI-X
5708 	 */
5709 	if (dinfo->cfg.pcie.pcie_location != 0)
5710 		pci_cfg_restore_pcie(dev, dinfo);
5711 	if (dinfo->cfg.pcix.pcix_location != 0)
5712 		pci_cfg_restore_pcix(dev, dinfo);
5713 
5714 	/* Restore MSI and MSI-X configurations if they are present. */
5715 	if (dinfo->cfg.msi.msi_location != 0)
5716 		pci_resume_msi(dev);
5717 	if (dinfo->cfg.msix.msix_location != 0)
5718 		pci_resume_msix(dev);
5719 
5720 #ifdef PCI_IOV
5721 	if (dinfo->cfg.iov != NULL)
5722 		pci_iov_cfg_restore(dev, dinfo);
5723 #endif
5724 }
5725 
5726 static void
5727 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5728 {
5729 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5730 	struct pcicfg_pcie *cfg;
5731 	int version, pos;
5732 
5733 	cfg = &dinfo->cfg.pcie;
5734 	pos = cfg->pcie_location;
5735 
5736 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5737 
5738 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5739 
5740 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5741 
5742 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5743 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5744 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5745 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5746 
5747 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5748 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5749 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5750 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5751 
5752 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5753 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5754 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5755 
5756 	if (version > 1) {
5757 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5758 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5759 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5760 	}
5761 #undef RREG
5762 }
5763 
5764 static void
5765 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5766 {
5767 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5768 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5769 }
5770 
5771 void
5772 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5773 {
5774 	uint32_t cls;
5775 	int ps;
5776 
5777 	/*
5778 	 * Some drivers apparently write to these registers w/o updating our
5779 	 * cached copy.  No harm happens if we update the copy, so do so here
5780 	 * so we can restore them.  The COMMAND register is modified by the
5781 	 * bus w/o updating the cache.  This should represent the normally
5782 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5783 	 */
5784 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5785 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5786 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5787 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5788 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5789 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5790 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5791 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5792 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5793 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5794 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5795 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5796 	case PCIM_HDRTYPE_NORMAL:
5797 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5798 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5799 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5800 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5801 		break;
5802 	case PCIM_HDRTYPE_BRIDGE:
5803 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5804 		    PCIR_SECLAT_1, 1);
5805 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5806 		    PCIR_SUBBUS_1, 1);
5807 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5808 		    PCIR_SECBUS_1, 1);
5809 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5810 		    PCIR_PRIBUS_1, 1);
5811 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5812 		    PCIR_BRIDGECTL_1, 2);
5813 		break;
5814 	case PCIM_HDRTYPE_CARDBUS:
5815 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5816 		    PCIR_SECLAT_2, 1);
5817 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5818 		    PCIR_SUBBUS_2, 1);
5819 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5820 		    PCIR_SECBUS_2, 1);
5821 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5822 		    PCIR_PRIBUS_2, 1);
5823 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5824 		    PCIR_BRIDGECTL_2, 2);
5825 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5826 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5827 		break;
5828 	}
5829 
5830 	if (dinfo->cfg.pcie.pcie_location != 0)
5831 		pci_cfg_save_pcie(dev, dinfo);
5832 
5833 	if (dinfo->cfg.pcix.pcix_location != 0)
5834 		pci_cfg_save_pcix(dev, dinfo);
5835 
5836 #ifdef PCI_IOV
5837 	if (dinfo->cfg.iov != NULL)
5838 		pci_iov_cfg_save(dev, dinfo);
5839 #endif
5840 
5841 	/*
5842 	 * don't set the state for display devices, base peripherals and
5843 	 * memory devices since bad things happen when they are powered down.
5844 	 * We should (a) have drivers that can easily detach and (b) use
5845 	 * generic drivers for these devices so that some device actually
5846 	 * attaches.  We need to make sure that when we implement (a) we don't
5847 	 * power the device down on a reattach.
5848 	 */
5849 	cls = pci_get_class(dev);
5850 	if (!setstate)
5851 		return;
5852 	switch (pci_do_power_nodriver)
5853 	{
5854 		case 0:		/* NO powerdown at all */
5855 			return;
5856 		case 1:		/* Conservative about what to power down */
5857 			if (cls == PCIC_STORAGE)
5858 				return;
5859 			/*FALLTHROUGH*/
5860 		case 2:		/* Aggressive about what to power down */
5861 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5862 			    cls == PCIC_BASEPERIPH)
5863 				return;
5864 			/*FALLTHROUGH*/
5865 		case 3:		/* Power down everything */
5866 			break;
5867 	}
5868 	/*
5869 	 * PCI spec says we can only go into D3 state from D0 state.
5870 	 * Transition from D[12] into D0 before going to D3 state.
5871 	 */
5872 	ps = pci_get_powerstate(dev);
5873 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5874 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5875 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5876 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5877 }
5878 
5879 /* Wrapper APIs suitable for device driver use. */
5880 void
5881 pci_save_state(device_t dev)
5882 {
5883 	struct pci_devinfo *dinfo;
5884 
5885 	dinfo = device_get_ivars(dev);
5886 	pci_cfg_save(dev, dinfo, 0);
5887 }
5888 
5889 void
5890 pci_restore_state(device_t dev)
5891 {
5892 	struct pci_devinfo *dinfo;
5893 
5894 	dinfo = device_get_ivars(dev);
5895 	pci_cfg_restore(dev, dinfo);
5896 }
5897 
5898 static int
5899 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5900     uintptr_t *id)
5901 {
5902 
5903 	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5904 }
5905 
5906 /* Find the upstream port of a given PCI device in a root complex. */
5907 device_t
5908 pci_find_pcie_root_port(device_t dev)
5909 {
5910 	struct pci_devinfo *dinfo;
5911 	devclass_t pci_class;
5912 	device_t pcib, bus;
5913 
5914 	pci_class = devclass_find("pci");
5915 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5916 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5917 
5918 	/*
5919 	 * Walk the bridge hierarchy until we find a PCI-e root
5920 	 * port or a non-PCI device.
5921 	 */
5922 	for (;;) {
5923 		bus = device_get_parent(dev);
5924 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5925 		    device_get_nameunit(dev)));
5926 
5927 		pcib = device_get_parent(bus);
5928 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5929 		    device_get_nameunit(bus)));
5930 
5931 		/*
5932 		 * pcib's parent must be a PCI bus for this to be a
5933 		 * PCI-PCI bridge.
5934 		 */
5935 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5936 			return (NULL);
5937 
5938 		dinfo = device_get_ivars(pcib);
5939 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5940 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5941 			return (pcib);
5942 
5943 		dev = pcib;
5944 	}
5945 }
5946 
5947 /*
5948  * Wait for pending transactions to complete on a PCI-express function.
5949  *
5950  * The maximum delay is specified in milliseconds in max_delay.  Note
5951  * that this function may sleep.
5952  *
5953  * Returns true if the function is idle and false if the timeout is
5954  * exceeded.  If dev is not a PCI-express function, this returns true.
5955  */
5956 bool
5957 pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5958 {
5959 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5960 	uint16_t sta;
5961 	int cap;
5962 
5963 	cap = dinfo->cfg.pcie.pcie_location;
5964 	if (cap == 0)
5965 		return (true);
5966 
5967 	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5968 	while (sta & PCIEM_STA_TRANSACTION_PND) {
5969 		if (max_delay == 0)
5970 			return (false);
5971 
5972 		/* Poll once every 100 milliseconds up to the timeout. */
5973 		if (max_delay > 100) {
5974 			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5975 			max_delay -= 100;
5976 		} else {
5977 			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5978 			    C_HARDCLOCK);
5979 			max_delay = 0;
5980 		}
5981 		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5982 	}
5983 
5984 	return (true);
5985 }
5986 
5987 /*
5988  * Determine the maximum Completion Timeout in microseconds.
5989  *
5990  * For non-PCI-express functions this returns 0.
5991  */
5992 int
5993 pcie_get_max_completion_timeout(device_t dev)
5994 {
5995 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5996 	int cap;
5997 
5998 	cap = dinfo->cfg.pcie.pcie_location;
5999 	if (cap == 0)
6000 		return (0);
6001 
6002 	/*
6003 	 * Functions using the 1.x spec use the default timeout range of
6004 	 * 50 microseconds to 50 milliseconds.  Functions that do not
6005 	 * support programmable timeouts also use this range.
6006 	 */
6007 	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
6008 	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
6009 	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
6010 		return (50 * 1000);
6011 
6012 	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
6013 	    PCIEM_CTL2_COMP_TIMO_VAL) {
6014 	case PCIEM_CTL2_COMP_TIMO_100US:
6015 		return (100);
6016 	case PCIEM_CTL2_COMP_TIMO_10MS:
6017 		return (10 * 1000);
6018 	case PCIEM_CTL2_COMP_TIMO_55MS:
6019 		return (55 * 1000);
6020 	case PCIEM_CTL2_COMP_TIMO_210MS:
6021 		return (210 * 1000);
6022 	case PCIEM_CTL2_COMP_TIMO_900MS:
6023 		return (900 * 1000);
6024 	case PCIEM_CTL2_COMP_TIMO_3500MS:
6025 		return (3500 * 1000);
6026 	case PCIEM_CTL2_COMP_TIMO_13S:
6027 		return (13 * 1000 * 1000);
6028 	case PCIEM_CTL2_COMP_TIMO_64S:
6029 		return (64 * 1000 * 1000);
6030 	default:
6031 		return (50 * 1000);
6032 	}
6033 }
6034 
6035 /*
6036  * Perform a Function Level Reset (FLR) on a device.
6037  *
6038  * This function first waits for any pending transactions to complete
6039  * within the timeout specified by max_delay.  If transactions are
6040  * still pending, the function will return false without attempting a
6041  * reset.
6042  *
6043  * If dev is not a PCI-express function or does not support FLR, this
6044  * function returns false.
6045  *
6046  * Note that no registers are saved or restored.  The caller is
6047  * responsible for saving and restoring any registers including
6048  * PCI-standard registers via pci_save_state() and
6049  * pci_restore_state().
6050  */
6051 bool
6052 pcie_flr(device_t dev, u_int max_delay, bool force)
6053 {
6054 	struct pci_devinfo *dinfo = device_get_ivars(dev);
6055 	uint16_t cmd, ctl;
6056 	int compl_delay;
6057 	int cap;
6058 
6059 	cap = dinfo->cfg.pcie.pcie_location;
6060 	if (cap == 0)
6061 		return (false);
6062 
6063 	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
6064 		return (false);
6065 
6066 	/*
6067 	 * Disable busmastering to prevent generation of new
6068 	 * transactions while waiting for the device to go idle.  If
6069 	 * the idle timeout fails, the command register is restored
6070 	 * which will re-enable busmastering.
6071 	 */
6072 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
6073 	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
6074 	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
6075 		if (!force) {
6076 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
6077 			return (false);
6078 		}
6079 		pci_printf(&dinfo->cfg,
6080 		    "Resetting with transactions pending after %d ms\n",
6081 		    max_delay);
6082 
6083 		/*
6084 		 * Extend the post-FLR delay to cover the maximum
6085 		 * Completion Timeout delay of anything in flight
6086 		 * during the FLR delay.  Enforce a minimum delay of
6087 		 * at least 10ms.
6088 		 */
6089 		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
6090 		if (compl_delay < 10)
6091 			compl_delay = 10;
6092 	} else
6093 		compl_delay = 0;
6094 
6095 	/* Initiate the reset. */
6096 	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
6097 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
6098 	    PCIEM_CTL_INITIATE_FLR, 2);
6099 
6100 	/* Wait for 100ms. */
6101 	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
6102 
6103 	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
6104 	    PCIEM_STA_TRANSACTION_PND)
6105 		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
6106 	return (true);
6107 }
6108