/*-
 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
 * Copyright (c) 2000, BSDi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bus.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/endian.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

#include <sys/bus.h>
#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
#include <machine/intr_machdep.h>
#endif

#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#ifdef PCI_IOV
#include <sys/nv.h>
#include <dev/pci/pci_iov_private.h>
#endif

#include <dev/usb/controller/xhcireg.h>
#include <dev/usb/controller/ehcireg.h>
#include <dev/usb/controller/ohcireg.h>
#include <dev/usb/controller/uhcireg.h>

#include "pcib_if.h"
#include "pci_if.h"

#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))

static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static int		pci_detach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static int		pci_get_id_method(device_t dev, device_t child,
			    enum pci_id_type type, uintptr_t *rid);

static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
    int b, int s, int f, uint16_t vid, uint16_t did);

static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

static char	*pci_vendordata;
static size_t	pci_vendordata_size;

struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;
	int	arg2;
};

static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * The HPET MMIO base address may appear in BAR1 of the AMD SB600
	 * SMBus controller, depending on the SoftPciRst register
	 * (PM_IO 0x55 [7]).  This prevents us from attaching hpet(4)
	 * when the bit is unset.  Note that this quirk only affects SB600
	 * revision A13 and earlier.  For SB600 A21 and later, firmware
	 * must set the bit to hide it.  For SB700 and later, it is unused
	 * and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200/E2400 Ethernet controllers have
	 * a bug whereby the MSI interrupt does not assert if the
	 * PCIM_CMD_INTxDIS bit of the command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }
};

/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but this\n\
causes problems with some peripherals.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is,\n\
you use USB devices (such as a keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative Routing-ID Interpretation (ARI)");

static int
pci_has_quirk(uint32_t devid, int quirk)
{
	const struct pci_quirk *q;

	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == devid && q->type == quirk)
			return (1);
	}
	return (0);
}

/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}

/* Find a device_t by domain/bus/slot/function */

device_t
pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
{
	struct pci_devinfo *dinfo;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		if ((dinfo->cfg.domain == domain) &&
		    (dinfo->cfg.bus == bus) &&
		    (dinfo->cfg.slot == slot) &&
		    (dinfo->cfg.func == func)) {
			return (dinfo->cfg.dev);
		}
	}

	return (NULL);
}

/* Find a device_t by vendor/device ID */

device_t
pci_find_device(uint16_t vendor, uint16_t device)
{
	struct pci_devinfo *dinfo;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		if ((dinfo->cfg.vendor == vendor) &&
		    (dinfo->cfg.device == device)) {
			return (dinfo->cfg.dev);
		}
	}

	return (NULL);
}

device_t
pci_find_class(uint8_t class, uint8_t subclass)
{
	struct pci_devinfo *dinfo;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		if (dinfo->cfg.baseclass == class &&
		    dinfo->cfg.subclass == subclass) {
			return (dinfo->cfg.dev);
		}
	}

	return (NULL);
}
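
/*
 * Illustrative usage sketch (not part of this file's logic; the IDs
 * below are hypothetical placeholders): the helpers above walk the
 * global pci_devq list and return the first match, e.g.
 *
 *	device_t dev;
 *
 *	dev = pci_find_device(0x8086, 0x100e);
 *	if (dev == NULL)
 *		dev = pci_find_class(PCIC_NETWORK, PCIS_NETWORK_ETHERNET);
 */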

static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}

/* return base address of memory or port map */

static pci_addr_t
pci_mapbase(uint64_t mapreg)
{

	if (PCI_BAR_MEM(mapreg))
		return (mapreg & PCIM_BAR_MEM_BASE);
	else
		return (mapreg & PCIM_BAR_IO_BASE);
}

/* return map type of memory or port map */

static const char *
pci_maptype(uint64_t mapreg)
{

	if (PCI_BAR_IO(mapreg))
		return ("I/O Port");
	if (mapreg & PCIM_BAR_MEM_PREFETCH)
		return ("Prefetchable Memory");
	return ("Memory");
}

/* return log2 of map size decoded for memory or port map */

int
pci_mapsize(uint64_t testval)
{
	int ln2size;

	testval = pci_mapbase(testval);
	ln2size = 0;
	if (testval != 0) {
		while ((testval & 1) == 0) {
			ln2size++;
			testval >>= 1;
		}
	}
	return (ln2size);
}
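
/*
 * Worked example (illustrative): BARs are sized by writing all 1s to
 * the register and reading back the result.  If a 32-bit memory BAR
 * reads back as 0xfffff000, pci_mapbase() strips the low type bits and
 * the loop above counts 12 trailing zero bits, so pci_mapsize()
 * returns 12, i.e. a 2^12 = 4KB decode window.
 */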

/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}

/* return log2 of map size decoded for device ROM */

static int
pci_romsize(uint64_t testval)
{
	int ln2size;

	testval = pci_rombase(testval);
	ln2size = 0;
	if (testval != 0) {
		while ((testval & 1) == 0) {
			ln2size++;
			testval >>= 1;
		}
	}
	return (ln2size);
}

/* return log2 of address range supported by map register */

static int
pci_maprange(uint64_t mapreg)
{
	int ln2range = 0;

	if (PCI_BAR_IO(mapreg))
		ln2range = 32;
	else
		switch (mapreg & PCIM_BAR_MEM_TYPE) {
		case PCIM_BAR_MEM_32:
			ln2range = 32;
			break;
		case PCIM_BAR_MEM_1MB:
			ln2range = 20;
			break;
		case PCIM_BAR_MEM_64:
			ln2range = 64;
			break;
		}
	return (ln2range);
}
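
/*
 * For example (illustrative), a prefetchable 64-bit memory BAR has
 * low type bits 0xc (PCIM_BAR_MEM_PREFETCH | PCIM_BAR_MEM_64), so
 * pci_maprange() returns 64 and the caller must read two consecutive
 * BAR registers to assemble the full base address.
 */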

/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}

/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}

/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}

struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
	    M_WAITOK | M_ZERO));
}

static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG

static void
pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
    cfg->ea.ea_location + (n), w)
	int num_ent;
	int ptr;
	int a, b;
	uint32_t val;
	int ent_size;
	uint32_t dw[4];
	uint64_t base, max_offset;
	struct pci_ea_entry *eae;

	if (cfg->ea.ea_location == 0)
		return;

	STAILQ_INIT(&cfg->ea.ea_entries);

	/* Determine the number of entries */
	num_ent = REG(PCIR_EA_NUM_ENT, 2);
	num_ent &= PCIM_EA_NUM_ENT_MASK;

	/* Find the first entry to take care of */
	ptr = PCIR_EA_FIRST_ENT;

	/* Skip DWORD 2 for type 1 functions */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
		ptr += 4;

	for (a = 0; a < num_ent; a++) {
		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;

		/* Read a number of dwords in the entry */
		val = REG(ptr, 4);
		ptr += 4;
		ent_size = (val & PCIM_EA_ES);

		for (b = 0; b < ent_size; b++) {
			dw[b] = REG(ptr, 4);
			ptr += 4;
		}

		eae->eae_flags = val;
		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;

		base = dw[0] & PCIM_EA_FIELD_MASK;
		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
		b = 2;
		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
			base |= (uint64_t)dw[b] << 32UL;
			b++;
		}
		if (((dw[1] & PCIM_EA_IS_64) != 0)
		    && (b < ent_size)) {
			max_offset |= (uint64_t)dw[b] << 32UL;
			b++;
		}

		eae->eae_base = base;
		eae->eae_max_offset = max_offset;

		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);

		if (bootverbose) {
			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
		}
	}
}
#undef REG

static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* The REG and WREG macros intentionally remain defined for the VPD functions below. */
}

/*
 * PCI Vital Product Data
 */

#define	PCI_VPD_TIMEOUT		1000000

static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
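
/*
 * Note on the loop above: per the VPD capability protocol, writing an
 * offset to PCIR_VPD_ADDR with bit 15 (the flag bit) clear starts a
 * read, and the device sets the flag once the dword is available in
 * PCIR_VPD_DATA.  With DELAY(1) per iteration, PCI_VPD_TIMEOUT bounds
 * the wait at roughly one second of polling.
 */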

#if 0
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT

struct vpd_readstate {
	device_t	pcib;
	pcicfgregs	*cfg;
	uint32_t	val;
	int		bytesinval;
	int		off;
	uint8_t		cksum;
};

static int
vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
{
	uint32_t reg;
	uint8_t byte;

	if (vrs->bytesinval == 0) {
		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
			return (ENXIO);
		vrs->val = le32toh(reg);
		vrs->off += 4;
		byte = vrs->val & 0xff;
		vrs->bytesinval = 3;
	} else {
		vrs->val = vrs->val >> 8;
		byte = vrs->val & 0xff;
		vrs->bytesinval--;
	}

	vrs->cksum += byte;
	*data = byte;
	return (0);
}

static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}

int
pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(dev), cfg);

	*identptr = cfg->vpd.vpd_ident;

	if (*identptr == NULL)
		return (ENXIO);

	return (0);
}

int
pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
	const char **vptr)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	int i;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(dev), cfg);

	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
			*vptr = cfg->vpd.vpd_ros[i].value;
			return (0);
		}

	*vptr = NULL;
	return (ENXIO);
}
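
/*
 * Illustrative driver-side usage (a minimal sketch; 'dev' and 'part'
 * are hypothetical): the standard read-only VPD keywords include "PN"
 * (part number) and "SN" (serial number), so a driver might do:
 *
 *	const char *part;
 *
 *	if (pci_get_vpd_readonly(dev, "PN", &part) == 0)
 *		device_printf(dev, "part number: %s\n", part);
 */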

struct pcicfg_vpd *
pci_fetch_vpd_list(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
	return (&cfg->vpd);
}

/*
 * Find the requested HyperTransport capability and return the offset
 * in configuration space via the pointer provided.  The function
 * returns 0 on success and an error code otherwise.
 */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}

/*
 * Find the requested capability and return the offset in
 * configuration space via the pointer provided.  The function returns
 * 0 on success and an error code otherwise.
 */
int
pci_find_cap_method(device_t dev, device_t child, int capability,
    int *capreg)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t status;
	uint8_t ptr;

	/*
	 * Check the CAP_LIST bit of the PCI status register first.
	 */
	status = pci_read_config(child, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (ENXIO);

	/*
	 * Determine the start pointer of the capabilities list.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		/* XXX: panic? */
		return (ENXIO);		/* no extended capabilities support */
	}
	ptr = pci_read_config(child, ptr, 1);

	/*
	 * Traverse the capabilities list.
	 */
	while (ptr != 0) {
		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}
		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
	}

	return (ENOENT);
}
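
/*
 * Illustrative usage (a minimal sketch): a driver checking whether its
 * device is PCI Express might do:
 *
 *	int ptr;
 *
 *	if (pci_find_cap(dev, PCIY_EXPRESS, &ptr) == 0) {
 *		... 'ptr' now holds the capability's config offset ...
 *	}
 */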

/*
 * Find the requested extended capability and return the offset in
 * configuration space via the pointer provided.  The function returns
 * 0 on success and an error code otherwise.
 */
int
pci_find_extcap_method(device_t dev, device_t child, int capability,
    int *capreg)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t ecap;
	uint16_t ptr;

	/* Only supported for PCI-express devices. */
	if (cfg->pcie.pcie_location == 0)
		return (ENXIO);

	ptr = PCIR_EXTCAP;
	ecap = pci_read_config(child, ptr, 4);
	if (ecap == 0xffffffff || ecap == 0)
		return (ENOENT);
	for (;;) {
		if (PCI_EXTCAP_ID(ecap) == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}
		ptr = PCI_EXTCAP_NEXTPTR(ecap);
		if (ptr == 0)
			break;
		ecap = pci_read_config(child, ptr, 4);
	}

	return (ENOENT);
}
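
/*
 * Illustrative usage (a minimal sketch, assuming a PCI Express
 * device): looking up the Advanced Error Reporting extended
 * capability, for example:
 *
 *	int aer;
 *
 *	if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0)
 *		... AER registers start at config offset 'aer' ...
 */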

/*
 * Support for MSI-X message interrupts.
 */
void
pci_enable_msix_method(device_t dev, device_t child, u_int index,
    uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
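
/*
 * Each MSI-X table entry is 16 bytes: message address low (offset 0),
 * message address high (offset 4), message data (offset 8), and
 * vector control (offset 12).  That layout is why the accessors here
 * index the table BAR with 'index * 16' plus a fixed offset.
 */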

void
pci_mask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	KASSERT(msix->msix_msgnum > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
		val |= PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}

void
pci_unmask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (val & PCIM_MSIX_VCTRL_MASK) {
		val &= ~PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}

int
pci_pending_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, bit;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_pba_offset + (index / 32) * 4;
	bit = 1 << index % 32;
	return (bus_read_4(msix->msix_pba_res, offset) & bit);
}

/*
 * Restore MSI-X registers and table during resume.  If MSI-X is
 * enabled then walk the virtual table to restore the actual MSI-X
 * table.
 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}

/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
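
/*
 * Illustrative allocation sketch (hypothetical driver code): a driver
 * typically sizes its request with pci_msix_count(), calls
 * pci_alloc_msix(), and then allocates the resulting SYS_RES_IRQ
 * resources starting at rid 1:
 *
 *	int count, rid;
 *
 *	count = pci_msix_count(dev);
 *	if (count != 0 && pci_alloc_msix(dev, &count) == 0) {
 *		rid = 1;
 *		res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 *		    RF_ACTIVE);
 *	}
 */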
1720 
1721 /*
1722  * By default, pci_alloc_msix() will assign the allocated IRQ
1723  * resources consecutively to the first N messages in the MSI-X table.
1724  * However, device drivers may want to use different layouts if they
1725  * either receive fewer messages than they asked for, or they wish to
1726  * populate the MSI-X table sparsely.  This method allows the driver
1727  * to specify what layout it wants.  It must be called after a
1728  * successful pci_alloc_msix() but before any of the associated
1729  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1730  *
1731  * The 'vectors' array contains 'count' message vectors.  The array
1732  * maps directly to the MSI-X table in that index 0 in the array
1733  * specifies the vector for the first message in the MSI-X table, etc.
1734  * The vector value in each array index can either be 0 to indicate
1735  * that no vector should be assigned to a message slot, or it can be a
1736  * number from 1 to N (where N is the count returned from a
1737  * succcessful call to pci_alloc_msix()) to indicate which message
1738  * vector (IRQ) to be used for the corresponding message.
1739  *
1740  * On successful return, each message with a non-zero vector will have
1741  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1742  * 1.  Additionally, if any of the IRQs allocated via the previous
1743  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1744  * will be freed back to the system automatically.
1745  *
1746  * For example, suppose a driver has a MSI-X table with 6 messages and
1747  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1748  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1749  * C.  After the call to pci_alloc_msix(), the device will be set up to
1750  * have an MSI-X table of ABC--- (where - means no vector assigned).
1751  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1752  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1753  * be freed back to the system.  This device will also have valid
1754  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1755  *
1756  * In any case, the SYS_RES_IRQ rid X will always map to the message
1757  * at MSI-X table index X - 1 and will only be valid if a vector is
1758  * assigned to that table entry.
1759  */
1760 int
1761 pci_remap_msix_method(device_t dev, device_t child, int count,
1762     const u_int *vectors)
1763 {
1764 	struct pci_devinfo *dinfo = device_get_ivars(child);
1765 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1766 	struct resource_list_entry *rle;
1767 	int i, irq, j, *used;
1768 
1769 	/*
1770 	 * Have to have at least one message in the table but the
1771 	 * table can't be bigger than the actual MSI-X table in the
1772 	 * device.
1773 	 */
1774 	if (count == 0 || count > msix->msix_msgnum)
1775 		return (EINVAL);
1776 
1777 	/* Sanity check the vectors. */
1778 	for (i = 0; i < count; i++)
1779 		if (vectors[i] > msix->msix_alloc)
1780 			return (EINVAL);
1781 
1782 	/*
1783 	 * Make sure there aren't any holes in the vectors to be used.
1784 	 * It's a big pain to support it, and it doesn't really make
1785 	 * sense anyway.  Also, at least one vector must be used.
1786 	 */
1787 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1788 	    M_ZERO);
1789 	for (i = 0; i < count; i++)
1790 		if (vectors[i] != 0)
1791 			used[vectors[i] - 1] = 1;
1792 	for (i = 0; i < msix->msix_alloc - 1; i++)
1793 		if (used[i] == 0 && used[i + 1] == 1) {
1794 			free(used, M_DEVBUF);
1795 			return (EINVAL);
1796 		}
1797 	if (used[0] != 1) {
1798 		free(used, M_DEVBUF);
1799 		return (EINVAL);
1800 	}
1801 
1802 	/* Make sure none of the resources are allocated. */
1803 	for (i = 0; i < msix->msix_table_len; i++) {
1804 		if (msix->msix_table[i].mte_vector == 0)
1805 			continue;
1806 		if (msix->msix_table[i].mte_handlers > 0) {
1807 			free(used, M_DEVBUF);
1808 			return (EBUSY);
1809 		}
1810 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1811 		KASSERT(rle != NULL, ("missing resource"));
1812 		if (rle->res != NULL) {
1813 			free(used, M_DEVBUF);
1814 			return (EBUSY);
1815 		}
1816 	}
1817 
1818 	/* Free the existing resource list entries. */
1819 	for (i = 0; i < msix->msix_table_len; i++) {
1820 		if (msix->msix_table[i].mte_vector == 0)
1821 			continue;
1822 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1823 	}
1824 
1825 	/*
1826 	 * Build the new virtual table keeping track of which vectors are
1827 	 * used.
1828 	 */
1829 	free(msix->msix_table, M_DEVBUF);
1830 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1831 	    M_DEVBUF, M_WAITOK | M_ZERO);
1832 	for (i = 0; i < count; i++)
1833 		msix->msix_table[i].mte_vector = vectors[i];
1834 	msix->msix_table_len = count;
1835 
1836 	/* Free any unused IRQs and resize the vectors array if necessary. */
1837 	j = msix->msix_alloc - 1;
1838 	if (used[j] == 0) {
1839 		struct msix_vector *vec;
1840 
1841 		while (used[j] == 0) {
1842 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1843 			    msix->msix_vectors[j].mv_irq);
1844 			j--;
1845 		}
1846 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1847 		    M_WAITOK);
1848 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1849 		    (j + 1));
1850 		free(msix->msix_vectors, M_DEVBUF);
1851 		msix->msix_vectors = vec;
1852 		msix->msix_alloc = j + 1;
1853 	}
1854 	free(used, M_DEVBUF);
1855 
1856 	/* Map the IRQs onto the rids. */
1857 	for (i = 0; i < count; i++) {
1858 		if (vectors[i] == 0)
1859 			continue;
1860 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1861 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1862 		    irq, 1);
1863 	}
1864 
1865 	if (bootverbose) {
1866 		device_printf(child, "Remapped MSI-X IRQs as: ");
1867 		for (i = 0; i < count; i++) {
1868 			if (i != 0)
1869 				printf(", ");
1870 			if (vectors[i] == 0)
1871 				printf("---");
1872 			else
1873 				printf("%d",
1874 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1875 		}
1876 		printf("\n");
1877 	}
1878 
1879 	return (0);
1880 }
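
/*
 * Illustrative sketch (not from the source): a driver applying the
 * A-AB-B example described above, assuming pci_alloc_msix() returned
 * 3 of the 6 requested messages.  pci_remap_msix() is the pcivar.h
 * wrapper that resolves to this method.
 *
 *	u_int vectors[] = { 1, 0, 1, 2, 0, 2 };
 *
 *	if (pci_remap_msix(dev, nitems(vectors), vectors) == 0) {
 *		... rids 1, 3, 4, and 6 now have SYS_RES_IRQ entries ...
 *	}
 */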
1881 
1882 static int
1883 pci_release_msix(device_t dev, device_t child)
1884 {
1885 	struct pci_devinfo *dinfo = device_get_ivars(child);
1886 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1887 	struct resource_list_entry *rle;
1888 	int i;
1889 
1890 	/* Do we have any messages to release? */
1891 	if (msix->msix_alloc == 0)
1892 		return (ENODEV);
1893 
1894 	/* Make sure none of the resources are allocated. */
1895 	for (i = 0; i < msix->msix_table_len; i++) {
1896 		if (msix->msix_table[i].mte_vector == 0)
1897 			continue;
1898 		if (msix->msix_table[i].mte_handlers > 0)
1899 			return (EBUSY);
1900 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1901 		KASSERT(rle != NULL, ("missing resource"));
1902 		if (rle->res != NULL)
1903 			return (EBUSY);
1904 	}
1905 
1906 	/* Update control register to disable MSI-X. */
1907 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1908 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1909 	    msix->msix_ctrl, 2);
1910 
1911 	/* Free the resource list entries. */
1912 	for (i = 0; i < msix->msix_table_len; i++) {
1913 		if (msix->msix_table[i].mte_vector == 0)
1914 			continue;
1915 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1916 	}
1917 	free(msix->msix_table, M_DEVBUF);
1918 	msix->msix_table_len = 0;
1919 
1920 	/* Release the IRQs. */
1921 	for (i = 0; i < msix->msix_alloc; i++)
1922 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1923 		    msix->msix_vectors[i].mv_irq);
1924 	free(msix->msix_vectors, M_DEVBUF);
1925 	msix->msix_alloc = 0;
1926 	return (0);
1927 }
1928 
1929 /*
1930  * Return the maximum number of MSI-X messages this device supports.
1931  * Basically, assuming the MD code can alloc messages, this function
1932  * should return the maximum value that pci_alloc_msix() can return.
1933  * Thus, it is subject to the tunables, etc.
1934  */
1935 int
1936 pci_msix_count_method(device_t dev, device_t child)
1937 {
1938 	struct pci_devinfo *dinfo = device_get_ivars(child);
1939 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1940 
1941 	if (pci_do_msix && msix->msix_location != 0)
1942 		return (msix->msix_msgnum);
1943 	return (0);
1944 }
1945 
1946 int
1947 pci_msix_pba_bar_method(device_t dev, device_t child)
1948 {
1949 	struct pci_devinfo *dinfo = device_get_ivars(child);
1950 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1951 
1952 	if (pci_do_msix && msix->msix_location != 0)
1953 		return (msix->msix_pba_bar);
1954 	return (-1);
1955 }
1956 
1957 int
1958 pci_msix_table_bar_method(device_t dev, device_t child)
1959 {
1960 	struct pci_devinfo *dinfo = device_get_ivars(child);
1961 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1962 
1963 	if (pci_do_msix && msix->msix_location != 0)
1964 		return (msix->msix_table_bar);
1965 	return (-1);
1966 }
1967 
1968 /*
1969  * HyperTransport MSI mapping control
1970  */
1971 void
1972 pci_ht_map_msi(device_t dev, uint64_t addr)
1973 {
1974 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1975 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1976 
1977 	if (!ht->ht_msimap)
1978 		return;
1979 
1980 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1981 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1982 		/* Enable MSI -> HT mapping. */
1983 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1984 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1985 		    ht->ht_msictrl, 2);
1986 	}
1987 
1988 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1989 		/* Disable MSI -> HT mapping. */
1990 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1991 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1992 		    ht->ht_msictrl, 2);
1993 	}
1994 }
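
/*
 * Note on the checks above (informal reading of the code): the
 * mapping is only enabled when the target address falls in the same
 * 1MB-aligned window (addr >> 20) as the configured HT mapping
 * address, and it is disabled again when a zero address is passed.
 */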
1995 
1996 int
1997 pci_get_max_payload(device_t dev)
1998 {
1999 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2000 	int cap;
2001 	uint16_t val;
2002 
2003 	cap = dinfo->cfg.pcie.pcie_location;
2004 	if (cap == 0)
2005 		return (0);
2006 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2007 	val &= PCIEM_CTL_MAX_PAYLOAD;
2008 	val >>= 5;
2009 	return (1 << (val + 7));
2010 }
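
/*
 * Worked example of the decoding above (a sketch, not normative
 * text): the max payload field holds log2(size) - 7, so a field
 * value of 0 decodes to 128 bytes, 1 to 256, and so on up to 5 for
 * 4096 bytes.  pci_get_max_read_req() below decodes its field the
 * same way.
 */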
2011 
2012 int
2013 pci_get_max_read_req(device_t dev)
2014 {
2015 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2016 	int cap;
2017 	uint16_t val;
2018 
2019 	cap = dinfo->cfg.pcie.pcie_location;
2020 	if (cap == 0)
2021 		return (0);
2022 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2023 	val &= PCIEM_CTL_MAX_READ_REQUEST;
2024 	val >>= 12;
2025 	return (1 << (val + 7));
2026 }
2027 
2028 int
2029 pci_set_max_read_req(device_t dev, int size)
2030 {
2031 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2032 	int cap;
2033 	uint16_t val;
2034 
2035 	cap = dinfo->cfg.pcie.pcie_location;
2036 	if (cap == 0)
2037 		return (0);
2038 	if (size < 128)
2039 		size = 128;
2040 	if (size > 4096)
2041 		size = 4096;
2042 	size = (1 << (fls(size) - 1));
2043 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2044 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2045 	val |= (fls(size) - 8) << 12;
2046 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2047 	return (size);
2048 }
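
/*
 * A hypothetical example of the clamping above: a request for 1000
 * bytes is limited to the range [128, 4096], rounded down to the
 * nearest power of two (512), and encoded as fls(512) - 8 = 2, which
 * decodes back to 512 bytes.  The rounded size is returned.
 */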
2049 
2050 uint32_t
2051 pcie_read_config(device_t dev, int reg, int width)
2052 {
2053 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2054 	int cap;
2055 
2056 	cap = dinfo->cfg.pcie.pcie_location;
2057 	if (cap == 0) {
2058 		if (width == 2)
2059 			return (0xffff);
2060 		return (0xffffffff);
2061 	}
2062 
2063 	return (pci_read_config(dev, cap + reg, width));
2064 }
2065 
2066 void
2067 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2068 {
2069 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2070 	int cap;
2071 
2072 	cap = dinfo->cfg.pcie.pcie_location;
2073 	if (cap == 0)
2074 		return;
2075 	pci_write_config(dev, cap + reg, value, width);
2076 }
2077 
2078 /*
2079  * Adjusts a PCI-e capability register by clearing the bits in mask
2080  * and setting the bits in (value & mask).  Bits not set in mask are
2081  * not adjusted.
2082  *
2083  * Returns the old value on success or all ones on failure.
2084  */
2085 uint32_t
2086 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2087     int width)
2088 {
2089 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2090 	uint32_t old, new;
2091 	int cap;
2092 
2093 	cap = dinfo->cfg.pcie.pcie_location;
2094 	if (cap == 0) {
2095 		if (width == 2)
2096 			return (0xffff);
2097 		return (0xffffffff);
2098 	}
2099 
2100 	old = pci_read_config(dev, cap + reg, width);
2101 	new = old & ~mask;
2102 	new |= (value & mask);
2103 	pci_write_config(dev, cap + reg, new, width);
2104 	return (old);
2105 }
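
/*
 * A minimal usage sketch (illustrative; assumes the device has a PCI
 * Express capability): clear the ASPM control bits in the Link
 * Control register while leaving its other bits untouched.
 *
 *	uint32_t old;
 *
 *	old = pcie_adjust_config(dev, PCIER_LINK_CTL,
 *	    PCIEM_LINK_CTL_ASPMC, 0, 2);
 */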
2106 
2107 /*
2108  * Support for MSI (Message Signalled Interrupts).
2109  */
2110 void
2111 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2112     uint16_t data)
2113 {
2114 	struct pci_devinfo *dinfo = device_get_ivars(child);
2115 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2116 
2117 	/* Write data and address values. */
2118 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2119 	    address & 0xffffffff, 4);
2120 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2121 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2122 		    address >> 32, 4);
2123 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2124 		    data, 2);
2125 	} else
2126 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2127 		    2);
2128 
2129 	/* Enable MSI in the control register. */
2130 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2131 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2132 	    msi->msi_ctrl, 2);
2133 
2134 	/* Enable MSI -> HT mapping. */
2135 	pci_ht_map_msi(child, address);
2136 }
2137 
2138 void
2139 pci_disable_msi_method(device_t dev, device_t child)
2140 {
2141 	struct pci_devinfo *dinfo = device_get_ivars(child);
2142 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2143 
2144 	/* Disable MSI -> HT mapping. */
2145 	pci_ht_map_msi(child, 0);
2146 
2147 	/* Disable MSI in the control register. */
2148 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2149 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2150 	    msi->msi_ctrl, 2);
2151 }
2152 
2153 /*
2154  * Restore MSI registers during resume.  If MSI is enabled then
2155  * restore the data and address registers in addition to the control
2156  * register.
2157  */
2158 static void
2159 pci_resume_msi(device_t dev)
2160 {
2161 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2162 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2163 	uint64_t address;
2164 	uint16_t data;
2165 
2166 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2167 		address = msi->msi_addr;
2168 		data = msi->msi_data;
2169 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2170 		    address & 0xffffffff, 4);
2171 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2172 			pci_write_config(dev, msi->msi_location +
2173 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2174 			pci_write_config(dev, msi->msi_location +
2175 			    PCIR_MSI_DATA_64BIT, data, 2);
2176 		} else
2177 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2178 			    data, 2);
2179 	}
2180 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2181 	    2);
2182 }
2183 
2184 static int
2185 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2186 {
2187 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2188 	pcicfgregs *cfg = &dinfo->cfg;
2189 	struct resource_list_entry *rle;
2190 	struct msix_table_entry *mte;
2191 	struct msix_vector *mv;
2192 	uint64_t addr;
2193 	uint32_t data;
2194 	int error, i, j;
2195 
2196 	/*
2197 	 * Handle MSI first.  We try to find this IRQ among our list
2198 	 * of MSI IRQs.  If we find it, we request updated address and
2199 	 * data registers and apply the results.
2200 	 */
2201 	if (cfg->msi.msi_alloc > 0) {
2202 
2203 		/* If we don't have any active handlers, nothing to do. */
2204 		if (cfg->msi.msi_handlers == 0)
2205 			return (0);
2206 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2207 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2208 			    i + 1);
2209 			if (rle->start == irq) {
2210 				error = PCIB_MAP_MSI(device_get_parent(bus),
2211 				    dev, irq, &addr, &data);
2212 				if (error)
2213 					return (error);
2214 				pci_disable_msi(dev);
2215 				dinfo->cfg.msi.msi_addr = addr;
2216 				dinfo->cfg.msi.msi_data = data;
2217 				pci_enable_msi(dev, addr, data);
2218 				return (0);
2219 			}
2220 		}
2221 		return (ENOENT);
2222 	}
2223 
2224 	/*
2225 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2226 	 * we request the updated mapping info.  If that works, we go
2227 	 * through all the slots that use this IRQ and update them.
2228 	 */
2229 	if (cfg->msix.msix_alloc > 0) {
2230 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2231 			mv = &cfg->msix.msix_vectors[i];
2232 			if (mv->mv_irq == irq) {
2233 				error = PCIB_MAP_MSI(device_get_parent(bus),
2234 				    dev, irq, &addr, &data);
2235 				if (error)
2236 					return (error);
2237 				mv->mv_address = addr;
2238 				mv->mv_data = data;
2239 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2240 					mte = &cfg->msix.msix_table[j];
2241 					if (mte->mte_vector != i + 1)
2242 						continue;
2243 					if (mte->mte_handlers == 0)
2244 						continue;
2245 					pci_mask_msix(dev, j);
2246 					pci_enable_msix(dev, j, addr, data);
2247 					pci_unmask_msix(dev, j);
2248 				}
2249 			}
2250 		}
2251 		return (ENOENT);
2252 	}
2253 
2254 	return (ENOENT);
2255 }
2256 
2257 /*
2258  * Returns true if the specified device is blacklisted because MSI
2259  * doesn't work.
2260  */
2261 int
2262 pci_msi_device_blacklisted(device_t dev)
2263 {
2264 
2265 	if (!pci_honor_msi_blacklist)
2266 		return (0);
2267 
2268 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2269 }
2270 
2271 /*
2272  * Determine if MSI is blacklisted globally on this system.  Currently,
2273  * we just check for blacklisted chipsets as represented by the
2274  * host-PCI bridge at device 0:0:0.  In the future, it may become
2275  * necessary to check other system attributes, such as the kenv values
2276  * that give the motherboard manufacturer and model number.
2277  */
2278 static int
2279 pci_msi_blacklisted(void)
2280 {
2281 	device_t dev;
2282 
2283 	if (!pci_honor_msi_blacklist)
2284 		return (0);
2285 
2286 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2287 	if (!(pcie_chipset || pcix_chipset)) {
2288 		if (vm_guest != VM_GUEST_NO) {
2289 			/*
2290 			 * Whitelist older chipsets in virtual
2291 			 * machines known to support MSI.
2292 			 */
2293 			dev = pci_find_bsf(0, 0, 0);
2294 			if (dev != NULL)
2295 				return (!pci_has_quirk(pci_get_devid(dev),
2296 					PCI_QUIRK_ENABLE_MSI_VM));
2297 		}
2298 		return (1);
2299 	}
2300 
2301 	dev = pci_find_bsf(0, 0, 0);
2302 	if (dev != NULL)
2303 		return (pci_msi_device_blacklisted(dev));
2304 	return (0);
2305 }
2306 
2307 /*
2308  * Returns true if the specified device is blacklisted because MSI-X
2309  * doesn't work.  Note that this assumes that if MSI doesn't work,
2310  * MSI-X doesn't either.
2311  */
2312 int
2313 pci_msix_device_blacklisted(device_t dev)
2314 {
2315 
2316 	if (!pci_honor_msi_blacklist)
2317 		return (0);
2318 
2319 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2320 		return (1);
2321 
2322 	return (pci_msi_device_blacklisted(dev));
2323 }
2324 
2325 /*
2326  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2327  * is blacklisted, assume that MSI-X is as well.  Check for additional
2328  * chipsets where MSI works but MSI-X does not.
2329  */
2330 static int
2331 pci_msix_blacklisted(void)
2332 {
2333 	device_t dev;
2334 
2335 	if (!pci_honor_msi_blacklist)
2336 		return (0);
2337 
2338 	dev = pci_find_bsf(0, 0, 0);
2339 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2340 	    PCI_QUIRK_DISABLE_MSIX))
2341 		return (1);
2342 
2343 	return (pci_msi_blacklisted());
2344 }
2345 
2346 /*
2347  * Attempt to allocate *count MSI messages.  The actual number allocated is
2348  * returned in *count.  After this function returns, each message will be
2349  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2350  */
2351 int
2352 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2353 {
2354 	struct pci_devinfo *dinfo = device_get_ivars(child);
2355 	pcicfgregs *cfg = &dinfo->cfg;
2356 	struct resource_list_entry *rle;
2357 	int actual, error, i, irqs[32];
2358 	uint16_t ctrl;
2359 
2360 	/* Don't let count == 0 get us into trouble. */
2361 	if (*count == 0)
2362 		return (EINVAL);
2363 
2364 	/* If rid 0 is allocated, then fail. */
2365 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2366 	if (rle != NULL && rle->res != NULL)
2367 		return (ENXIO);
2368 
2369 	/* Already have allocated messages? */
2370 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2371 		return (ENXIO);
2372 
2373 	/* If MSI is blacklisted for this system, fail. */
2374 	if (pci_msi_blacklisted())
2375 		return (ENXIO);
2376 
2377 	/* MSI capability present? */
2378 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2379 		return (ENODEV);
2380 
2381 	if (bootverbose)
2382 		device_printf(child,
2383 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2384 		    *count, cfg->msi.msi_msgnum);
2385 
2386 	/* Don't ask for more than the device supports. */
2387 	actual = min(*count, cfg->msi.msi_msgnum);
2388 
2389 	/* Don't ask for more than 32 messages. */
2390 	actual = min(actual, 32);
2391 
2392 	/* MSI requires a power-of-2 number of messages. */
2393 	if (!powerof2(actual))
2394 		return (EINVAL);
2395 
2396 	for (;;) {
2397 		/* Try to allocate N messages. */
2398 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2399 		    actual, irqs);
2400 		if (error == 0)
2401 			break;
2402 		if (actual == 1)
2403 			return (error);
2404 
2405 		/* Try N / 2. */
2406 		actual >>= 1;
2407 	}
2408 
2409 	/*
2410 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2411 	 * resources in the irqs[] array, so add new resources
2412 	 * starting at rid 1.
2413 	 */
2414 	for (i = 0; i < actual; i++)
2415 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2416 		    irqs[i], irqs[i], 1);
2417 
2418 	if (bootverbose) {
2419 		if (actual == 1)
2420 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2421 		else {
2422 			int run;
2423 
2424 			/*
2425 			 * Be fancy and try to print contiguous runs
2426 			 * of IRQ values as ranges.  'run' is true if
2427 			 * we are in a range.
2428 			 */
2429 			device_printf(child, "using IRQs %d", irqs[0]);
2430 			run = 0;
2431 			for (i = 1; i < actual; i++) {
2432 
2433 				/* Still in a run? */
2434 				if (irqs[i] == irqs[i - 1] + 1) {
2435 					run = 1;
2436 					continue;
2437 				}
2438 
2439 				/* Finish previous range. */
2440 				if (run) {
2441 					printf("-%d", irqs[i - 1]);
2442 					run = 0;
2443 				}
2444 
2445 				/* Start new range. */
2446 				printf(",%d", irqs[i]);
2447 			}
2448 
2449 			/* Unfinished range? */
2450 			if (run)
2451 				printf("-%d", irqs[actual - 1]);
2452 			printf(" for MSI\n");
2453 		}
2454 	}
2455 
2456 	/* Update control register with actual count. */
2457 	ctrl = cfg->msi.msi_ctrl;
2458 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2459 	ctrl |= (ffs(actual) - 1) << 4;
2460 	cfg->msi.msi_ctrl = ctrl;
2461 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2462 
2463 	/* Update counts of alloc'd messages. */
2464 	cfg->msi.msi_alloc = actual;
2465 	cfg->msi.msi_handlers = 0;
2466 	*count = actual;
2467 	return (0);
2468 }
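
/*
 * Typical driver-side pattern (a sketch, error handling omitted;
 * pci_msi_count() and pci_alloc_msi() are the pcivar.h wrappers that
 * resolve to these methods).  Note that MSI rids start at 1:
 *
 *	int count, rid;
 *	struct resource *res;
 *
 *	count = pci_msi_count(dev);
 *	if (count != 0 && pci_alloc_msi(dev, &count) == 0) {
 *		rid = 1;
 *		res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 *		    RF_ACTIVE);
 *	}
 */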
2469 
2470 /* Release the MSI messages associated with this device. */
2471 int
2472 pci_release_msi_method(device_t dev, device_t child)
2473 {
2474 	struct pci_devinfo *dinfo = device_get_ivars(child);
2475 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2476 	struct resource_list_entry *rle;
2477 	int error, i, irqs[32];
2478 
2479 	/* Try MSI-X first. */
2480 	error = pci_release_msix(dev, child);
2481 	if (error != ENODEV)
2482 		return (error);
2483 
2484 	/* Do we have any messages to release? */
2485 	if (msi->msi_alloc == 0)
2486 		return (ENODEV);
2487 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2488 
2489 	/* Make sure none of the resources are allocated. */
2490 	if (msi->msi_handlers > 0)
2491 		return (EBUSY);
2492 	for (i = 0; i < msi->msi_alloc; i++) {
2493 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2494 		KASSERT(rle != NULL, ("missing MSI resource"));
2495 		if (rle->res != NULL)
2496 			return (EBUSY);
2497 		irqs[i] = rle->start;
2498 	}
2499 
2500 	/* Update control register with 0 count. */
2501 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2502 	    ("%s: MSI still enabled", __func__));
2503 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2504 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2505 	    msi->msi_ctrl, 2);
2506 
2507 	/* Release the messages. */
2508 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2509 	for (i = 0; i < msi->msi_alloc; i++)
2510 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2511 
2512 	/* Update alloc count. */
2513 	msi->msi_alloc = 0;
2514 	msi->msi_addr = 0;
2515 	msi->msi_data = 0;
2516 	return (0);
2517 }
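
/*
 * Sketch of the matching teardown in a driver detach path
 * (illustrative; 'res' and 'cookie' are hypothetical driver state):
 * interrupt handlers and SYS_RES_IRQ resources must be released
 * first, or this method returns EBUSY.
 *
 *	bus_teardown_intr(dev, res, cookie);
 *	bus_release_resource(dev, SYS_RES_IRQ, 1, res);
 *	pci_release_msi(dev);
 */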
2518 
2519 /*
2520  * Return the maximum number of MSI messages this device supports.
2521  * Basically, assuming the MD code can alloc messages, this function
2522  * should return the maximum value that pci_alloc_msi() can return.
2523  * Thus, it is subject to the tunables, etc.
2524  */
2525 int
2526 pci_msi_count_method(device_t dev, device_t child)
2527 {
2528 	struct pci_devinfo *dinfo = device_get_ivars(child);
2529 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2530 
2531 	if (pci_do_msi && msi->msi_location != 0)
2532 		return (msi->msi_msgnum);
2533 	return (0);
2534 }
2535 
2536 /* Free the pcicfgregs structure and all dependent data structures. */
2537 
2538 int
2539 pci_freecfg(struct pci_devinfo *dinfo)
2540 {
2541 	struct devlist *devlist_head;
2542 	struct pci_map *pm, *next;
2543 	int i;
2544 
2545 	devlist_head = &pci_devq;
2546 
2547 	if (dinfo->cfg.vpd.vpd_reg) {
2548 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2549 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2550 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2551 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2552 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2553 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2554 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2555 	}
2556 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2557 		free(pm, M_DEVBUF);
2558 	}
2559 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2560 	free(dinfo, M_DEVBUF);
2561 
2562 	/* increment the generation count */
2563 	pci_generation++;
2564 
2565 	/* we're losing one device */
2566 	pci_numdevs--;
2567 	return (0);
2568 }
2569 
2570 /*
2571  * PCI power management
2572  */
2573 int
2574 pci_set_powerstate_method(device_t dev, device_t child, int state)
2575 {
2576 	struct pci_devinfo *dinfo = device_get_ivars(child);
2577 	pcicfgregs *cfg = &dinfo->cfg;
2578 	uint16_t status;
2579 	int oldstate, highest, delay;
2580 
2581 	if (cfg->pp.pp_cap == 0)
2582 		return (EOPNOTSUPP);
2583 
2584 	/*
2585 	 * Optimize a no state change request away.  While it would be OK to
2586 	 * write to the hardware in theory, some devices have shown odd
2587 	 * behavior when going from D3 -> D3.
2588 	 */
2589 	oldstate = pci_get_powerstate(child);
2590 	if (oldstate == state)
2591 		return (0);
2592 
2593 	/*
2594 	 * The PCI power management specification states that after a state
2595 	 * transition between PCI power states, system software must
2596 	 * guarantee a minimal delay before the function accesses the device.
2597 	 * Compute the worst case delay that we need to guarantee before we
2598 	 * access the device.  Many devices will be responsive much more
2599 	 * quickly than this delay, but there are some that don't respond
2600 	 * instantly to state changes.  Transitions to/from D3 state require
2601 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2602 	 * is done below with DELAY rather than a sleeper function because
2603 	 * this function can be called from contexts where we cannot sleep.
2604 	 */
2605 	highest = (oldstate > state) ? oldstate : state;
2606 	if (highest == PCI_POWERSTATE_D3)
2607 	    delay = 10000;
2608 	else if (highest == PCI_POWERSTATE_D2)
2609 	    delay = 200;
2610 	else
2611 	    delay = 0;
2612 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2613 	    & ~PCIM_PSTAT_DMASK;
2614 	switch (state) {
2615 	case PCI_POWERSTATE_D0:
2616 		status |= PCIM_PSTAT_D0;
2617 		break;
2618 	case PCI_POWERSTATE_D1:
2619 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2620 			return (EOPNOTSUPP);
2621 		status |= PCIM_PSTAT_D1;
2622 		break;
2623 	case PCI_POWERSTATE_D2:
2624 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2625 			return (EOPNOTSUPP);
2626 		status |= PCIM_PSTAT_D2;
2627 		break;
2628 	case PCI_POWERSTATE_D3:
2629 		status |= PCIM_PSTAT_D3;
2630 		break;
2631 	default:
2632 		return (EINVAL);
2633 	}
2634 
2635 	if (bootverbose)
2636 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2637 		    state);
2638 
2639 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2640 	if (delay)
2641 		DELAY(delay);
2642 	return (0);
2643 }
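
/*
 * Usage sketch (illustrative): a driver suspend method typically
 * drops its device to D3 via the pcivar.h wrapper, and the resume
 * method restores D0 before touching the hardware.
 *
 *	pci_set_powerstate(dev, PCI_POWERSTATE_D3);
 *	...
 *	pci_set_powerstate(dev, PCI_POWERSTATE_D0);
 */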
2644 
2645 int
2646 pci_get_powerstate_method(device_t dev, device_t child)
2647 {
2648 	struct pci_devinfo *dinfo = device_get_ivars(child);
2649 	pcicfgregs *cfg = &dinfo->cfg;
2650 	uint16_t status;
2651 	int result;
2652 
2653 	if (cfg->pp.pp_cap != 0) {
2654 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2655 		switch (status & PCIM_PSTAT_DMASK) {
2656 		case PCIM_PSTAT_D0:
2657 			result = PCI_POWERSTATE_D0;
2658 			break;
2659 		case PCIM_PSTAT_D1:
2660 			result = PCI_POWERSTATE_D1;
2661 			break;
2662 		case PCIM_PSTAT_D2:
2663 			result = PCI_POWERSTATE_D2;
2664 			break;
2665 		case PCIM_PSTAT_D3:
2666 			result = PCI_POWERSTATE_D3;
2667 			break;
2668 		default:
2669 			result = PCI_POWERSTATE_UNKNOWN;
2670 			break;
2671 		}
2672 	} else {
2673 		/* No support, device is always at D0 */
2674 		result = PCI_POWERSTATE_D0;
2675 	}
2676 	return (result);
2677 }
2678 
2679 /*
2680  * Some convenience functions for PCI device drivers.
2681  */
2682 
2683 static __inline void
2684 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2685 {
2686 	uint16_t	command;
2687 
2688 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2689 	command |= bit;
2690 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2691 }
2692 
2693 static __inline void
2694 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2695 {
2696 	uint16_t	command;
2697 
2698 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2699 	command &= ~bit;
2700 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2701 }
2702 
2703 int
2704 pci_enable_busmaster_method(device_t dev, device_t child)
2705 {
2706 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2707 	return (0);
2708 }
2709 
2710 int
2711 pci_disable_busmaster_method(device_t dev, device_t child)
2712 {
2713 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2714 	return (0);
2715 }
2716 
2717 int
2718 pci_enable_io_method(device_t dev, device_t child, int space)
2719 {
2720 	uint16_t bit;
2721 
2722 	switch(space) {
2723 	case SYS_RES_IOPORT:
2724 		bit = PCIM_CMD_PORTEN;
2725 		break;
2726 	case SYS_RES_MEMORY:
2727 		bit = PCIM_CMD_MEMEN;
2728 		break;
2729 	default:
2730 		return (EINVAL);
2731 	}
2732 	pci_set_command_bit(dev, child, bit);
2733 	return (0);
2734 }
2735 
2736 int
2737 pci_disable_io_method(device_t dev, device_t child, int space)
2738 {
2739 	uint16_t bit;
2740 
2741 	switch(space) {
2742 	case SYS_RES_IOPORT:
2743 		bit = PCIM_CMD_PORTEN;
2744 		break;
2745 	case SYS_RES_MEMORY:
2746 		bit = PCIM_CMD_MEMEN;
2747 		break;
2748 	default:
2749 		return (EINVAL);
2750 	}
2751 	pci_clear_command_bit(dev, child, bit);
2752 	return (0);
2753 }
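
/*
 * Illustrative use from a driver attach routine (a sketch): enable
 * memory decoding and bus mastering via the pcivar.h wrappers before
 * programming the device.
 *
 *	pci_enable_io(dev, SYS_RES_MEMORY);
 *	pci_enable_busmaster(dev);
 */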
2754 
2755 /*
2756  * New style pci driver.  Parent device is either a pci-host-bridge or a
2757  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2758  */
2759 
2760 void
2761 pci_print_verbose(struct pci_devinfo *dinfo)
2762 {
2763 
2764 	if (bootverbose) {
2765 		pcicfgregs *cfg = &dinfo->cfg;
2766 
2767 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2768 		    cfg->vendor, cfg->device, cfg->revid);
2769 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2770 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2771 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2772 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2773 		    cfg->mfdev);
2774 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2775 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2776 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2777 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2778 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2779 		if (cfg->intpin > 0)
2780 			printf("\tintpin=%c, irq=%d\n",
2781 			    cfg->intpin +'a' -1, cfg->intline);
2782 		if (cfg->pp.pp_cap) {
2783 			uint16_t status;
2784 
2785 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2786 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2787 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2788 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2789 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2790 			    status & PCIM_PSTAT_DMASK);
2791 		}
2792 		if (cfg->msi.msi_location) {
2793 			int ctrl;
2794 
2795 			ctrl = cfg->msi.msi_ctrl;
2796 			printf("\tMSI supports %d message%s%s%s\n",
2797 			    cfg->msi.msi_msgnum,
2798 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2799 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2800 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2801 		}
2802 		if (cfg->msix.msix_location) {
2803 			printf("\tMSI-X supports %d message%s ",
2804 			    cfg->msix.msix_msgnum,
2805 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2806 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2807 				printf("in map 0x%x\n",
2808 				    cfg->msix.msix_table_bar);
2809 			else
2810 				printf("in maps 0x%x and 0x%x\n",
2811 				    cfg->msix.msix_table_bar,
2812 				    cfg->msix.msix_pba_bar);
2813 		}
2814 	}
2815 }
2816 
2817 static int
2818 pci_porten(device_t dev)
2819 {
2820 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2821 }
2822 
2823 static int
2824 pci_memen(device_t dev)
2825 {
2826 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2827 }
2828 
2829 void
2830 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2831     int *bar64)
2832 {
2833 	struct pci_devinfo *dinfo;
2834 	pci_addr_t map, testval;
2835 	int ln2range;
2836 	uint16_t cmd;
2837 
2838 	/*
2839 	 * The device ROM BAR is special.  It is always a 32-bit
2840 	 * memory BAR.  Bit 0 is special and should not be set when
2841 	 * sizing the BAR.
2842 	 */
2843 	dinfo = device_get_ivars(dev);
2844 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2845 		map = pci_read_config(dev, reg, 4);
2846 		pci_write_config(dev, reg, 0xfffffffe, 4);
2847 		testval = pci_read_config(dev, reg, 4);
2848 		pci_write_config(dev, reg, map, 4);
2849 		*mapp = map;
2850 		*testvalp = testval;
2851 		if (bar64 != NULL)
2852 			*bar64 = 0;
2853 		return;
2854 	}
2855 
2856 	map = pci_read_config(dev, reg, 4);
2857 	ln2range = pci_maprange(map);
2858 	if (ln2range == 64)
2859 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2860 
2861 	/*
2862 	 * Disable decoding via the command register before
2863 	 * determining the BAR's length since we will be placing it in
2864 	 * a weird state.
2865 	 */
2866 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2867 	pci_write_config(dev, PCIR_COMMAND,
2868 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2869 
2870 	/*
2871 	 * Determine the BAR's length by writing all 1's.  The bottom
2872 	 * log_2(size) bits of the BAR will stick as 0 when we read
2873 	 * the value back.
2874 	 */
2875 	pci_write_config(dev, reg, 0xffffffff, 4);
2876 	testval = pci_read_config(dev, reg, 4);
2877 	if (ln2range == 64) {
2878 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2879 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2880 	}
2881 
2882 	/*
2883 	 * Restore the original value of the BAR.  We may have reprogrammed
2884 	 * the BAR of the low-level console device, and when booting verbose
2885 	 * we need the console device to remain addressable.
2886 	 */
2887 	pci_write_config(dev, reg, map, 4);
2888 	if (ln2range == 64)
2889 		pci_write_config(dev, reg + 4, map >> 32, 4);
2890 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2891 
2892 	*mapp = map;
2893 	*testvalp = testval;
2894 	if (bar64 != NULL)
2895 		*bar64 = (ln2range == 64);
2896 }
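
/*
 * Worked example of the sizing logic above (a sketch): for a 32-bit
 * memory BAR covering 16KB, writing all 1's and reading back yields
 * 0xffffc000 in the address bits; the low 14 bits stuck at zero give
 * log2(size) = 14, i.e. a 16KB window.
 */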
2897 
2898 static void
2899 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2900 {
2901 	struct pci_devinfo *dinfo;
2902 	int ln2range;
2903 
2904 	/* The device ROM BAR is always a 32-bit memory BAR. */
2905 	dinfo = device_get_ivars(dev);
2906 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2907 		ln2range = 32;
2908 	else
2909 		ln2range = pci_maprange(pm->pm_value);
2910 	pci_write_config(dev, pm->pm_reg, base, 4);
2911 	if (ln2range == 64)
2912 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2913 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2914 	if (ln2range == 64)
2915 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2916 		    pm->pm_reg + 4, 4) << 32;
2917 }
2918 
2919 struct pci_map *
2920 pci_find_bar(device_t dev, int reg)
2921 {
2922 	struct pci_devinfo *dinfo;
2923 	struct pci_map *pm;
2924 
2925 	dinfo = device_get_ivars(dev);
2926 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2927 		if (pm->pm_reg == reg)
2928 			return (pm);
2929 	}
2930 	return (NULL);
2931 }
2932 
2933 int
2934 pci_bar_enabled(device_t dev, struct pci_map *pm)
2935 {
2936 	struct pci_devinfo *dinfo;
2937 	uint16_t cmd;
2938 
2939 	dinfo = device_get_ivars(dev);
2940 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2941 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2942 		return (0);
2943 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2944 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2945 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2946 	else
2947 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2948 }
2949 
2950 struct pci_map *
2951 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2952 {
2953 	struct pci_devinfo *dinfo;
2954 	struct pci_map *pm, *prev;
2955 
2956 	dinfo = device_get_ivars(dev);
2957 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2958 	pm->pm_reg = reg;
2959 	pm->pm_value = value;
2960 	pm->pm_size = size;
2961 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2962 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2963 		    reg));
2964 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2965 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2966 			break;
2967 	}
2968 	if (prev != NULL)
2969 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2970 	else
2971 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2972 	return (pm);
2973 }
2974 
2975 static void
2976 pci_restore_bars(device_t dev)
2977 {
2978 	struct pci_devinfo *dinfo;
2979 	struct pci_map *pm;
2980 	int ln2range;
2981 
2982 	dinfo = device_get_ivars(dev);
2983 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2984 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2985 			ln2range = 32;
2986 		else
2987 			ln2range = pci_maprange(pm->pm_value);
2988 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2989 		if (ln2range == 64)
2990 			pci_write_config(dev, pm->pm_reg + 4,
2991 			    pm->pm_value >> 32, 4);
2992 	}
2993 }
2994 
2995 /*
2996  * Add a resource based on a PCI map register.  Return 1 if the map
2997  * register is a 32-bit map register or 2 if it is a 64-bit register.
2998  */
2999 static int
3000 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3001     int force, int prefetch)
3002 {
3003 	struct pci_map *pm;
3004 	pci_addr_t base, map, testval;
3005 	pci_addr_t start, end, count;
3006 	int barlen, basezero, flags, maprange, mapsize, type;
3007 	uint16_t cmd;
3008 	struct resource *res;
3009 
3010 	/*
3011 	 * The BAR may already exist if the device is a CardBus card
3012 	 * whose CIS is stored in this BAR.
3013 	 */
3014 	pm = pci_find_bar(dev, reg);
3015 	if (pm != NULL) {
3016 		maprange = pci_maprange(pm->pm_value);
3017 		barlen = maprange == 64 ? 2 : 1;
3018 		return (barlen);
3019 	}
3020 
3021 	pci_read_bar(dev, reg, &map, &testval, NULL);
3022 	if (PCI_BAR_MEM(map)) {
3023 		type = SYS_RES_MEMORY;
3024 		if (map & PCIM_BAR_MEM_PREFETCH)
3025 			prefetch = 1;
3026 	} else
3027 		type = SYS_RES_IOPORT;
3028 	mapsize = pci_mapsize(testval);
3029 	base = pci_mapbase(map);
3030 #ifdef __PCI_BAR_ZERO_VALID
3031 	basezero = 0;
3032 #else
3033 	basezero = base == 0;
3034 #endif
3035 	maprange = pci_maprange(map);
3036 	barlen = maprange == 64 ? 2 : 1;
3037 
3038 	/*
3039 	 * For I/O registers, if the bottom bit is set and the reserved
3040 	 * bit above it is also set, we have a BAR that doesn't conform to
3041 	 * the spec, so ignore it.  Also, sanity check the size of the data
3042 	 * area against the type of resource involved.  Memory must be at
3043 	 * least 16 bytes in size, while I/O ranges must be at least 4.
3044 	 */
3045 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3046 		return (barlen);
3047 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3048 	    (type == SYS_RES_IOPORT && mapsize < 2))
3049 		return (barlen);
3050 
3051 	/* Save a record of this BAR. */
3052 	pm = pci_add_bar(dev, reg, map, mapsize);
3053 	if (bootverbose) {
3054 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3055 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3056 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3057 			printf(", port disabled\n");
3058 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3059 			printf(", memory disabled\n");
3060 		else
3061 			printf(", enabled\n");
3062 	}
3063 
3064 	/*
3065 	 * If base is 0, then we have problems if this architecture does
3066 	 * not allow that.  It is best to ignore such entries for the
3067 	 * moment.  These will be allocated later if the driver specifically
3068 	 * requests them.  However, some removable busses look better when
3069 	 * all resources are allocated, so allow '0' to be overridden.
3070 	 *
3071 	 * Similarly, treat maps whose value is the same as the test value
3072 	 * read back.  These maps have had all f's written to them by the
3073 	 * BIOS in an attempt to disable the resources.
3074 	 */
3075 	if (!force && (basezero || map == testval))
3076 		return (barlen);
3077 	if ((u_long)base != base) {
3078 		device_printf(bus,
3079 		    "pci%d:%d:%d:%d bar %#x too many address bits",
3080 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3081 		    pci_get_function(dev), reg);
3082 		return (barlen);
3083 	}
3084 
3085 	/*
3086 	 * This code theoretically does the right thing, but has
3087 	 * undesirable side effects in some cases where peripherals
3088 	 * respond oddly to having these bits enabled.  Allow the user to
3089 	 * turn them off (since pci_enable_io_modes is 1 by
3090 	 * default).
3091 	 */
3092 	if (pci_enable_io_modes) {
3093 		/* Turn on resources that have been left off by a lazy BIOS */
3094 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3095 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3096 			cmd |= PCIM_CMD_PORTEN;
3097 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3098 		}
3099 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3100 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3101 			cmd |= PCIM_CMD_MEMEN;
3102 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3103 		}
3104 	} else {
3105 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3106 			return (barlen);
3107 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3108 			return (barlen);
3109 	}
3110 
3111 	count = (pci_addr_t)1 << mapsize;
3112 	flags = RF_ALIGNMENT_LOG2(mapsize);
3113 	if (prefetch)
3114 		flags |= RF_PREFETCHABLE;
3115 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3116 		start = 0;	/* Let the parent decide. */
3117 		end = ~0;
3118 	} else {
3119 		start = base;
3120 		end = base + count - 1;
3121 	}
3122 	resource_list_add(rl, type, reg, start, end, count);
3123 
3124 	/*
3125 	 * Try to allocate the resource for this BAR from our parent
3126 	 * so that this resource range is already reserved.  The
3127 	 * driver for this device will later inherit this resource in
3128 	 * pci_alloc_resource().
3129 	 */
3130 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3131 	    flags);
3132 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3133 		/*
3134 		 * If the allocation fails, try to allocate a resource for
3135 		 * this BAR using any available range.  The firmware felt
3136 		 * it was important enough to assign a resource, so don't
3137 		 * disable decoding if we can help it.
3138 		 */
3139 		resource_list_delete(rl, type, reg);
3140 		resource_list_add(rl, type, reg, 0, ~0, count);
3141 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3142 		    count, flags);
3143 	}
3144 	if (res == NULL) {
3145 		/*
3146 		 * If the allocation fails, delete the resource list entry
3147 		 * and disable decoding for this device.
3148 		 *
3149 		 * If the driver requests this resource in the future,
3150 		 * pci_reserve_map() will try to allocate a fresh
3151 		 * resource range.
3152 		 */
3153 		resource_list_delete(rl, type, reg);
3154 		pci_disable_io(dev, type);
3155 		if (bootverbose)
3156 			device_printf(bus,
3157 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3158 			    pci_get_domain(dev), pci_get_bus(dev),
3159 			    pci_get_slot(dev), pci_get_function(dev), reg);
3160 	} else {
3161 		start = rman_get_start(res);
3162 		pci_write_bar(dev, pm, start);
3163 	}
3164 	return (barlen);
3165 }
3166 
3167 /*
3168  * For ATA devices we need to decide early what addressing mode to use.
3169  * Legacy demands that the primary and secondary ATA ports sit at the
3170  * same addresses that old ISA hardware did.  This dictates that we use
3171  * those addresses and ignore the BARs if we cannot set PCI native
3172  * addressing mode.
3173  */
3174 static void
3175 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3176     uint32_t prefetchmask)
3177 {
3178 	int rid, type, progif;
3179 #if 0
3180 	/* if this device supports PCI native addressing use it */
3181 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3182 	if ((progif & 0x8a) == 0x8a) {
3183 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3184 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3185 			printf("Trying ATA native PCI addressing mode\n");
3186 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3187 		}
3188 	}
3189 #endif
3190 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3191 	type = SYS_RES_IOPORT;
3192 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3193 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3194 		    prefetchmask & (1 << 0));
3195 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3196 		    prefetchmask & (1 << 1));
3197 	} else {
3198 		rid = PCIR_BAR(0);
3199 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3200 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3201 		    0x1f7, 8, 0);
3202 		rid = PCIR_BAR(1);
3203 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3204 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3205 		    0x3f6, 1, 0);
3206 	}
3207 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3208 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3209 		    prefetchmask & (1 << 2));
3210 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3211 		    prefetchmask & (1 << 3));
3212 	} else {
3213 		rid = PCIR_BAR(2);
3214 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3215 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3216 		    0x177, 8, 0);
3217 		rid = PCIR_BAR(3);
3218 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3219 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3220 		    0x376, 1, 0);
3221 	}
3222 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3223 	    prefetchmask & (1 << 4));
3224 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3225 	    prefetchmask & (1 << 5));
3226 }
3227 
3228 static void
3229 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3230 {
3231 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3232 	pcicfgregs *cfg = &dinfo->cfg;
3233 	char tunable_name[64];
3234 	int irq;
3235 
3236 	/* Has to have an intpin to have an interrupt. */
3237 	if (cfg->intpin == 0)
3238 		return;
3239 
3240 	/* Let the user override the IRQ with a tunable. */
3241 	irq = PCI_INVALID_IRQ;
3242 	snprintf(tunable_name, sizeof(tunable_name),
3243 	    "hw.pci%d.%d.%d.INT%c.irq",
3244 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3245 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3246 		irq = PCI_INVALID_IRQ;
3247 
3248 	/*
3249 	 * If we didn't get an IRQ via the tunable, then we either use the
3250 	 * IRQ value in the intline register or we ask the bus to route an
3251 	 * interrupt for us.  If force_route is true, then we only use the
3252 	 * value in the intline register if the bus was unable to assign an
3253 	 * IRQ.
3254 	 */
3255 	if (!PCI_INTERRUPT_VALID(irq)) {
3256 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3257 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3258 		if (!PCI_INTERRUPT_VALID(irq))
3259 			irq = cfg->intline;
3260 	}
3261 
3262 	/* If after all that we don't have an IRQ, just bail. */
3263 	if (!PCI_INTERRUPT_VALID(irq))
3264 		return;
3265 
3266 	/* Update the config register if it changed. */
3267 	if (irq != cfg->intline) {
3268 		cfg->intline = irq;
3269 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3270 	}
3271 
3272 	/* Add this IRQ as rid 0 interrupt resource. */
3273 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3274 }
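
/*
 * For example (hypothetical values), the tunable consulted above can
 * be set from loader.conf to force INTA of the device at domain 0,
 * bus 0, slot 12 onto IRQ 5:
 *
 *	hw.pci0.0.12.INTA.irq="5"
 */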
3275 
3276 /* Perform early OHCI takeover from SMM. */
3277 static void
3278 ohci_early_takeover(device_t self)
3279 {
3280 	struct resource *res;
3281 	uint32_t ctl;
3282 	int rid;
3283 	int i;
3284 
3285 	rid = PCIR_BAR(0);
3286 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3287 	if (res == NULL)
3288 		return;
3289 
3290 	ctl = bus_read_4(res, OHCI_CONTROL);
3291 	if (ctl & OHCI_IR) {
3292 		if (bootverbose)
3293 			printf("ohci early: "
3294 			    "SMM active, request owner change\n");
3295 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3296 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3297 			DELAY(1000);
3298 			ctl = bus_read_4(res, OHCI_CONTROL);
3299 		}
3300 		if (ctl & OHCI_IR) {
3301 			if (bootverbose)
3302 				printf("ohci early: "
3303 				    "SMM does not respond, resetting\n");
3304 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3305 		}
3306 		/* Disable interrupts */
3307 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3308 	}
3309 
3310 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3311 }
3312 
3313 /* Perform early UHCI takeover from SMM. */
3314 static void
3315 uhci_early_takeover(device_t self)
3316 {
3317 	struct resource *res;
3318 	int rid;
3319 
3320 	/*
3321 	 * Set the PIRQD enable bit and switch off all the others. We don't
3322 	 * want legacy support to interfere with us.  XXX Does this also mean
3323 	 * that the BIOS won't touch the keyboard anymore if it is connected
3324 	 * to the ports of the root hub?
3325 	 */
3326 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3327 
3328 	/* Disable interrupts */
3329 	rid = PCI_UHCI_BASE_REG;
3330 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3331 	if (res != NULL) {
3332 		bus_write_2(res, UHCI_INTR, 0);
3333 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3334 	}
3335 }
3336 
3337 /* Perform early EHCI takeover from SMM. */
3338 static void
3339 ehci_early_takeover(device_t self)
3340 {
3341 	struct resource *res;
3342 	uint32_t cparams;
3343 	uint32_t eec;
3344 	uint8_t eecp;
3345 	uint8_t bios_sem;
3346 	uint8_t offs;
3347 	int rid;
3348 	int i;
3349 
3350 	rid = PCIR_BAR(0);
3351 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3352 	if (res == NULL)
3353 		return;
3354 
3355 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3356 
3357 	/* Synchronise with the BIOS if it owns the controller. */
3358 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3359 	    eecp = EHCI_EECP_NEXT(eec)) {
3360 		eec = pci_read_config(self, eecp, 4);
3361 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3362 			continue;
3363 		}
3364 		bios_sem = pci_read_config(self, eecp +
3365 		    EHCI_LEGSUP_BIOS_SEM, 1);
3366 		if (bios_sem == 0) {
3367 			continue;
3368 		}
3369 		if (bootverbose)
3370 			printf("ehci early: "
3371 			    "SMM active, request owner change\n");
3372 
3373 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3374 
3375 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3376 			DELAY(1000);
3377 			bios_sem = pci_read_config(self, eecp +
3378 			    EHCI_LEGSUP_BIOS_SEM, 1);
3379 		}
3380 
3381 		if (bios_sem != 0) {
3382 			if (bootverbose)
3383 				printf("ehci early: "
3384 				    "SMM does not respond\n");
3385 		}
3386 		/* Disable interrupts */
3387 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3388 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3389 	}
3390 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3391 }
3392 
3393 /* Perform early XHCI takeover from SMM. */
3394 static void
3395 xhci_early_takeover(device_t self)
3396 {
3397 	struct resource *res;
3398 	uint32_t cparams;
3399 	uint32_t eec;
3400 	uint8_t eecp;
3401 	uint8_t bios_sem;
3402 	uint8_t offs;
3403 	int rid;
3404 	int i;
3405 
3406 	rid = PCIR_BAR(0);
3407 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3408 	if (res == NULL)
3409 		return;
3410 
3411 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3412 
3413 	eec = -1;
3414 
3415 	/* Synchronise with the BIOS if it owns the controller. */
3416 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3417 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3418 		eec = bus_read_4(res, eecp);
3419 
3420 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3421 			continue;
3422 
3423 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3424 		if (bios_sem == 0)
3425 			continue;
3426 
3427 		if (bootverbose)
3428 			printf("xhci early: "
3429 			    "SMM active, request owner change\n");
3430 
3431 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3432 
3433 		/* Wait a maximum of 5 seconds. */
3434 
3435 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3436 			DELAY(1000);
3437 			bios_sem = bus_read_1(res, eecp +
3438 			    XHCI_XECP_BIOS_SEM);
3439 		}
3440 
3441 		if (bios_sem != 0) {
3442 			if (bootverbose)
3443 				printf("xhci early: "
3444 				    "SMM does not respond\n");
3445 		}
3446 
3447 		/* Disable interrupts */
3448 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3449 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3450 		bus_read_4(res, offs + XHCI_USBSTS);
3451 	}
3452 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3453 }
3454 
3455 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3456 static void
3457 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3458     struct resource_list *rl)
3459 {
3460 	struct resource *res;
3461 	char *cp;
3462 	rman_res_t start, end, count;
3463 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3464 
3465 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3466 	case PCIM_HDRTYPE_BRIDGE:
3467 		sec_reg = PCIR_SECBUS_1;
3468 		sub_reg = PCIR_SUBBUS_1;
3469 		break;
3470 	case PCIM_HDRTYPE_CARDBUS:
3471 		sec_reg = PCIR_SECBUS_2;
3472 		sub_reg = PCIR_SUBBUS_2;
3473 		break;
3474 	default:
3475 		return;
3476 	}
3477 
3478 	/*
3479 	 * If the existing bus range is valid, attempt to reserve it
3480 	 * from our parent.  If this fails for any reason, clear the
3481 	 * secbus and subbus registers.
3482 	 *
3483 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3484 	 * This would at least preserve the existing sec_bus if it is
3485 	 * valid.
3486 	 */
3487 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3488 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3489 
3490 	/* Quirk handling. */
3491 	switch (pci_get_devid(dev)) {
3492 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3493 		sup_bus = pci_read_config(dev, 0x41, 1);
3494 		if (sup_bus != 0xff) {
3495 			sec_bus = sup_bus + 1;
3496 			sub_bus = sup_bus + 1;
3497 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3498 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3499 		}
3500 		break;
3501 
3502 	case 0x00dd10de:
3503 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3504 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3505 			break;
3506 		if (strncmp(cp, "Compal", 6) != 0) {
3507 			freeenv(cp);
3508 			break;
3509 		}
3510 		freeenv(cp);
3511 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3512 			break;
3513 		if (strncmp(cp, "08A0", 4) != 0) {
3514 			freeenv(cp);
3515 			break;
3516 		}
3517 		freeenv(cp);
3518 		if (sub_bus < 0xa) {
3519 			sub_bus = 0xa;
3520 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3521 		}
3522 		break;
3523 	}
3524 
3525 	if (bootverbose)
3526 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3527 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3528 		start = sec_bus;
3529 		end = sub_bus;
3530 		count = end - start + 1;
3531 
3532 		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
3533 
3534 		/*
3535 		 * If requested, clear secondary bus registers in
3536 		 * bridge devices to force a complete renumbering
3537 		 * rather than reserving the existing range.  However,
3538 		 * preserve the existing size.
3539 		 */
3540 		if (pci_clear_buses)
3541 			goto clear;
3542 
3543 		rid = 0;
3544 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3545 		    start, end, count, 0);
3546 		if (res != NULL)
3547 			return;
3548 
3549 		if (bootverbose)
3550 			device_printf(bus,
3551 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3552 			    pci_get_domain(dev), pci_get_bus(dev),
3553 			    pci_get_slot(dev), pci_get_function(dev));
3554 	}
3555 
3556 clear:
3557 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3558 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3559 }
3560 
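/*
 * Allocate a bus number range for a bridge on demand.  If the range
 * has not been reserved yet, reserve it and program the secondary and
 * subordinate bus registers to match the assigned range.
 */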
3561 static struct resource *
3562 pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
3563     rman_res_t end, rman_res_t count, u_int flags)
3564 {
3565 	struct pci_devinfo *dinfo;
3566 	pcicfgregs *cfg;
3567 	struct resource_list *rl;
3568 	struct resource *res;
3569 	int sec_reg, sub_reg;
3570 
3571 	dinfo = device_get_ivars(child);
3572 	cfg = &dinfo->cfg;
3573 	rl = &dinfo->resources;
3574 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3575 	case PCIM_HDRTYPE_BRIDGE:
3576 		sec_reg = PCIR_SECBUS_1;
3577 		sub_reg = PCIR_SUBBUS_1;
3578 		break;
3579 	case PCIM_HDRTYPE_CARDBUS:
3580 		sec_reg = PCIR_SECBUS_2;
3581 		sub_reg = PCIR_SUBBUS_2;
3582 		break;
3583 	default:
3584 		return (NULL);
3585 	}
3586 
3587 	if (*rid != 0)
3588 		return (NULL);
3589 
3590 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3591 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3592 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3593 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3594 		    start, end, count, flags & ~RF_ACTIVE);
3595 		if (res == NULL) {
3596 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3597 			device_printf(child, "allocating %ju bus%s failed\n",
3598 			    count, count == 1 ? "" : "es");
3599 			return (NULL);
3600 		}
3601 		if (bootverbose)
3602 			device_printf(child,
3603 			    "Lazy allocation of %ju bus%s at %ju\n", count,
3604 			    count == 1 ? "" : "es", rman_get_start(res));
3605 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3606 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3607 	}
3608 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3609 	    end, count, flags));
3610 }
3611 #endif
3612 
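/*
 * Map an Enhanced Allocation BAR Equivalent Indicator (BEI) to the
 * config register offset of the BAR, ROM, or SR-IOV VF BAR it
 * describes, or return -1 if the BEI does not name a resource.
 */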
3613 static int
3614 pci_ea_bei_to_rid(device_t dev, int bei)
3615 {
3616 #ifdef PCI_IOV
3617 	struct pci_devinfo *dinfo;
3618 	int iov_pos;
3619 	struct pcicfg_iov *iov;
3620 
3621 	dinfo = device_get_ivars(dev);
3622 	iov = dinfo->cfg.iov;
3623 	if (iov != NULL)
3624 		iov_pos = iov->iov_pos;
3625 	else
3626 		iov_pos = 0;
3627 #endif
3628 
3629 	/* Check if matches BAR */
3630 	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3631 	    (bei <= PCIM_EA_BEI_BAR_5))
3632 		return (PCIR_BAR(bei));
3633 
3634 	/* Check ROM */
3635 	if (bei == PCIM_EA_BEI_ROM)
3636 		return (PCIR_BIOS);
3637 
3638 #ifdef PCI_IOV
3639 	/* Check if matches VF_BAR */
3640 	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3641 	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3642 		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3643 		    iov_pos);
3644 #endif
3645 
3646 	return (-1);
3647 }
3648 
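/*
 * Return true if the given rid is described by an enabled Enhanced
 * Allocation entry.
 */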
3649 int
3650 pci_ea_is_enabled(device_t dev, int rid)
3651 {
3652 	struct pci_ea_entry *ea;
3653 	struct pci_devinfo *dinfo;
3654 
3655 	dinfo = device_get_ivars(dev);
3656 
3657 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3658 		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3659 			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3660 	}
3661 
3662 	return (0);
3663 }
3664 
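/*
 * Add and reserve resources described by the device's Enhanced
 * Allocation (EA) capability.  With alloc_iov set, only SR-IOV VF
 * BARs are considered; otherwise only regular BARs and the ROM are.
 * Entries that cannot be reserved are disabled so that the legacy
 * BAR path can retry them later.
 */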
3665 void
3666 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3667 {
3668 	struct pci_ea_entry *ea;
3669 	struct pci_devinfo *dinfo;
3670 	pci_addr_t start, end, count;
3671 	struct resource_list *rl;
3672 	int type, flags, rid;
3673 	struct resource *res;
3674 	uint32_t tmp;
3675 #ifdef PCI_IOV
3676 	struct pcicfg_iov *iov;
3677 #endif
3678 
3679 	dinfo = device_get_ivars(dev);
3680 	rl = &dinfo->resources;
3681 	flags = 0;
3682 
3683 #ifdef PCI_IOV
3684 	iov = dinfo->cfg.iov;
3685 #endif
3686 
3687 	if (dinfo->cfg.ea.ea_location == 0)
3688 		return;
3689 
3690 	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3691 
3692 		/*
3693 		 * TODO: Ignore EA-BAR if it is not enabled.
3694 		 *   Currently the EA implementation supports
3695 		 *   only the situation where the EA structure
3696 		 *   contains predefined entries.  If an entry is
3697 		 *   not enabled, leave it unallocated and fall
3698 		 *   back to the legacy-BAR mechanism.
3699 		 */
3700 		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3701 			continue;
3702 
3703 		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3704 		case PCIM_EA_P_MEM_PREFETCH:
3705 		case PCIM_EA_P_VF_MEM_PREFETCH:
3706 			flags = RF_PREFETCHABLE;
3707 			/* FALLTHROUGH */
3708 		case PCIM_EA_P_VF_MEM:
3709 		case PCIM_EA_P_MEM:
3710 			type = SYS_RES_MEMORY;
3711 			break;
3712 		case PCIM_EA_P_IO:
3713 			type = SYS_RES_IOPORT;
3714 			break;
3715 		default:
3716 			continue;
3717 		}
3718 
3719 		if (alloc_iov != 0) {
3720 #ifdef PCI_IOV
3721 			/* Allocating IOV, confirm BEI matches */
3722 			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3723 			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3724 				continue;
3725 #else
3726 			continue;
3727 #endif
3728 		} else {
3729 			/* Allocating BAR, confirm BEI matches */
3730 			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3731 			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3732 			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3733 				continue;
3734 		}
3735 
3736 		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3737 		if (rid < 0)
3738 			continue;
3739 
3740 		/* Skip resources already allocated by EA */
3741 		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3742 		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3743 			continue;
3744 
3745 		start = ea->eae_base;
3746 		count = ea->eae_max_offset + 1;
3747 #ifdef PCI_IOV
3748 		if (iov != NULL)
3749 			count = count * iov->iov_num_vfs;
3750 #endif
3751 		end = start + count - 1;
3752 		if (count == 0)
3753 			continue;
3754 
3755 		resource_list_add(rl, type, rid, start, end, count);
3756 		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3757 		    flags);
3758 		if (res == NULL) {
3759 			resource_list_delete(rl, type, rid);
3760 
3761 			/*
3762 			 * Failed to allocate using EA, so disable the
3763 			 * entry.  Allocation will be attempted again
3764 			 * later, this time using the legacy BAR registers.
3765 			 */
3766 			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3767 			tmp &= ~PCIM_EA_ENABLE;
3768 			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3769 
3770 			/*
3771 			 * Disabling the entry might fail if it is hardwired.
3772 			 * Re-read the flags to reflect the current status.
3773 			 */
3774 			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3775 
3776 			continue;
3777 		}
3778 
3779 		/* As per specification, fill BAR with zeros */
3780 		pci_write_config(dev, rid, 0, 4);
3781 	}
3782 }
3783 
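/*
 * Add the resources of a newly enumerated device: EA entries first,
 * then BARs (with ATA and per-device quirks applied), the interrupt
 * line, early takeover of USB controllers owned by the BIOS, and the
 * bus range behind bridges.
 */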
3784 void
3785 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3786 {
3787 	struct pci_devinfo *dinfo;
3788 	pcicfgregs *cfg;
3789 	struct resource_list *rl;
3790 	const struct pci_quirk *q;
3791 	uint32_t devid;
3792 	int i;
3793 
3794 	dinfo = device_get_ivars(dev);
3795 	cfg = &dinfo->cfg;
3796 	rl = &dinfo->resources;
3797 	devid = (cfg->device << 16) | cfg->vendor;
3798 
3799 	/* Allocate resources using Enhanced Allocation */
3800 	pci_add_resources_ea(bus, dev, 0);
3801 
3802 	/* ATA devices need special map treatment */
3803 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3804 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3805 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3806 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3807 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3808 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3809 	else
3810 		for (i = 0; i < cfg->nummaps;) {
3811 			/* Skip resources already managed by EA */
3812 			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
3813 			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
3814 			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
3815 				i++;
3816 				continue;
3817 			}
3818 
3819 			/*
3820 			 * Skip quirked resources.
3821 			 */
3822 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3823 				if (q->devid == devid &&
3824 				    q->type == PCI_QUIRK_UNMAP_REG &&
3825 				    q->arg1 == PCIR_BAR(i))
3826 					break;
3827 			if (q->devid != 0) {
3828 				i++;
3829 				continue;
3830 			}
3831 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3832 			    prefetchmask & (1 << i));
3833 		}
3834 
3835 	/*
3836 	 * Add additional, quirked resources.
3837 	 */
3838 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3839 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3840 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3841 
3842 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3843 #ifdef __PCI_REROUTE_INTERRUPT
3844 		/*
3845 		 * Try to re-route interrupts. Sometimes the BIOS or
3846 		 * firmware may leave bogus values in these registers.
3847 		 * If the re-route fails, then just stick with what we
3848 		 * have.
3849 		 */
3850 		pci_assign_interrupt(bus, dev, 1);
3851 #else
3852 		pci_assign_interrupt(bus, dev, 0);
3853 #endif
3854 	}
3855 
3856 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3857 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3858 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3859 			xhci_early_takeover(dev);
3860 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3861 			ehci_early_takeover(dev);
3862 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3863 			ohci_early_takeover(dev);
3864 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3865 			uhci_early_takeover(dev);
3866 	}
3867 
3868 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3869 	/*
3870 	 * Reserve resources for secondary bus ranges behind bridge
3871 	 * devices.
3872 	 */
3873 	pci_reserve_secbus(bus, dev, cfg, rl);
3874 #endif
3875 }
3876 
3877 static struct pci_devinfo *
3878 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3879     int slot, int func)
3880 {
3881 	struct pci_devinfo *dinfo;
3882 
3883 	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3884 	if (dinfo != NULL)
3885 		pci_add_child(dev, dinfo);
3886 
3887 	return (dinfo);
3888 }
3889 
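/*
 * Enumerate every slot and function on the given bus, adding a
 * device_t for each function found.  ARI is enabled first when
 * possible since it changes the legal slot/function space.
 */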
3890 void
3891 pci_add_children(device_t dev, int domain, int busno)
3892 {
3893 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3894 	device_t pcib = device_get_parent(dev);
3895 	struct pci_devinfo *dinfo;
3896 	int maxslots;
3897 	int s, f, pcifunchigh;
3898 	uint8_t hdrtype;
3899 	int first_func;
3900 
3901 	/*
3902 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3903 	 * enable ARI.  We must enable ARI before detecting the rest of the
3904 	 * functions on this bus as ARI changes the set of slots and functions
3905 	 * that are legal on this bus.
3906 	 */
3907 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
3908 	if (dinfo != NULL && pci_enable_ari)
3909 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3910 
3911 	/*
3912 	 * Start looking for new devices on slot 0 at function 1 because we
3913 	 * just identified the device at slot 0, function 0.
3914 	 */
3915 	first_func = 1;
3916 
3917 	maxslots = PCIB_MAXSLOTS(pcib);
3918 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3919 		pcifunchigh = 0;
3920 		f = 0;
3921 		DELAY(1);
3922 		hdrtype = REG(PCIR_HDRTYPE, 1);
3923 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3924 			continue;
3925 		if (hdrtype & PCIM_MFDEV)
3926 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3927 		for (f = first_func; f <= pcifunchigh; f++)
3928 			pci_identify_function(pcib, dev, domain, busno, s, f);
3929 	}
3930 #undef REG
3931 }
3932 
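/*
 * Rescan the bus: add devices for newly present functions, delete
 * children that have disappeared, and probe and attach the additions.
 */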
3933 int
3934 pci_rescan_method(device_t dev)
3935 {
3936 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3937 	device_t pcib = device_get_parent(dev);
3938 	struct pci_softc *sc;
3939 	device_t child, *devlist, *unchanged;
3940 	int devcount, error, i, j, maxslots, oldcount;
3941 	int busno, domain, s, f, pcifunchigh;
3942 	uint8_t hdrtype;
3943 
3944 	/* No need to check for ARI on a rescan. */
3945 	error = device_get_children(dev, &devlist, &devcount);
3946 	if (error)
3947 		return (error);
3948 	if (devcount != 0) {
3949 		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3950 		    M_NOWAIT | M_ZERO);
3951 		if (unchanged == NULL) {
3952 			free(devlist, M_TEMP);
3953 			return (ENOMEM);
3954 		}
3955 	} else
3956 		unchanged = NULL;
3957 
3958 	sc = device_get_softc(dev);
3959 	domain = pcib_get_domain(dev);
3960 	busno = pcib_get_bus(dev);
3961 	maxslots = PCIB_MAXSLOTS(pcib);
3962 	for (s = 0; s <= maxslots; s++) {
3963 		/* If function 0 is not present, skip to the next slot. */
3964 		f = 0;
3965 		if (REG(PCIR_VENDOR, 2) == 0xffff)
3966 			continue;
3967 		pcifunchigh = 0;
3968 		hdrtype = REG(PCIR_HDRTYPE, 1);
3969 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3970 			continue;
3971 		if (hdrtype & PCIM_MFDEV)
3972 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3973 		for (f = 0; f <= pcifunchigh; f++) {
3974 			if (REG(PCIR_VENDOR, 2) == 0xffff)
3975 				continue;
3976 
3977 			/*
3978 			 * Found a valid function.  Check if a
3979 			 * device_t for this device already exists.
3980 			 */
3981 			for (i = 0; i < devcount; i++) {
3982 				child = devlist[i];
3983 				if (child == NULL)
3984 					continue;
3985 				if (pci_get_slot(child) == s &&
3986 				    pci_get_function(child) == f) {
3987 					unchanged[i] = child;
3988 					goto next_func;
3989 				}
3990 			}
3991 
3992 			pci_identify_function(pcib, dev, domain, busno, s, f);
3993 		next_func:;
3994 		}
3995 	}
3996 
3997 	/* Remove devices that are no longer present. */
3998 	for (i = 0; i < devcount; i++) {
3999 		if (unchanged[i] != NULL)
4000 			continue;
4001 		device_delete_child(dev, devlist[i]);
4002 	}
4003 
4004 	free(devlist, M_TEMP);
4005 	oldcount = devcount;
4006 
4007 	/* Try to attach the devices just added. */
4008 	error = device_get_children(dev, &devlist, &devcount);
4009 	if (error) {
4010 		free(unchanged, M_TEMP);
4011 		return (error);
4012 	}
4013 
4014 	for (i = 0; i < devcount; i++) {
4015 		for (j = 0; j < oldcount; j++) {
4016 			if (devlist[i] == unchanged[j])
4017 				goto next_device;
4018 		}
4019 
4020 		device_probe_and_attach(devlist[i]);
4021 	next_device:;
4022 	}
4023 
4024 	free(unchanged, M_TEMP);
4025 	free(devlist, M_TEMP);
4026 	return (0);
4027 #undef REG
4028 }
4029 
4030 #ifdef PCI_IOV
4031 device_t
4032 pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4033     uint16_t did)
4034 {
4035 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4036 	device_t pcib;
4037 	int busno, slot, func;
4038 
4039 	pf_dinfo = device_get_ivars(pf);
4040 
4041 	pcib = device_get_parent(bus);
4042 
4043 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4044 
4045 	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4046 	    slot, func, vid, did);
4047 
4048 	vf_dinfo->cfg.flags |= PCICFG_VF;
4049 	pci_add_child(bus, vf_dinfo);
4050 
4051 	return (vf_dinfo->cfg.dev);
4052 }
4053 
4054 device_t
4055 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
4056     uint16_t vid, uint16_t did)
4057 {
4058 
4059 	return (pci_add_iov_child(bus, pf, rid, vid, did));
4060 }
4061 #endif
4062 
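/*
 * Create the device_t for a discovered function, attach its config
 * data and resource list, and notify listeners of the new device.
 */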
4063 void
4064 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
4065 {
4066 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
4067 	device_set_ivars(dinfo->cfg.dev, dinfo);
4068 	resource_list_init(&dinfo->resources);
4069 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
4070 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
4071 	pci_print_verbose(dinfo);
4072 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
4073 	pci_child_added(dinfo->cfg.dev);
4074 	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
4075 }
4076 
4077 void
4078 pci_child_added_method(device_t dev, device_t child)
4079 {
4080 
4081 }
4082 
4083 static int
4084 pci_probe(device_t dev)
4085 {
4086 
4087 	device_set_desc(dev, "PCI bus");
4088 
4089 	/* Allow other subclasses to override this driver. */
4090 	return (BUS_PROBE_GENERIC);
4091 }
4092 
4093 int
4094 pci_attach_common(device_t dev)
4095 {
4096 	struct pci_softc *sc;
4097 	int busno, domain;
4098 #ifdef PCI_DMA_BOUNDARY
4099 	int error, tag_valid;
4100 #endif
4101 #ifdef PCI_RES_BUS
4102 	int rid;
4103 #endif
4104 
4105 	sc = device_get_softc(dev);
4106 	domain = pcib_get_domain(dev);
4107 	busno = pcib_get_bus(dev);
4108 #ifdef PCI_RES_BUS
4109 	rid = 0;
4110 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
4111 	    1, 0);
4112 	if (sc->sc_bus == NULL) {
4113 		device_printf(dev, "failed to allocate bus number\n");
4114 		return (ENXIO);
4115 	}
4116 #endif
4117 	if (bootverbose)
4118 		device_printf(dev, "domain=%d, physical bus=%d\n",
4119 		    domain, busno);
4120 #ifdef PCI_DMA_BOUNDARY
4121 	tag_valid = 0;
4122 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
4123 	    devclass_find("pci")) {
4124 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
4125 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4126 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
4127 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
4128 		if (error)
4129 			device_printf(dev, "Failed to create DMA tag: %d\n",
4130 			    error);
4131 		else
4132 			tag_valid = 1;
4133 	}
4134 	if (!tag_valid)
4135 #endif
4136 		sc->sc_dma_tag = bus_get_dma_tag(dev);
4137 	return (0);
4138 }
4139 
4140 static int
4141 pci_attach(device_t dev)
4142 {
4143 	int busno, domain, error;
4144 
4145 	error = pci_attach_common(dev);
4146 	if (error)
4147 		return (error);
4148 
4149 	/*
4150 	 * Since there can be multiple independently numbered PCI
4151 	 * busses on systems with multiple PCI domains, we can't use
4152 	 * the unit number to decide which bus we are probing. We ask
4153 	 * the parent pcib what our domain and bus numbers are.
4154 	 */
4155 	domain = pcib_get_domain(dev);
4156 	busno = pcib_get_bus(dev);
4157 	pci_add_children(dev, domain, busno);
4158 	return (bus_generic_attach(dev));
4159 }
4160 
4161 static int
4162 pci_detach(device_t dev)
4163 {
4164 #ifdef PCI_RES_BUS
4165 	struct pci_softc *sc;
4166 #endif
4167 	int error;
4168 
4169 	error = bus_generic_detach(dev);
4170 	if (error)
4171 		return (error);
4172 #ifdef PCI_RES_BUS
4173 	sc = device_get_softc(dev);
4174 	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
4175 	if (error)
4176 		return (error);
4177 #endif
4178 	return (device_delete_children(dev));
4179 }
4180 
4181 static void
4182 pci_set_power_child(device_t dev, device_t child, int state)
4183 {
4184 	device_t pcib;
4185 	int dstate;
4186 
4187 	/*
4188 	 * Set the device to the given state.  If the firmware suggests
4189 	 * a different power state, use it instead.  If power management
4190 	 * is not present, the firmware is responsible for managing
4191 	 * device power.  Skip children that aren't attached since they
4192 	 * are handled separately.
4193 	 */
4194 	pcib = device_get_parent(dev);
4195 	dstate = state;
4196 	if (device_is_attached(child) &&
4197 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4198 		pci_set_powerstate(child, dstate);
4199 }
4200 
4201 int
4202 pci_suspend_child(device_t dev, device_t child)
4203 {
4204 	struct pci_devinfo *dinfo;
4205 	int error;
4206 
4207 	dinfo = device_get_ivars(child);
4208 
4209 	/*
4210 	 * Save the PCI configuration space for the child and set the
4211 	 * device in the appropriate power state for this sleep state.
4212 	 */
4213 	pci_cfg_save(child, dinfo, 0);
4214 
4215 	/* Suspend devices before potentially powering them down. */
4216 	error = bus_generic_suspend_child(dev, child);
4217 
4218 	if (error)
4219 		return (error);
4220 
4221 	if (pci_do_power_suspend)
4222 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4223 
4224 	return (0);
4225 }
4226 
4227 int
4228 pci_resume_child(device_t dev, device_t child)
4229 {
4230 	struct pci_devinfo *dinfo;
4231 
4232 	if (pci_do_power_resume)
4233 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4234 
4235 	dinfo = device_get_ivars(child);
4236 	pci_cfg_restore(child, dinfo);
4237 	if (!device_is_attached(child))
4238 		pci_cfg_save(child, dinfo, 1);
4239 
4240 	bus_generic_resume_child(dev, child);
4241 
4242 	return (0);
4243 }
4244 
4245 int
4246 pci_resume(device_t dev)
4247 {
4248 	device_t child, *devlist;
4249 	int error, i, numdevs;
4250 
4251 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4252 		return (error);
4253 
4254 	/*
4255 	 * Resume critical devices first, then everything else later.
4256 	 */
4257 	for (i = 0; i < numdevs; i++) {
4258 		child = devlist[i];
4259 		switch (pci_get_class(child)) {
4260 		case PCIC_DISPLAY:
4261 		case PCIC_MEMORY:
4262 		case PCIC_BRIDGE:
4263 		case PCIC_BASEPERIPH:
4264 			BUS_RESUME_CHILD(dev, child);
4265 			break;
4266 		}
4267 	}
4268 	for (i = 0; i < numdevs; i++) {
4269 		child = devlist[i];
4270 		switch (pci_get_class(child)) {
4271 		case PCIC_DISPLAY:
4272 		case PCIC_MEMORY:
4273 		case PCIC_BRIDGE:
4274 		case PCIC_BASEPERIPH:
4275 			break;
4276 		default:
4277 			BUS_RESUME_CHILD(dev, child);
4278 		}
4279 	}
4280 	free(devlist, M_TEMP);
4281 	return (0);
4282 }
4283 
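/*
 * Locate the PCI vendor database preloaded by the loader, if any,
 * and record its address and size for pci_describe_device().
 */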
4284 static void
4285 pci_load_vendor_data(void)
4286 {
4287 	caddr_t data;
4288 	void *ptr;
4289 	size_t sz;
4290 
4291 	data = preload_search_by_type("pci_vendor_data");
4292 	if (data != NULL) {
4293 		ptr = preload_fetch_addr(data);
4294 		sz = preload_fetch_size(data);
4295 		if (ptr != NULL && sz != 0) {
4296 			pci_vendordata = ptr;
4297 			pci_vendordata_size = sz;
4298 			/* terminate the database */
4299 			pci_vendordata[pci_vendordata_size] = '\n';
4300 		}
4301 	}
4302 }
4303 
4304 void
4305 pci_driver_added(device_t dev, driver_t *driver)
4306 {
4307 	int numdevs;
4308 	device_t *devlist;
4309 	device_t child;
4310 	struct pci_devinfo *dinfo;
4311 	int i;
4312 
4313 	if (bootverbose)
4314 		device_printf(dev, "driver added\n");
4315 	DEVICE_IDENTIFY(driver, dev);
4316 	if (device_get_children(dev, &devlist, &numdevs) != 0)
4317 		return;
4318 	for (i = 0; i < numdevs; i++) {
4319 		child = devlist[i];
4320 		if (device_get_state(child) != DS_NOTPRESENT)
4321 			continue;
4322 		dinfo = device_get_ivars(child);
4323 		pci_print_verbose(dinfo);
4324 		if (bootverbose)
4325 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4326 		pci_cfg_restore(child, dinfo);
4327 		if (device_probe_and_attach(child) != 0)
4328 			pci_child_detached(dev, child);
4329 	}
4330 	free(devlist, M_TEMP);
4331 }
4332 
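/*
 * Set up an interrupt for a child device.  In addition to the generic
 * work, program the MSI/MSI-X address and data registers through the
 * parent's PCIB_MAP_MSI method and keep INTx masking consistent with
 * the interrupt type in use.
 */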
4333 int
4334 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
4335     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
4336 {
4337 	struct pci_devinfo *dinfo;
4338 	struct msix_table_entry *mte;
4339 	struct msix_vector *mv;
4340 	uint64_t addr;
4341 	uint32_t data;
4342 	void *cookie;
4343 	int error, rid;
4344 
4345 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
4346 	    arg, &cookie);
4347 	if (error)
4348 		return (error);
4349 
4350 	/* If this is not a direct child, just bail out. */
4351 	if (device_get_parent(child) != dev) {
4352 		*cookiep = cookie;
4353 		return (0);
4354 	}
4355 
4356 	rid = rman_get_rid(irq);
4357 	if (rid == 0) {
4358 		/* Make sure that INTx is enabled */
4359 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4360 	} else {
4361 		/*
4362 		 * Check to see if the interrupt is MSI or MSI-X.
4363 		 * Ask our parent to map the MSI and give
4364 		 * us the address and data register values.
4365 		 * If we fail for some reason, teardown the
4366 		 * interrupt handler.
4367 		 */
4368 		dinfo = device_get_ivars(child);
4369 		if (dinfo->cfg.msi.msi_alloc > 0) {
4370 			if (dinfo->cfg.msi.msi_addr == 0) {
4371 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
4372 			    ("MSI has handlers, but vectors not mapped"));
4373 				error = PCIB_MAP_MSI(device_get_parent(dev),
4374 				    child, rman_get_start(irq), &addr, &data);
4375 				if (error)
4376 					goto bad;
4377 				dinfo->cfg.msi.msi_addr = addr;
4378 				dinfo->cfg.msi.msi_data = data;
4379 			}
4380 			if (dinfo->cfg.msi.msi_handlers == 0)
4381 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
4382 				    dinfo->cfg.msi.msi_data);
4383 			dinfo->cfg.msi.msi_handlers++;
4384 		} else {
4385 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4386 			    ("No MSI or MSI-X interrupts allocated"));
4387 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4388 			    ("MSI-X index too high"));
4389 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4390 			KASSERT(mte->mte_vector != 0, ("no message vector"));
4391 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
4392 			KASSERT(mv->mv_irq == rman_get_start(irq),
4393 			    ("IRQ mismatch"));
4394 			if (mv->mv_address == 0) {
4395 				KASSERT(mte->mte_handlers == 0,
4396 		    ("MSI-X table entry has handlers, but vector not mapped"));
4397 				error = PCIB_MAP_MSI(device_get_parent(dev),
4398 				    child, rman_get_start(irq), &addr, &data);
4399 				if (error)
4400 					goto bad;
4401 				mv->mv_address = addr;
4402 				mv->mv_data = data;
4403 			}
4404 			if (mte->mte_handlers == 0) {
4405 				pci_enable_msix(child, rid - 1, mv->mv_address,
4406 				    mv->mv_data);
4407 				pci_unmask_msix(child, rid - 1);
4408 			}
4409 			mte->mte_handlers++;
4410 		}
4411 
4412 		/*
4413 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
4414 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
4415 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
4416 		 */
4417 		if (!pci_has_quirk(pci_get_devid(child),
4418 		    PCI_QUIRK_MSI_INTX_BUG))
4419 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4420 		else
4421 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4422 	bad:
4423 		if (error) {
4424 			(void)bus_generic_teardown_intr(dev, child, irq,
4425 			    cookie);
4426 			return (error);
4427 		}
4428 	}
4429 	*cookiep = cookie;
4430 	return (0);
4431 }
4432 
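/*
 * Tear down an interrupt for a child device, dropping the MSI/MSI-X
 * handler count and masking the vector or disabling MSI once no
 * handlers remain.
 */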
4433 int
4434 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4435     void *cookie)
4436 {
4437 	struct msix_table_entry *mte;
4438 	struct resource_list_entry *rle;
4439 	struct pci_devinfo *dinfo;
4440 	int error, rid;
4441 
4442 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4443 		return (EINVAL);
4444 
4445 	/* If this isn't a direct child, just bail out */
4446 	if (device_get_parent(child) != dev)
4447 		return (bus_generic_teardown_intr(dev, child, irq, cookie));
4448 
4449 	rid = rman_get_rid(irq);
4450 	if (rid == 0) {
4451 		/* Mask INTx */
4452 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4453 	} else {
4454 		/*
4455 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4456 		 * decrement the appropriate handlers count and mask the
4457 		 * MSI-X message, or disable MSI messages if the count
4458 		 * drops to 0.
4459 		 */
4460 		dinfo = device_get_ivars(child);
4461 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4462 		if (rle->res != irq)
4463 			return (EINVAL);
4464 		if (dinfo->cfg.msi.msi_alloc > 0) {
4465 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4466 			    ("MSI index too high"));
4467 			if (dinfo->cfg.msi.msi_handlers == 0)
4468 				return (EINVAL);
4469 			dinfo->cfg.msi.msi_handlers--;
4470 			if (dinfo->cfg.msi.msi_handlers == 0)
4471 				pci_disable_msi(child);
4472 		} else {
4473 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4474 			    ("No MSI or MSI-X interrupts allocated"));
4475 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4476 			    ("MSI-X index too high"));
4477 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4478 			if (mte->mte_handlers == 0)
4479 				return (EINVAL);
4480 			mte->mte_handlers--;
4481 			if (mte->mte_handlers == 0)
4482 				pci_mask_msix(child, rid - 1);
4483 		}
4484 	}
4485 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4486 	if (rid > 0)
4487 		KASSERT(error == 0,
4488 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4489 	return (error);
4490 }
4491 
4492 int
4493 pci_print_child(device_t dev, device_t child)
4494 {
4495 	struct pci_devinfo *dinfo;
4496 	struct resource_list *rl;
4497 	int retval = 0;
4498 
4499 	dinfo = device_get_ivars(child);
4500 	rl = &dinfo->resources;
4501 
4502 	retval += bus_print_child_header(dev, child);
4503 
4504 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4505 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4506 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4507 	if (device_get_flags(dev))
4508 		retval += printf(" flags %#x", device_get_flags(dev));
4509 
4510 	retval += printf(" at device %d.%d", pci_get_slot(child),
4511 	    pci_get_function(child));
4512 
4513 	retval += bus_print_child_domain(dev, child);
4514 	retval += bus_print_child_footer(dev, child);
4515 
4516 	return (retval);
4517 }
4518 
4519 static const struct
4520 {
4521 	int		class;
4522 	int		subclass;
4523 	int		report; /* 0 = bootverbose, 1 = always */
4524 	const char	*desc;
4525 } pci_nomatch_tab[] = {
4526 	{PCIC_OLD,		-1,			1, "old"},
4527 	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
4528 	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
4529 	{PCIC_STORAGE,		-1,			1, "mass storage"},
4530 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
4531 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
4532 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
4533 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
4534 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
4535 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
4536 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
4537 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
4538 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
4539 	{PCIC_NETWORK,		-1,			1, "network"},
4540 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
4541 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
4542 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
4543 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
4544 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
4545 	{PCIC_DISPLAY,		-1,			1, "display"},
4546 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
4547 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
4548 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
4549 	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
4550 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
4551 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
4552 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
4553 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
4554 	{PCIC_MEMORY,		-1,			1, "memory"},
4555 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
4556 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
4557 	{PCIC_BRIDGE,		-1,			1, "bridge"},
4558 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
4559 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
4560 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
4561 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
4562 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
4563 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
4564 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
4565 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
4566 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
4567 	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
4568 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
4569 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
4570 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
4571 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
4572 	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
4573 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
4574 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
4575 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
4576 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
4577 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
4578 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
4579 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
4580 	{PCIC_INPUTDEV,		-1,			1, "input device"},
4581 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
4582 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
4583 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
4584 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
4585 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
4586 	{PCIC_DOCKING,		-1,			1, "docking station"},
4587 	{PCIC_PROCESSOR,	-1,			1, "processor"},
4588 	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
4589 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
4590 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
4591 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
4592 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
4593 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
4594 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
4595 	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
4596 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
4597 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
4598 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
4599 	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
4600 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
4601 	{PCIC_SATCOM,		-1,			1, "satellite communication"},
4602 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
4603 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
4604 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
4605 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
4606 	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
4607 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
4608 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
4609 	{PCIC_DASP,		-1,			0, "dasp"},
4610 	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
4611 	{0, 0, 0,		NULL}
4612 };
4613 
4614 void
4615 pci_probe_nomatch(device_t dev, device_t child)
4616 {
4617 	int i, report;
4618 	const char *cp, *scp;
4619 	char *device;
4620 
4621 	/*
4622 	 * Look for a listing for this device in a loaded device database.
4623 	 */
4624 	report = 1;
4625 	if ((device = pci_describe_device(child)) != NULL) {
4626 		device_printf(dev, "<%s>", device);
4627 		free(device, M_DEVBUF);
4628 	} else {
4629 		/*
4630 		 * Scan the class/subclass descriptions for a general
4631 		 * description.
4632 		 */
4633 		cp = "unknown";
4634 		scp = NULL;
4635 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4636 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4637 				if (pci_nomatch_tab[i].subclass == -1) {
4638 					cp = pci_nomatch_tab[i].desc;
4639 					report = pci_nomatch_tab[i].report;
4640 				} else if (pci_nomatch_tab[i].subclass ==
4641 				    pci_get_subclass(child)) {
4642 					scp = pci_nomatch_tab[i].desc;
4643 					report = pci_nomatch_tab[i].report;
4644 				}
4645 			}
4646 		}
4647 		if (report || bootverbose) {
4648 			device_printf(dev, "<%s%s%s>",
4649 			    cp ? cp : "",
4650 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4651 			    scp ? scp : "");
4652 		}
4653 	}
4654 	if (report || bootverbose) {
4655 		printf(" at device %d.%d (no driver attached)\n",
4656 		    pci_get_slot(child), pci_get_function(child));
4657 	}
4658 	pci_cfg_save(child, device_get_ivars(child), 1);
4659 }
4660 
4661 void
4662 pci_child_detached(device_t dev, device_t child)
4663 {
4664 	struct pci_devinfo *dinfo;
4665 	struct resource_list *rl;
4666 
4667 	dinfo = device_get_ivars(child);
4668 	rl = &dinfo->resources;
4669 
4670 	/*
4671 	 * Have to deallocate IRQs before releasing any MSI messages and
4672 	 * have to release MSI messages before deallocating any memory
4673 	 * BARs.
4674 	 */
4675 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4676 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4677 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4678 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4679 		(void)pci_release_msi(child);
4680 	}
4681 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4682 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4683 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4684 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4685 #ifdef PCI_RES_BUS
4686 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4687 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4688 #endif
4689 
4690 	pci_cfg_save(child, dinfo, 1);
4691 }
4692 
4693 /*
4694  * Parse the PCI device database, if loaded, and return a pointer to a
4695  * description of the device.
4696  *
4697  * The database is flat text formatted as follows:
4698  *
4699  * Any line not in a valid format is ignored.
4700  * Lines are terminated with newline '\n' characters.
4701  *
4702  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4703  * the vendor name.
4704  *
4705  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4706  * - devices cannot be listed without a corresponding VENDOR line.
4707  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4708  * another TAB, then the device name.
4709  */
4710 
4711 /*
4712  * Assuming (ptr) points to the beginning of a line in the database,
4713  * return the vendor or device and description of the next entry.
4714  * The value of (vendor) or (device) inappropriate for the entry type
4715  * is set to -1.  Returns nonzero at the end of the database.
4716  *
4717  * Note that this is not entirely robust in the face of corrupt data;
4718  * we attempt to safeguard against this by spamming the end of the
4719  * database with a newline when we initialise.
4720  */
4721 static int
4722 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4723 {
4724 	char	*cp = *ptr;
4725 	int	left;
4726 
4727 	*device = -1;
4728 	*vendor = -1;
4729 	**desc = '\0';
4730 	for (;;) {
4731 		left = pci_vendordata_size - (cp - pci_vendordata);
4732 		if (left <= 0) {
4733 			*ptr = cp;
4734 			return(1);
4735 			return (1);
4736 
4737 		/* vendor entry? */
4738 		if (*cp != '\t' &&
4739 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4740 			break;
4741 		/* device entry? */
4742 		if (*cp == '\t' &&
4743 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4744 			break;
4745 
4746 		/* skip to next line */
4747 		while (*cp != '\n' && left > 0) {
4748 			cp++;
4749 			left--;
4750 		}
4751 		if (*cp == '\n') {
4752 			cp++;
4753 			left--;
4754 		}
4755 	}
4756 	/* skip to next line */
4757 	while (*cp != '\n' && left > 0) {
4758 		cp++;
4759 		left--;
4760 	}
4761 	if (*cp == '\n' && left > 0)
4762 		cp++;
4763 	*ptr = cp;
4764 	return(0);
4765 	return (0);
4766 
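/*
 * Look up the vendor and device names of 'dev' in the vendor database
 * and return a malloc'd "vendor, device" description, or NULL if the
 * device cannot be described.
 */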
4767 static char *
4768 pci_describe_device(device_t dev)
4769 {
4770 	int	vendor, device;
4771 	char	*desc, *vp, *dp, *line;
4772 
4773 	desc = vp = dp = NULL;
4774 
4775 	/*
4776 	 * If we have no vendor data, we can't do anything.
4777 	 */
4778 	if (pci_vendordata == NULL)
4779 		goto out;
4780 
4781 	/*
4782 	 * Scan the vendor data looking for this device
4783 	 */
4784 	line = pci_vendordata;
4785 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4786 		goto out;
4787 	for (;;) {
4788 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4789 			goto out;
4790 		if (vendor == pci_get_vendor(dev))
4791 			break;
4792 	}
4793 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4794 		goto out;
4795 	for (;;) {
4796 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4797 			*dp = 0;
4798 			break;
4799 		}
4800 		if (vendor != -1) {
4801 			*dp = 0;
4802 			break;
4803 		}
4804 		if (device == pci_get_device(dev))
4805 			break;
4806 	}
4807 	if (dp[0] == '\0')
4808 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4809 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4810 	    NULL)
4811 		sprintf(desc, "%s, %s", vp, dp);
4812 out:
4813 	if (vp != NULL)
4814 		free(vp, M_DEVBUF);
4815 	if (dp != NULL)
4816 		free(dp, M_DEVBUF);
4817 	return(desc);
4818 	return (desc);
4819 
4820 int
4821 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4822 {
4823 	struct pci_devinfo *dinfo;
4824 	pcicfgregs *cfg;
4825 
4826 	dinfo = device_get_ivars(child);
4827 	cfg = &dinfo->cfg;
4828 
4829 	switch (which) {
4830 	case PCI_IVAR_ETHADDR:
4831 		/*
4832 		 * The generic accessor doesn't deal with failure, so
4833 		 * we set the return value, then return an error.
4834 		 */
4835 		*((uint8_t **) result) = NULL;
4836 		return (EINVAL);
4837 	case PCI_IVAR_SUBVENDOR:
4838 		*result = cfg->subvendor;
4839 		break;
4840 	case PCI_IVAR_SUBDEVICE:
4841 		*result = cfg->subdevice;
4842 		break;
4843 	case PCI_IVAR_VENDOR:
4844 		*result = cfg->vendor;
4845 		break;
4846 	case PCI_IVAR_DEVICE:
4847 		*result = cfg->device;
4848 		break;
4849 	case PCI_IVAR_DEVID:
4850 		*result = (cfg->device << 16) | cfg->vendor;
4851 		break;
4852 	case PCI_IVAR_CLASS:
4853 		*result = cfg->baseclass;
4854 		break;
4855 	case PCI_IVAR_SUBCLASS:
4856 		*result = cfg->subclass;
4857 		break;
4858 	case PCI_IVAR_PROGIF:
4859 		*result = cfg->progif;
4860 		break;
4861 	case PCI_IVAR_REVID:
4862 		*result = cfg->revid;
4863 		break;
4864 	case PCI_IVAR_INTPIN:
4865 		*result = cfg->intpin;
4866 		break;
4867 	case PCI_IVAR_IRQ:
4868 		*result = cfg->intline;
4869 		break;
4870 	case PCI_IVAR_DOMAIN:
4871 		*result = cfg->domain;
4872 		break;
4873 	case PCI_IVAR_BUS:
4874 		*result = cfg->bus;
4875 		break;
4876 	case PCI_IVAR_SLOT:
4877 		*result = cfg->slot;
4878 		break;
4879 	case PCI_IVAR_FUNCTION:
4880 		*result = cfg->func;
4881 		break;
4882 	case PCI_IVAR_CMDREG:
4883 		*result = cfg->cmdreg;
4884 		break;
4885 	case PCI_IVAR_CACHELNSZ:
4886 		*result = cfg->cachelnsz;
4887 		break;
4888 	case PCI_IVAR_MINGNT:
4889 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4890 			*result = -1;
4891 			return (EINVAL);
4892 		}
4893 		*result = cfg->mingnt;
4894 		break;
4895 	case PCI_IVAR_MAXLAT:
4896 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4897 			*result = -1;
4898 			return (EINVAL);
4899 		}
4900 		*result = cfg->maxlat;
4901 		break;
4902 	case PCI_IVAR_LATTIMER:
4903 		*result = cfg->lattimer;
4904 		break;
4905 	default:
4906 		return (ENOENT);
4907 	}
4908 	return (0);
4909 }
4910 
4911 int
4912 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4913 {
4914 	struct pci_devinfo *dinfo;
4915 
4916 	dinfo = device_get_ivars(child);
4917 
4918 	switch (which) {
4919 	case PCI_IVAR_INTPIN:
4920 		dinfo->cfg.intpin = value;
4921 		return (0);
4922 	case PCI_IVAR_ETHADDR:
4923 	case PCI_IVAR_SUBVENDOR:
4924 	case PCI_IVAR_SUBDEVICE:
4925 	case PCI_IVAR_VENDOR:
4926 	case PCI_IVAR_DEVICE:
4927 	case PCI_IVAR_DEVID:
4928 	case PCI_IVAR_CLASS:
4929 	case PCI_IVAR_SUBCLASS:
4930 	case PCI_IVAR_PROGIF:
4931 	case PCI_IVAR_REVID:
4932 	case PCI_IVAR_IRQ:
4933 	case PCI_IVAR_DOMAIN:
4934 	case PCI_IVAR_BUS:
4935 	case PCI_IVAR_SLOT:
4936 	case PCI_IVAR_FUNCTION:
4937 		return (EINVAL);	/* disallow for now */
4938 
4939 	default:
4940 		return (ENOENT);
4941 	}
4942 }
4943 
4944 #include "opt_ddb.h"
4945 #ifdef DDB
4946 #include <ddb/ddb.h>
4947 #include <sys/cons.h>
4948 
4949 /*
4950  * List resources based on pci map registers, used from within ddb
4951  */
4952 
4953 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4954 {
4955 	struct pci_devinfo *dinfo;
4956 	struct devlist *devlist_head;
4957 	struct pci_conf *p;
4958 	const char *name;
4959 	int i, error, none_count;
4960 
4961 	none_count = 0;
4962 	/* get the head of the device queue */
4963 	devlist_head = &pci_devq;
4964 
4965 	/*
4966 	 * Go through the list of devices and print out devices
4967 	 */
4968 	for (error = 0, i = 0,
4969 	     dinfo = STAILQ_FIRST(devlist_head);
4970 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4971 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4972 
4973 		/* Populate pd_name and pd_unit */
4974 		name = NULL;
4975 		if (dinfo->cfg.dev)
4976 			name = device_get_name(dinfo->cfg.dev);
4977 
4978 		p = &dinfo->conf;
4979 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4980 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4981 			(name && *name) ? name : "none",
4982 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4983 			none_count++,
4984 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4985 			p->pc_sel.pc_func, (p->pc_class << 16) |
4986 			(p->pc_subclass << 8) | p->pc_progif,
4987 			(p->pc_subdevice << 16) | p->pc_subvendor,
4988 			(p->pc_device << 16) | p->pc_vendor,
4989 			p->pc_revid, p->pc_hdr);
4990 	}
4991 }
4992 #endif /* DDB */
4993 
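/*
 * Lazily reserve the backing for a BAR that was not allocated at
 * enumeration time: size the BAR, force the request up to the BAR's
 * natural size and alignment, reserve a range from the parent, and
 * write the assigned address back into the BAR.
 */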
4994 static struct resource *
4995 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4996     rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
4997     u_int flags)
4998 {
4999 	struct pci_devinfo *dinfo = device_get_ivars(child);
5000 	struct resource_list *rl = &dinfo->resources;
5001 	struct resource *res;
5002 	struct pci_map *pm;
5003 	pci_addr_t map, testval;
5004 	int mapsize;
5005 
5006 	res = NULL;
5007 
5008 	/* If rid is managed by EA, ignore it */
5009 	if (pci_ea_is_enabled(child, *rid))
5010 		goto out;
5011 
5012 	pm = pci_find_bar(child, *rid);
5013 	if (pm != NULL) {
5014 		/* This is a BAR that we failed to allocate earlier. */
5015 		mapsize = pm->pm_size;
5016 		map = pm->pm_value;
5017 	} else {
5018 		/*
5019 		 * Weed out the bogons, and figure out how large the
5020 		 * BAR/map is.  BARs that read back 0 here are bogus
5021 		 * and unimplemented.  Note: atapci in legacy mode are
5022 		 * and unimplemented.  Note: atapci devices in legacy
5023 		 * mode are special and handled elsewhere in the code.
5024 		 * If you have an atapci device in legacy mode and it
5025 		 * fails here, that other code is broken.
5026 		pci_read_bar(child, *rid, &map, &testval, NULL);
5027 
5028 		/*
5029 		 * Determine the size of the BAR and ignore BARs with a size
5030 		 * of 0.  Device ROM BARs use a different mask value.
5031 		 */
5032 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
5033 			mapsize = pci_romsize(testval);
5034 		else
5035 			mapsize = pci_mapsize(testval);
5036 		if (mapsize == 0)
5037 			goto out;
5038 		pm = pci_add_bar(child, *rid, map, mapsize);
5039 	}
5040 
5041 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
5042 		if (type != SYS_RES_MEMORY) {
5043 			if (bootverbose)
5044 				device_printf(dev,
5045 				    "child %s requested type %d for rid %#x,"
5046 				    " but the BAR says it is a memio\n",
5047 				    device_get_nameunit(child), type, *rid);
5048 			goto out;
5049 		}
5050 	} else {
5051 		if (type != SYS_RES_IOPORT) {
5052 			if (bootverbose)
5053 				device_printf(dev,
5054 				    "child %s requested type %d for rid %#x,"
5055 				    " but the BAR says it is an ioport\n",
5056 				    device_get_nameunit(child), type, *rid);
5057 			goto out;
5058 		}
5059 	}
5060 
5061 	/*
5062 	 * For real BARs, we need to override the size that
5063 	 * the driver requests, because that's what the BAR
5064 	 * actually uses and we would otherwise have a
5065 	 * situation where we might allocate the excess to
5066 	 * another driver, which won't work.
5067 	 */
5068 	count = ((pci_addr_t)1 << mapsize) * num;
5069 	if (RF_ALIGNMENT(flags) < mapsize)
5070 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
5071 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
5072 		flags |= RF_PREFETCHABLE;
5073 
5074 	/*
5075 	 * Allocate enough resource, and then write back the
5076 	 * appropriate BAR for that resource.
5077 	 */
5078 	resource_list_add(rl, type, *rid, start, end, count);
5079 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
5080 	    count, flags & ~RF_ACTIVE);
5081 	if (res == NULL) {
5082 		resource_list_delete(rl, type, *rid);
5083 		device_printf(child,
5084 		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
5085 		    count, *rid, type, start, end);
5086 		goto out;
5087 	}
5088 	if (bootverbose)
5089 		device_printf(child,
5090 		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
5091 		    count, *rid, type, rman_get_start(res));
5092 	map = rman_get_start(res);
5093 	pci_write_bar(child, pm, map);
5094 out:
5095 	return (res);
5096 }
5097 
5098 struct resource *
5099 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
5100     rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
5101     u_int flags)
5102 {
5103 	struct pci_devinfo *dinfo;
5104 	struct resource_list *rl;
5105 	struct resource_list_entry *rle;
5106 	struct resource *res;
5107 	pcicfgregs *cfg;
5108 
5109 	/*
5110 	 * Perform lazy resource allocation
5111 	 */
5112 	dinfo = device_get_ivars(child);
5113 	rl = &dinfo->resources;
5114 	cfg = &dinfo->cfg;
5115 	switch (type) {
5116 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
5117 	case PCI_RES_BUS:
5118 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
5119 		    flags));
5120 #endif
5121 	case SYS_RES_IRQ:
5122 		/*
5123 		 * Can't alloc legacy interrupt once MSI messages have
5124 		 * been allocated.
5125 		 */
5126 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
5127 		    cfg->msix.msix_alloc > 0))
5128 			return (NULL);
5129 
5130 		/*
5131 		 * If the child device doesn't have an interrupt
5132 		 * routed and is deserving of an interrupt, try to
5133 		 * assign it one.
5134 		 */
5135 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
5136 		    (cfg->intpin != 0))
5137 			pci_assign_interrupt(dev, child, 0);
5138 		break;
5139 	case SYS_RES_IOPORT:
5140 	case SYS_RES_MEMORY:
5141 #ifdef NEW_PCIB
5142 		/*
5143 		 * PCI-PCI bridge I/O window resources are not BARs.
5144 		 * For those allocations just pass the request up the
5145 		 * tree.
5146 		 */
5147 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
5148 			switch (*rid) {
5149 			case PCIR_IOBASEL_1:
5150 			case PCIR_MEMBASE_1:
5151 			case PCIR_PMBASEL_1:
5152 				/*
5153 				 * XXX: Should we bother creating a resource
5154 				 * list entry?
5155 				 */
5156 				return (bus_generic_alloc_resource(dev, child,
5157 				    type, rid, start, end, count, flags));
5158 			}
5159 		}
5160 #endif
5161 		/* Reserve resources for this BAR if needed. */
5162 		rle = resource_list_find(rl, type, *rid);
5163 		if (rle == NULL) {
5164 			res = pci_reserve_map(dev, child, type, rid, start, end,
5165 			    count, num, flags);
5166 			if (res == NULL)
5167 				return (NULL);
5168 		}
5169 	}
5170 	return (resource_list_alloc(rl, dev, child, type, rid,
5171 	    start, end, count, flags));
5172 }
5173 
5174 struct resource *
5175 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
5176     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
5177 {
5178 #ifdef PCI_IOV
5179 	struct pci_devinfo *dinfo;
5180 #endif
5181 
5182 	if (device_get_parent(child) != dev)
5183 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
5184 		    type, rid, start, end, count, flags));
5185 
5186 #ifdef PCI_IOV
5187 	dinfo = device_get_ivars(child);
5188 	if (dinfo->cfg.flags & PCICFG_VF) {
5189 		switch (type) {
5190 		/* VFs can't have I/O BARs. */
5191 		case SYS_RES_IOPORT:
5192 			return (NULL);
5193 		case SYS_RES_MEMORY:
5194 			return (pci_vf_alloc_mem_resource(dev, child, rid,
5195 			    start, end, count, flags));
5196 		}
5197 
5198 		/* Fall through for other types of resource allocations. */
5199 	}
5200 #endif
5201 
5202 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
5203 	    count, 1, flags));
5204 }
5205 
5206 int
5207 pci_release_resource(device_t dev, device_t child, int type, int rid,
5208     struct resource *r)
5209 {
5210 	struct pci_devinfo *dinfo;
5211 	struct resource_list *rl;
5212 	pcicfgregs *cfg;
5213 
5214 	if (device_get_parent(child) != dev)
5215 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
5216 		    type, rid, r));
5217 
5218 	dinfo = device_get_ivars(child);
5219 	cfg = &dinfo->cfg;
5220 
5221 #ifdef PCI_IOV
5222 	if (dinfo->cfg.flags & PCICFG_VF) {
5223 		switch (type) {
5224 		/* VFs can't have I/O BARs. */
5225 		case SYS_RES_IOPORT:
5226 			return (EDOOFUS);
5227 		case SYS_RES_MEMORY:
5228 			return (pci_vf_release_mem_resource(dev, child, rid,
5229 			    r));
5230 		}
5231 
5232 		/* Fall through for other types of resource allocations. */
5233 	}
5234 #endif
5235 
5236 #ifdef NEW_PCIB
5237 	/*
5238 	 * PCI-PCI bridge I/O window resources are not BARs.  For
5239 	 * those allocations just pass the request up the tree.
5240 	 */
5241 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
5242 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
5243 		switch (rid) {
5244 		case PCIR_IOBASEL_1:
5245 		case PCIR_MEMBASE_1:
5246 		case PCIR_PMBASEL_1:
5247 			return (bus_generic_release_resource(dev, child, type,
5248 			    rid, r));
5249 		}
5250 	}
5251 #endif
5252 
5253 	rl = &dinfo->resources;
5254 	return (resource_list_release(rl, dev, child, type, rid, r));
5255 }
5256 
5257 int
5258 pci_activate_resource(device_t dev, device_t child, int type, int rid,
5259     struct resource *r)
5260 {
5261 	struct pci_devinfo *dinfo;
5262 	int error;
5263 
5264 	error = bus_generic_activate_resource(dev, child, type, rid, r);
5265 	if (error)
5266 		return (error);
5267 
5268 	/* Enable decoding in the command register when activating BARs. */
5269 	if (device_get_parent(child) == dev) {
5270 		/* Device ROMs need their decoding explicitly enabled. */
5271 		dinfo = device_get_ivars(child);
5272 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5273 			pci_write_bar(child, pci_find_bar(child, rid),
5274 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5275 		switch (type) {
5276 		case SYS_RES_IOPORT:
5277 		case SYS_RES_MEMORY:
5278 			error = PCI_ENABLE_IO(dev, child, type);
5279 			break;
5280 		}
5281 	}
5282 	return (error);
5283 }
5284 
5285 int
5286 pci_deactivate_resource(device_t dev, device_t child, int type,
5287     int rid, struct resource *r)
5288 {
5289 	struct pci_devinfo *dinfo;
5290 	int error;
5291 
5292 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5293 	if (error)
5294 		return (error);
5295 
5296 	/* Disable decoding for device ROMs. */
5297 	if (device_get_parent(child) == dev) {
5298 		dinfo = device_get_ivars(child);
5299 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5300 			pci_write_bar(child, pci_find_bar(child, rid),
5301 			    rman_get_start(r));
5302 	}
5303 	return (0);
5304 }
5305 
5306 void
5307 pci_child_deleted(device_t dev, device_t child)
5308 {
5309 	struct resource_list_entry *rle;
5310 	struct resource_list *rl;
5311 	struct pci_devinfo *dinfo;
5312 
5313 	dinfo = device_get_ivars(child);
5314 	rl = &dinfo->resources;
5315 
5316 	EVENTHANDLER_INVOKE(pci_delete_device, child);
5317 
5318 	/* Turn off access to resources we're about to free */
5319 	if (bus_child_present(child) != 0) {
5320 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5321 		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5322 
5323 		pci_disable_busmaster(child);
5324 	}
5325 
5326 	/* Free all allocated resources */
5327 	STAILQ_FOREACH(rle, rl, link) {
5328 		if (rle->res) {
5329 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5330 			    resource_list_busy(rl, rle->type, rle->rid)) {
5331 				pci_printf(&dinfo->cfg,
5332 				    "Resource still owned, oops. "
5333 				    "(type=%d, rid=%d, addr=%jx)\n",
5334 				    rle->type, rle->rid,
5335 				    rman_get_start(rle->res));
5336 				bus_release_resource(child, rle->type, rle->rid,
5337 				    rle->res);
5338 			}
5339 			resource_list_unreserve(rl, dev, child, rle->type,
5340 			    rle->rid);
5341 		}
5342 	}
5343 	resource_list_free(rl);
5344 
5345 	pci_freecfg(dinfo);
5346 }
5347 
5348 void
5349 pci_delete_resource(device_t dev, device_t child, int type, int rid)
5350 {
5351 	struct pci_devinfo *dinfo;
5352 	struct resource_list *rl;
5353 	struct resource_list_entry *rle;
5354 
5355 	if (device_get_parent(child) != dev)
5356 		return;
5357 
5358 	dinfo = device_get_ivars(child);
5359 	rl = &dinfo->resources;
5360 	rle = resource_list_find(rl, type, rid);
5361 	if (rle == NULL)
5362 		return;
5363 
5364 	if (rle->res) {
5365 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5366 		    resource_list_busy(rl, type, rid)) {
5367 			device_printf(dev, "delete_resource: "
5368 			    "Resource still owned by child, oops. "
5369 			    "(type=%d, rid=%d, addr=%jx)\n",
5370 			    type, rid, rman_get_start(rle->res));
5371 			return;
5372 		}
5373 		resource_list_unreserve(rl, dev, child, type, rid);
5374 	}
5375 	resource_list_delete(rl, type, rid);
5376 }
5377 
5378 struct resource_list *
5379 pci_get_resource_list(device_t dev, device_t child)
5380 {
5381 	struct pci_devinfo *dinfo = device_get_ivars(child);
5382 
5383 	return (&dinfo->resources);
5384 }
5385 
5386 bus_dma_tag_t
5387 pci_get_dma_tag(device_t bus, device_t dev)
5388 {
5389 	struct pci_softc *sc = device_get_softc(bus);
5390 
5391 	return (sc->sc_dma_tag);
5392 }
5393 
5394 uint32_t
5395 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5396 {
5397 	struct pci_devinfo *dinfo = device_get_ivars(child);
5398 	pcicfgregs *cfg = &dinfo->cfg;
5399 
5400 #ifdef PCI_IOV
5401 	/*
5402 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5403 	 * emulate them here.
5404 	 */
5405 	if (cfg->flags & PCICFG_VF) {
5406 		if (reg == PCIR_VENDOR) {
5407 			switch (width) {
5408 			case 4:
5409 				return (cfg->device << 16 | cfg->vendor);
5410 			case 2:
5411 				return (cfg->vendor);
5412 			case 1:
5413 				return (cfg->vendor & 0xff);
5414 			default:
5415 				return (0xffffffff);
5416 			}
5417 		} else if (reg == PCIR_DEVICE) {
5418 			switch (width) {
5419 			/* Note that an unaligned 4-byte read is an error. */
5420 			case 2:
5421 				return (cfg->device);
5422 			case 1:
5423 				return (cfg->device & 0xff);
5424 			default:
5425 				return (0xffffffff);
5426 			}
5427 		}
5428 	}
5429 #endif
5430 
5431 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5432 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5433 }
5434 
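/*
 * For example (hypothetical caller), a driver reading its device ID
 * ends up in this method; for an SR-IOV VF the value is synthesized
 * from the cached config as above:
 *
 *	uint16_t devid = pci_read_config(dev, PCIR_DEVICE, 2);
 */
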
5435 void
5436 pci_write_config_method(device_t dev, device_t child, int reg,
5437     uint32_t val, int width)
5438 {
5439 	struct pci_devinfo *dinfo = device_get_ivars(child);
5440 	pcicfgregs *cfg = &dinfo->cfg;
5441 
5442 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5443 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5444 }
5445 
5446 int
5447 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5448     size_t buflen)
5449 {
5450 
5451 	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5452 	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5453 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5454 	return (0);
5455 }
5456 
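/*
 * Sample of the resulting location string (values are illustrative):
 *
 *	slot=0 function=1 dbsf=pci0:2:0:1
 */
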
5457 int
5458 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5459     size_t buflen)
5460 {
5461 	struct pci_devinfo *dinfo;
5462 	pcicfgregs *cfg;
5463 
5464 	dinfo = device_get_ivars(child);
5465 	cfg = &dinfo->cfg;
5466 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5467 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5468 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5469 	    cfg->progif);
5470 	return (0);
5471 }
5472 
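/*
 * Sample of the resulting pnpinfo string (values are illustrative):
 *
 *	vendor=0x8086 device=0x10d3 subvendor=0x15d9 subdevice=0x060a
 *	class=0x020000
 */
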
5473 int
5474 pci_assign_interrupt_method(device_t dev, device_t child)
5475 {
5476 	struct pci_devinfo *dinfo = device_get_ivars(child);
5477 	pcicfgregs *cfg = &dinfo->cfg;
5478 
5479 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5480 	    cfg->intpin));
5481 }
5482 
5483 static void
5484 pci_lookup(void *arg, const char *name, device_t *dev)
5485 {
5486 	long val;
5487 	char *end;
5488 	int domain, bus, slot, func;
5489 
5490 	if (*dev != NULL)
5491 		return;
5492 
5493 	/*
5494 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5495 	 * pciB:S:F.  In the latter case, the domain is assumed to
5496 	 * be zero.
5497 	 */
5498 	if (strncmp(name, "pci", 3) != 0)
5499 		return;
5500 	val = strtol(name + 3, &end, 10);
5501 	if (val < 0 || val > INT_MAX || *end != ':')
5502 		return;
5503 	domain = val;
5504 	val = strtol(end + 1, &end, 10);
5505 	if (val < 0 || val > INT_MAX || *end != ':')
5506 		return;
5507 	bus = val;
5508 	val = strtol(end + 1, &end, 10);
5509 	if (val < 0 || val > INT_MAX)
5510 		return;
5511 	slot = val;
5512 	if (*end == ':') {
5513 		val = strtol(end + 1, &end, 10);
5514 		if (val < 0 || val > INT_MAX || *end != '\0')
5515 			return;
5516 		func = val;
5517 	} else if (*end == '\0') {
5518 		func = slot;
5519 		slot = bus;
5520 		bus = domain;
5521 		domain = 0;
5522 	} else
5523 		return;
5524 
5525 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5526 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5527 		return;
5528 
5529 	*dev = pci_find_dbsf(domain, bus, slot, func);
5530 }
5531 
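/*
 * For example, these two selectors both resolve to domain 0, bus 2,
 * slot 0, function 1:
 *
 *	pci0:2:0:1
 *	pci2:0:1
 */
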
5532 static int
5533 pci_modevent(module_t mod, int what, void *arg)
5534 {
5535 	static struct cdev *pci_cdev;
5536 	static eventhandler_tag tag;
5537 
5538 	switch (what) {
5539 	case MOD_LOAD:
5540 		STAILQ_INIT(&pci_devq);
5541 		pci_generation = 0;
5542 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5543 		    "pci");
5544 		pci_load_vendor_data();
5545 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5546 		    1000);
5547 		break;
5548 
5549 	case MOD_UNLOAD:
5550 		if (tag != NULL)
5551 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5552 		destroy_dev(pci_cdev);
5553 		break;
5554 	}
5555 
5556 	return (0);
5557 }
5558 
5559 static void
5560 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5561 {
5562 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5563 	struct pcicfg_pcie *cfg;
5564 	int version, pos;
5565 
5566 	cfg = &dinfo->cfg.pcie;
5567 	pos = cfg->pcie_location;
5568 
5569 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5570 
5571 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5572 
5573 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5574 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5575 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5576 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5577 
5578 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5579 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5580 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5581 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5582 
5583 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5584 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5585 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5586 
5587 	if (version > 1) {
5588 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5589 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5590 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5591 	}
5592 #undef WREG
5593 }
5594 
5595 static void
5596 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5597 {
5598 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5599 	    dinfo->cfg.pcix.pcix_command, 2);
5600 }
5601 
5602 void
5603 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5604 {
5605 
5606 	/*
5607 	 * Restore the device to full power mode.  We must do this
5608 	 * before we restore the registers because moving from D3 to
5609 	 * D0 will cause the chip's BARs and some other registers to
5610 	 * be reset to unknown power-on reset values.  Cut down
5611 	 * the noise on boot by doing nothing if we are already in
5612 	 * state D0.
5613 	 */
5614 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5615 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5616 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5617 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5618 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5619 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5620 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5621 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5622 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5623 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5624 	case PCIM_HDRTYPE_NORMAL:
5625 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5626 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5627 		break;
5628 	case PCIM_HDRTYPE_BRIDGE:
5629 		pci_write_config(dev, PCIR_SECLAT_1,
5630 		    dinfo->cfg.bridge.br_seclat, 1);
5631 		pci_write_config(dev, PCIR_SUBBUS_1,
5632 		    dinfo->cfg.bridge.br_subbus, 1);
5633 		pci_write_config(dev, PCIR_SECBUS_1,
5634 		    dinfo->cfg.bridge.br_secbus, 1);
5635 		pci_write_config(dev, PCIR_PRIBUS_1,
5636 		    dinfo->cfg.bridge.br_pribus, 1);
5637 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5638 		    dinfo->cfg.bridge.br_control, 2);
5639 		break;
5640 	case PCIM_HDRTYPE_CARDBUS:
5641 		pci_write_config(dev, PCIR_SECLAT_2,
5642 		    dinfo->cfg.bridge.br_seclat, 1);
5643 		pci_write_config(dev, PCIR_SUBBUS_2,
5644 		    dinfo->cfg.bridge.br_subbus, 1);
5645 		pci_write_config(dev, PCIR_SECBUS_2,
5646 		    dinfo->cfg.bridge.br_secbus, 1);
5647 		pci_write_config(dev, PCIR_PRIBUS_2,
5648 		    dinfo->cfg.bridge.br_pribus, 1);
5649 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5650 		    dinfo->cfg.bridge.br_control, 2);
5651 		break;
5652 	}
5653 	pci_restore_bars(dev);
5654 
5655 	/*
5656 	 * Restore extended capabilities for PCI-Express and PCI-X
5657 	 */
5658 	if (dinfo->cfg.pcie.pcie_location != 0)
5659 		pci_cfg_restore_pcie(dev, dinfo);
5660 	if (dinfo->cfg.pcix.pcix_location != 0)
5661 		pci_cfg_restore_pcix(dev, dinfo);
5662 
5663 	/* Restore MSI and MSI-X configurations if they are present. */
5664 	if (dinfo->cfg.msi.msi_location != 0)
5665 		pci_resume_msi(dev);
5666 	if (dinfo->cfg.msix.msix_location != 0)
5667 		pci_resume_msix(dev);
5668 
5669 #ifdef PCI_IOV
5670 	if (dinfo->cfg.iov != NULL)
5671 		pci_iov_cfg_restore(dev, dinfo);
5672 #endif
5673 }
5674 
5675 static void
5676 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5677 {
5678 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5679 	struct pcicfg_pcie *cfg;
5680 	int version, pos;
5681 
5682 	cfg = &dinfo->cfg.pcie;
5683 	pos = cfg->pcie_location;
5684 
5685 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5686 
5687 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5688 
5689 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5690 
5691 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5692 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5693 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5694 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5695 
5696 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5697 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5698 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5699 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5700 
5701 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5702 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5703 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5704 
5705 	if (version > 1) {
5706 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5707 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5708 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5709 	}
5710 #undef RREG
5711 }
5712 
5713 static void
5714 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5715 {
5716 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5717 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5718 }
5719 
5720 void
5721 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5722 {
5723 	uint32_t cls;
5724 	int ps;
5725 
5726 	/*
5727 	 * Some drivers apparently write to these registers w/o updating our
5728 	 * cached copy.  No harm happens if we update the copy, so do so here
5729 	 * so we can restore them.  The COMMAND register is modified by the
5730 	 * bus w/o updating the cache.  This should represent the normally
5731 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5732 	 */
5733 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5734 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5735 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5736 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5737 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5738 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5739 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5740 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5741 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5742 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5743 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5744 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5745 	case PCIM_HDRTYPE_NORMAL:
5746 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5747 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5748 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5749 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5750 		break;
5751 	case PCIM_HDRTYPE_BRIDGE:
5752 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5753 		    PCIR_SECLAT_1, 1);
5754 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5755 		    PCIR_SUBBUS_1, 1);
5756 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5757 		    PCIR_SECBUS_1, 1);
5758 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5759 		    PCIR_PRIBUS_1, 1);
5760 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5761 		    PCIR_BRIDGECTL_1, 2);
5762 		break;
5763 	case PCIM_HDRTYPE_CARDBUS:
5764 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5765 		    PCIR_SECLAT_2, 1);
5766 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5767 		    PCIR_SUBBUS_2, 1);
5768 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5769 		    PCIR_SECBUS_2, 1);
5770 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5771 		    PCIR_PRIBUS_2, 1);
5772 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5773 		    PCIR_BRIDGECTL_2, 2);
5774 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5775 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5776 		break;
5777 	}
5778 
5779 	if (dinfo->cfg.pcie.pcie_location != 0)
5780 		pci_cfg_save_pcie(dev, dinfo);
5781 
5782 	if (dinfo->cfg.pcix.pcix_location != 0)
5783 		pci_cfg_save_pcix(dev, dinfo);
5784 
5785 #ifdef PCI_IOV
5786 	if (dinfo->cfg.iov != NULL)
5787 		pci_iov_cfg_save(dev, dinfo);
5788 #endif
5789 
5790 	/*
5791 	 * Don't set the state for display devices, base peripherals, and
5792 	 * memory devices, since bad things happen when they are powered down.
5793 	 * We should (a) have drivers that can easily detach and (b) use
5794 	 * generic drivers for these devices so that some device actually
5795 	 * attaches.  We need to make sure that when we implement (a) we don't
5796 	 * power the device down on a reattach.
5797 	 */
5798 	cls = pci_get_class(dev);
5799 	if (!setstate)
5800 		return;
5801 	switch (pci_do_power_nodriver) {
5802 	case 0:		/* NO powerdown at all */
5803 		return;
5804 	case 1:		/* Conservative about what to power down */
5805 		if (cls == PCIC_STORAGE)
5806 			return;
5807 		/* FALLTHROUGH */
5808 	case 2:		/* Aggressive about what to power down */
5809 		if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5810 		    cls == PCIC_BASEPERIPH)
5811 			return;
5812 		/* FALLTHROUGH */
5813 	case 3:		/* Power down everything */
5814 		break;
5815 	}
5817 	/*
5818 	 * PCI spec says we can only go into D3 state from D0 state.
5819 	 * Transition from D[12] into D0 before going to D3 state.
5820 	 */
5821 	ps = pci_get_powerstate(dev);
5822 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5823 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5824 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5825 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5826 }
5827 
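/*
 * The powerdown policy in the switch above is chosen by the
 * hw.pci.do_power_nodriver tunable (see pci(4)), e.g. in
 * /boot/loader.conf:
 *
 *	hw.pci.do_power_nodriver="3"
 */
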
5828 /* Wrapper APIs suitable for device driver use. */
5829 void
5830 pci_save_state(device_t dev)
5831 {
5832 	struct pci_devinfo *dinfo;
5833 
5834 	dinfo = device_get_ivars(dev);
5835 	pci_cfg_save(dev, dinfo, 0);
5836 }
5837 
5838 void
5839 pci_restore_state(device_t dev)
5840 {
5841 	struct pci_devinfo *dinfo;
5842 
5843 	dinfo = device_get_ivars(dev);
5844 	pci_cfg_restore(dev, dinfo);
5845 }
5846 
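/*
 * Illustrative use of these wrappers in a driver's suspend/resume
 * methods (hypothetical function names; most drivers can rely on the
 * PCI bus code doing this for them):
 *
 *	static int
 *	foo_suspend(device_t dev)
 *	{
 *		pci_save_state(dev);
 *		return (0);
 *	}
 *
 *	static int
 *	foo_resume(device_t dev)
 *	{
 *		pci_restore_state(dev);
 *		return (0);
 *	}
 */
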
5847 static int
5848 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5849     uintptr_t *id)
5850 {
5851 
5852 	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5853 }
5854 
5855 /* Find the PCI-express root port above a given PCI device, if any. */
5856 device_t
5857 pci_find_pcie_root_port(device_t dev)
5858 {
5859 	struct pci_devinfo *dinfo;
5860 	devclass_t pci_class;
5861 	device_t pcib, bus;
5862 
5863 	pci_class = devclass_find("pci");
5864 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5865 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5866 
5867 	/*
5868 	 * Walk the bridge hierarchy until we find a PCI-e root
5869 	 * port or a non-PCI device.
5870 	 */
5871 	for (;;) {
5872 		bus = device_get_parent(dev);
5873 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5874 		    device_get_nameunit(dev)));
5875 
5876 		pcib = device_get_parent(bus);
5877 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5878 		    device_get_nameunit(bus)));
5879 
5880 		/*
5881 		 * pcib's parent must be a PCI bus for this to be a
5882 		 * PCI-PCI bridge.
5883 		 */
5884 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5885 			return (NULL);
5886 
5887 		dinfo = device_get_ivars(pcib);
5888 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5889 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5890 			return (pcib);
5891 
5892 		dev = pcib;
5893 	}
5894 }
5895 
5896 /*
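/*
 * Hypothetical use of pci_find_pcie_root_port(): read a PCI-express
 * register of the root port above a device, using pci_find_cap() to
 * locate the capability offset:
 *
 *	device_t root;
 *	int pos;
 *	uint16_t ctl;
 *
 *	root = pci_find_pcie_root_port(dev);
 *	if (root != NULL && pci_find_cap(root, PCIY_EXPRESS, &pos) == 0)
 *		ctl = pci_read_config(root, pos + PCIER_ROOT_CTL, 2);
 */
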
5897  * Wait for pending transactions to complete on a PCI-express function.
5898  *
5899  * The maximum delay is specified in milliseconds in max_delay.  Note
5900  * that this function may sleep.
5901  *
5902  * Returns true if the function is idle and false if the timeout is
5903  * exceeded.  If dev is not a PCI-express function, this returns true.
5904  */
5905 bool
5906 pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5907 {
5908 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5909 	uint16_t sta;
5910 	int cap;
5911 
5912 	cap = dinfo->cfg.pcie.pcie_location;
5913 	if (cap == 0)
5914 		return (true);
5915 
5916 	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5917 	while (sta & PCIEM_STA_TRANSACTION_PND) {
5918 		if (max_delay == 0)
5919 			return (false);
5920 
5921 		/* Poll once every 100 milliseconds up to the timeout. */
5922 		if (max_delay > 100) {
5923 			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5924 			max_delay -= 100;
5925 		} else {
5926 			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5927 			    C_HARDCLOCK);
5928 			max_delay = 0;
5929 		}
5930 		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5931 	}
5932 
5933 	return (true);
5934 }
5935 
5936 /*
5937  * Determine the maximum Completion Timeout in microseconds.
5938  *
5939  * For non-PCI-express functions this returns 0.
5940  */
5941 int
5942 pcie_get_max_completion_timeout(device_t dev)
5943 {
5944 	struct pci_devinfo *dinfo = device_get_ivars(dev);
5945 	int cap;
5946 
5947 	cap = dinfo->cfg.pcie.pcie_location;
5948 	if (cap == 0)
5949 		return (0);
5950 
5951 	/*
5952 	 * Functions using the 1.x spec use the default timeout range of
5953 	 * 50 microseconds to 50 milliseconds.  Functions that do not
5954 	 * support programmable timeouts also use this range.
5955 	 */
5956 	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
5957 	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
5958 	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
5959 		return (50 * 1000);
5960 
5961 	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
5962 	    PCIEM_CTL2_COMP_TIMO_VAL) {
5963 	case PCIEM_CTL2_COMP_TIMO_100US:
5964 		return (100);
5965 	case PCIEM_CTL2_COMP_TIMO_10MS:
5966 		return (10 * 1000);
5967 	case PCIEM_CTL2_COMP_TIMO_55MS:
5968 		return (55 * 1000);
5969 	case PCIEM_CTL2_COMP_TIMO_210MS:
5970 		return (210 * 1000);
5971 	case PCIEM_CTL2_COMP_TIMO_900MS:
5972 		return (900 * 1000);
5973 	case PCIEM_CTL2_COMP_TIMO_3500MS:
5974 		return (3500 * 1000);
5975 	case PCIEM_CTL2_COMP_TIMO_13S:
5976 		return (13 * 1000 * 1000);
5977 	case PCIEM_CTL2_COMP_TIMO_64S:
5978 		return (64 * 1000 * 1000);
5979 	default:
5980 		return (50 * 1000);
5981 	}
5982 }
5983 
5984 /*
5985  * Perform a Function Level Reset (FLR) on a device.
5986  *
5987  * This function first waits for any pending transactions to complete
5988  * within the timeout specified by max_delay.  If transactions are
5989  * still pending, the function will return false without attempting a
5990  * reset.
5991  *
5992  * If dev is not a PCI-express function or does not support FLR, this
5993  * function returns false.
5994  *
5995  * Note that no registers are saved or restored.  The caller is
5996  * responsible for saving and restoring any registers including
5997  * PCI-standard registers via pci_save_state() and
5998  * pci_restore_state().
5999  */
6000 bool
6001 pcie_flr(device_t dev, u_int max_delay, bool force)
6002 {
6003 	struct pci_devinfo *dinfo = device_get_ivars(dev);
6004 	uint16_t cmd, ctl;
6005 	int compl_delay;
6006 	int cap;
6007 
6008 	cap = dinfo->cfg.pcie.pcie_location;
6009 	if (cap == 0)
6010 		return (false);
6011 
6012 	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
6013 		return (false);
6014 
6015 	/*
6016 	 * Disable busmastering to prevent generation of new
6017 	 * transactions while waiting for the device to go idle.  If
6018 	 * the idle timeout fails, the command register is restored
6019 	 * which will re-enable busmastering.
6020 	 */
6021 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
6022 	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
6023 	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
6024 		if (!force) {
6025 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
6026 			return (false);
6027 		}
6028 		pci_printf(&dinfo->cfg,
6029 		    "Resetting with transactions pending after %d ms\n",
6030 		    max_delay);
6031 
6032 		/*
6033 		 * Extend the post-FLR delay to cover the maximum
6034 		 * Completion Timeout delay of anything in flight
6035 		 * during the FLR delay.  Enforce a minimum delay of
6036 		 * 10 ms.
6037 		 */
6038 		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
6039 		if (compl_delay < 10)
6040 			compl_delay = 10;
6041 	} else
6042 		compl_delay = 0;
6043 
6044 	/* Initiate the reset. */
6045 	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
6046 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
6047 	    PCIEM_CTL_INITIATE_FLR, 2);
6048 
6049 	/* Wait 100 ms, plus any extra completion delay computed above. */
6050 	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
6051 
6052 	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
6053 	    PCIEM_STA_TRANSACTION_PND)
6054 		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
6055 	return (true);
6056 }
6057
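/*
 * Illustrative caller (hypothetical): bracket the reset with state
 * save/restore, as required by the note above pcie_flr().  max_delay
 * is in milliseconds, while the completion timeout is reported in
 * microseconds:
 *
 *	pci_save_state(dev);
 *	if (!pcie_flr(dev,
 *	    max(pcie_get_max_completion_timeout(dev) / 1000, 10), true))
 *		device_printf(dev, "FLR failed or not supported\n");
 *	pci_restore_state(dev);
 */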