xref: /freebsd/sys/dev/pci/pci.c (revision c6a33c8e88c5684876e670c8189d03ad25108d8a)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #include <dev/usb/controller/xhcireg.h>
67 #include <dev/usb/controller/ehcireg.h>
68 #include <dev/usb/controller/ohcireg.h>
69 #include <dev/usb/controller/uhcireg.h>
70 
71 #include "pcib_if.h"
72 #include "pci_if.h"
73 
/*
 * Evaluates to non-zero when config register offset 'reg' equals PCIR_BIOS
 * and 'cfg' describes a PCIM_HDRTYPE_NORMAL header, or when 'reg' equals
 * PCIR_BIOS_1 and 'cfg' describes a PCIM_HDRTYPE_BRIDGE header.
 *
 * Both arguments are parenthesized in the expansion so that an expression
 * argument (for example "base | off") is compared as a whole instead of
 * being split by the higher precedence of '=='.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
77 
/*
 * Forward declarations for file-local helpers.  Several of these
 * (pci_probe, pci_attach, pci_detach, pci_remap_intr_method,
 * pci_get_rid_method) are referenced by the pci_methods table, and
 * pci_modevent by DRIVER_MODULE(), before their definitions appear.
 */
78 static int		pci_has_quirk(uint32_t devid, int quirk);
79 static pci_addr_t	pci_mapbase(uint64_t mapreg);
80 static const char	*pci_maptype(uint64_t mapreg);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
/* pci_detach is only declared (and used) when PCI_RES_BUS is configured. */
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
/* The VPD write helper is compiled out, matching its "#if 0" definition. */
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static int		pci_msix_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
122 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
123 
124 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
125     int f, uint16_t vid, uint16_t did, size_t size);
126 
/*
 * Method dispatch table for the pci bus driver.  Each DEVMETHOD() entry
 * binds an interface method (device, bus, or pci interface) to the function
 * that implements it; the table is terminated by DEVMETHOD_END and is
 * handed to DEFINE_CLASS_0() below.
 *
 * Two parts are configuration dependent: device_detach uses pci_detach only
 * when PCI_RES_BUS is defined (bus_generic_detach otherwise), and the three
 * pci_iov_* / pci_create_iov_child entries exist only when PCI_IOV is
 * defined.
 */
127 static device_method_t pci_methods[] = {
128 	/* Device interface */
129 	DEVMETHOD(device_probe,		pci_probe),
130 	DEVMETHOD(device_attach,	pci_attach),
131 #ifdef PCI_RES_BUS
132 	DEVMETHOD(device_detach,	pci_detach),
133 #else
134 	DEVMETHOD(device_detach,	bus_generic_detach),
135 #endif
136 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
137 	DEVMETHOD(device_suspend,	bus_generic_suspend),
138 	DEVMETHOD(device_resume,	pci_resume),
139 
140 	/* Bus interface */
141 	DEVMETHOD(bus_print_child,	pci_print_child),
142 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
143 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
144 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
145 	DEVMETHOD(bus_driver_added,	pci_driver_added),
146 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
147 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
148 
	/* Resource management; set/get use the generic resource-list helpers. */
149 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
150 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
151 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
152 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
153 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
154 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
155 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
156 	DEVMETHOD(bus_release_resource,	pci_release_resource),
157 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
158 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
159 	DEVMETHOD(bus_child_detached,	pci_child_detached),
160 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
161 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
162 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
163 	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
164 	DEVMETHOD(bus_resume_child,	pci_resume_child),
165 
166 	/* PCI interface */
167 	DEVMETHOD(pci_read_config,	pci_read_config_method),
168 	DEVMETHOD(pci_write_config,	pci_write_config_method),
169 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
170 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
171 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
172 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
173 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
174 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
175 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
176 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
177 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
178 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
179 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
180 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
181 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
182 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
183 	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
184 	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
185 	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
186 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
187 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
188 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
189 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
190 	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
191 	DEVMETHOD(pci_child_added,	pci_child_added_method),
192 #ifdef PCI_IOV
193 	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
194 	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
195 	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
196 #endif
197 
198 	DEVMETHOD_END
199 };
200 
/*
 * Define the "pci" driver class (pci_driver) from pci_methods, with a
 * per-instance softc of sizeof(struct pci_softc).
 */
201 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
202 
/*
 * Register the driver under the "pcib" parent with pci_modevent as the
 * module event handler, and publish module version 1.
 */
203 static devclass_t pci_devclass;
204 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
205 MODULE_VERSION(pci, 1);
206 
/*
 * Buffer and length for vendor description data.
 * NOTE(review): presumably filled by pci_load_vendor_data() and consumed by
 * pci_describe_device(); neither body is visible in this part of the file.
 */
207 static char	*pci_vendordata;
208 static size_t	pci_vendordata_size;
209 
/* Quirk kinds that may appear in the 'type' member of struct pci_quirk. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */

/*
 * One entry of the quirk table: a device identifier, the kind of quirk that
 * applies to it, and up to two quirk-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values above */
	int	arg1;	/* Quirk-specific; a register offset for the map quirks */
	int	arg2;	/* Quirk-specific; zero in every table entry here */
};
222 
223 static const struct pci_quirk pci_quirks[] = {
224 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
225 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
226 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
227 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
228 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
229 
230 	/*
231 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
232 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
233 	 */
234 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
236 
237 	/*
238 	 * MSI doesn't work on earlier Intel chipsets including
239 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
240 	 */
241 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
248 
249 	/*
250 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
251 	 * bridge.
252 	 */
253 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
254 
255 	/*
256 	 * MSI-X allocation doesn't work properly for devices passed through
257 	 * by VMware up to at least ESXi 5.1.
258 	 */
259 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
260 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
261 
262 	/*
263 	 * Some virtualization environments emulate an older chipset
264 	 * but support MSI just fine.  QEMU uses the Intel 82440.
265 	 */
266 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
267 
268 	/*
269 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
270 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
271 	 * It prevents us from attaching hpet(4) when the bit is unset.
272 	 * Note this quirk only affects SB600 revision A13 and earlier.
273 	 * For SB600 A21 and later, firmware must set the bit to hide it.
274 	 * For SB700 and later, it is unused and hardcoded to zero.
275 	 */
276 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
277 
278 	/*
279 	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
280 	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
281 	 * command register is set.
282 	 */
283 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
284 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
285 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
286 
287 	/*
288 	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
289 	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
290 	 */
291 	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
292 	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
293 	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
294 	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
295 	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
296 	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
297 
298 	{ 0 }
299 };
300 
301 /* map register information */
302 #define	PCI_MAPMEM	0x01	/* memory map */
303 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
304 #define	PCI_MAPPORT	0x04	/* port map */
305 
/*
 * Global list of every discovered PCI function.  pci_fill_devinfo() appends
 * to it and the pci_find_*() lookups walk it.
 */
306 struct devlist pci_devq;
/* Both counters are incremented each time pci_fill_devinfo() adds an entry. */
307 uint32_t pci_generation;
308 uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_cap() when a PCI-express capability is seen on any
 * device (pcie_chipset) or a PCI-X capability is seen on a bridge-type
 * header (pcix_chipset).
 */
309 static int pcie_chipset, pcix_chipset;
310 
311 /* sysctl vars */
312 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
313 
314 static int pci_enable_io_modes = 1;
315 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
316     &pci_enable_io_modes, 1,
317     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
318 enable these bits correctly.  We'd like to do this all the time, but there\n\
319 are some peripherals that this causes problems with.");
320 
321 static int pci_do_realloc_bars = 0;
322 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
323     &pci_do_realloc_bars, 0,
324     "Attempt to allocate a new range for any BARs whose original "
325     "firmware-assigned ranges fail to allocate during the initial device scan.");
326 
327 static int pci_do_power_nodriver = 0;
328 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
329     &pci_do_power_nodriver, 0,
330   "Place a function into D3 state when no driver attaches to it.  0 means\n\
331 disable.  1 means conservatively place devices into D3 state.  2 means\n\
332 agressively place devices into D3 state.  3 means put absolutely everything\n\
333 in D3 state.");
334 
335 int pci_do_power_resume = 1;
336 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
337     &pci_do_power_resume, 1,
338   "Transition from D3 -> D0 on resume.");
339 
340 int pci_do_power_suspend = 1;
341 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
342     &pci_do_power_suspend, 1,
343   "Transition from D0 -> D3 on suspend.");
344 
345 static int pci_do_msi = 1;
346 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
347     "Enable support for MSI interrupts");
348 
349 static int pci_do_msix = 1;
350 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
351     "Enable support for MSI-X interrupts");
352 
353 static int pci_honor_msi_blacklist = 1;
354 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
355     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
356 
357 #if defined(__i386__) || defined(__amd64__)
358 static int pci_usb_takeover = 1;
359 #else
360 static int pci_usb_takeover = 0;
361 #endif
362 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
363     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
364 Disable this if you depend on BIOS emulation of USB devices, that is\n\
365 you use USB devices (like keyboard or mouse) but do not load USB drivers");
366 
367 static int pci_clear_bars;
368 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
369     "Ignore firmware-assigned resources for BARs.");
370 
371 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
372 static int pci_clear_buses;
373 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
374     "Ignore firmware-assigned bus numbers.");
375 #endif
376 
377 static int pci_enable_ari = 1;
378 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
379     0, "Enable support for PCIe Alternative RID Interpretation");
380 
381 static int
382 pci_has_quirk(uint32_t devid, int quirk)
383 {
384 	const struct pci_quirk *q;
385 
386 	for (q = &pci_quirks[0]; q->devid; q++) {
387 		if (q->devid == devid && q->type == quirk)
388 			return (1);
389 	}
390 	return (0);
391 }
392 
393 /* Find a device_t by bus/slot/function in domain 0 */
394 
395 device_t
396 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
397 {
398 
399 	return (pci_find_dbsf(0, bus, slot, func));
400 }
401 
402 /* Find a device_t by domain/bus/slot/function */
403 
404 device_t
405 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
406 {
407 	struct pci_devinfo *dinfo;
408 
409 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
410 		if ((dinfo->cfg.domain == domain) &&
411 		    (dinfo->cfg.bus == bus) &&
412 		    (dinfo->cfg.slot == slot) &&
413 		    (dinfo->cfg.func == func)) {
414 			return (dinfo->cfg.dev);
415 		}
416 	}
417 
418 	return (NULL);
419 }
420 
421 /* Find a device_t by vendor/device ID */
422 
423 device_t
424 pci_find_device(uint16_t vendor, uint16_t device)
425 {
426 	struct pci_devinfo *dinfo;
427 
428 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
429 		if ((dinfo->cfg.vendor == vendor) &&
430 		    (dinfo->cfg.device == device)) {
431 			return (dinfo->cfg.dev);
432 		}
433 	}
434 
435 	return (NULL);
436 }
437 
438 device_t
439 pci_find_class(uint8_t class, uint8_t subclass)
440 {
441 	struct pci_devinfo *dinfo;
442 
443 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
444 		if (dinfo->cfg.baseclass == class &&
445 		    dinfo->cfg.subclass == subclass) {
446 			return (dinfo->cfg.dev);
447 		}
448 	}
449 
450 	return (NULL);
451 }
452 
453 static int
454 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
455 {
456 	va_list ap;
457 	int retval;
458 
459 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
460 	    cfg->func);
461 	va_start(ap, fmt);
462 	retval += vprintf(fmt, ap);
463 	va_end(ap);
464 	return (retval);
465 }
466 
467 /* return base address of memory or port map */
468 
469 static pci_addr_t
470 pci_mapbase(uint64_t mapreg)
471 {
472 
473 	if (PCI_BAR_MEM(mapreg))
474 		return (mapreg & PCIM_BAR_MEM_BASE);
475 	else
476 		return (mapreg & PCIM_BAR_IO_BASE);
477 }
478 
479 /* return map type of memory or port map */
480 
481 static const char *
482 pci_maptype(uint64_t mapreg)
483 {
484 
485 	if (PCI_BAR_IO(mapreg))
486 		return ("I/O Port");
487 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
488 		return ("Prefetchable Memory");
489 	return ("Memory");
490 }
491 
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * The base-address bits are extracted with pci_mapbase() and the result is
 * the number of trailing zero bits in them; 0 is returned when no base bits
 * are set.
 */
int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count how many low-order zero bits precede the first set bit. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
510 
511 /* return base address of device ROM */
512 
513 static pci_addr_t
514 pci_rombase(uint64_t mapreg)
515 {
516 
517 	return (mapreg & PCIM_BIOS_ADDR_MASK);
518 }
519 
/*
 * Return log2 of the size decoded for a device ROM.
 *
 * The address bits are extracted with pci_rombase() and the result is the
 * number of trailing zero bits in them; 0 is returned when no address bits
 * are set.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_rombase(testval);
	shift = 0;
	if (base == 0)
		return (shift);

	while ((base & 1) == 0) {
		base >>= 1;
		shift++;
	}
	return (shift);
}
538 
539 /* return log2 of address range supported by map register */
540 
541 static int
542 pci_maprange(uint64_t mapreg)
543 {
544 	int ln2range = 0;
545 
546 	if (PCI_BAR_IO(mapreg))
547 		ln2range = 32;
548 	else
549 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
550 		case PCIM_BAR_MEM_32:
551 			ln2range = 32;
552 			break;
553 		case PCIM_BAR_MEM_1MB:
554 			ln2range = 20;
555 			break;
556 		case PCIM_BAR_MEM_64:
557 			ln2range = 64;
558 			break;
559 		}
560 	return (ln2range);
561 }
562 
563 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
564 
565 static void
566 pci_fixancient(pcicfgregs *cfg)
567 {
568 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
569 		return;
570 
571 	/* PCI to PCI bridges use header type 1 */
572 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
573 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
574 }
575 
576 /* extract header type specific config data */
577 
578 static void
579 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
580 {
581 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
582 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
583 	case PCIM_HDRTYPE_NORMAL:
584 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
585 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
586 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
587 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
588 		cfg->nummaps	    = PCI_MAXMAPS_0;
589 		break;
590 	case PCIM_HDRTYPE_BRIDGE:
591 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
592 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
593 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
594 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
595 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
596 		cfg->nummaps	    = PCI_MAXMAPS_1;
597 		break;
598 	case PCIM_HDRTYPE_CARDBUS:
599 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
600 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
601 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
602 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
603 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
604 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
605 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
606 		cfg->nummaps	    = PCI_MAXMAPS_2;
607 		break;
608 	}
609 #undef REG
610 }
611 
612 /* read configuration header into pcicfgregs structure */
613 struct pci_devinfo *
614 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
615 {
616 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
617 	uint16_t vid, did;
618 
619 	vid = REG(PCIR_VENDOR, 2);
620 	did = REG(PCIR_DEVICE, 2);
621 	if (vid != 0xffff)
622 		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
623 
624 	return (NULL);
625 }
626 
627 static struct pci_devinfo *
628 pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
629     uint16_t did, size_t size)
630 {
631 	struct pci_devinfo *devlist_entry;
632 	pcicfgregs *cfg;
633 
634 	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
635 
636 	cfg = &devlist_entry->cfg;
637 
638 	cfg->domain		= d;
639 	cfg->bus		= b;
640 	cfg->slot		= s;
641 	cfg->func		= f;
642 	cfg->vendor		= vid;
643 	cfg->device		= did;
644 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
645 	cfg->statreg		= REG(PCIR_STATUS, 2);
646 	cfg->baseclass		= REG(PCIR_CLASS, 1);
647 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
648 	cfg->progif		= REG(PCIR_PROGIF, 1);
649 	cfg->revid		= REG(PCIR_REVID, 1);
650 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
651 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
652 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
653 	cfg->intpin		= REG(PCIR_INTPIN, 1);
654 	cfg->intline		= REG(PCIR_INTLINE, 1);
655 
656 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
657 	cfg->hdrtype		&= ~PCIM_MFDEV;
658 	STAILQ_INIT(&cfg->maps);
659 
660 	cfg->devinfo_size	= size;
661 	cfg->iov		= NULL;
662 
663 	pci_fixancient(cfg);
664 	pci_hdrtypedata(pcib, b, s, f, cfg);
665 
666 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
667 		pci_read_cap(pcib, cfg);
668 
669 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
670 
671 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
672 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
673 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
674 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
675 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
676 
677 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
678 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
679 	devlist_entry->conf.pc_vendor = cfg->vendor;
680 	devlist_entry->conf.pc_device = cfg->device;
681 
682 	devlist_entry->conf.pc_class = cfg->baseclass;
683 	devlist_entry->conf.pc_subclass = cfg->subclass;
684 	devlist_entry->conf.pc_progif = cfg->progif;
685 	devlist_entry->conf.pc_revid = cfg->revid;
686 
687 	pci_numdevs++;
688 	pci_generation++;
689 
690 	return (devlist_entry);
691 }
692 #undef REG
693 
/*
 * Walk the capability list of the function described by 'cfg' and cache
 * what later code needs from each capability.
 *
 * The list head is read from PCIR_CAP_PTR (normal and bridge headers) or
 * PCIR_CAP_PTR_2 (cardbus); other header types return immediately.  For
 * each entry the capability ID selects what is recorded: power management
 * register offsets, HyperTransport slave/MSI-mapping data, MSI and MSI-X
 * locations and message counts, the VPD register offset, bridge
 * subvendor/subdevice, and PCI-X / PCI-express locations.  Seeing PCI-X on
 * a bridge header sets pcix_chipset; seeing PCI-express sets pcie_chipset.
 *
 * On powerpc the MSI mapping window of a HyperTransport slave is enabled if
 * it is not already.
 *
 * The REG() and WREG() macros defined here are intentionally NOT undefined
 * at the end: the VPD register helpers that follow rely on them, so those
 * helpers must keep parameters named 'pcib' and 'cfg'.
 */
694 static void
695 pci_read_cap(device_t pcib, pcicfgregs *cfg)
696 {
697 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
698 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
699 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
700 	uint64_t addr;
701 #endif
702 	uint32_t val;
703 	int	ptr, nextptr, ptrptr;
704 
705 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
706 	case PCIM_HDRTYPE_NORMAL:
707 	case PCIM_HDRTYPE_BRIDGE:
708 		ptrptr = PCIR_CAP_PTR;
709 		break;
710 	case PCIM_HDRTYPE_CARDBUS:
711 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
712 		break;
713 	default:
714 		return;		/* no extended capabilities support */
715 	}
716 	nextptr = REG(ptrptr, 1);	/* sanity check? */
717 
718 	/*
719 	 * Read capability entries.
720 	 */
721 	while (nextptr != 0) {
722 		/* Sanity check */
		/*
		 * NOTE(review): nextptr always comes from a 1-byte config
		 * read, so this bound presumably can never trip - confirm.
		 * There is no guard against a list that loops back on itself.
		 */
723 		if (nextptr > 255) {
724 			printf("illegal PCI extended capability offset %d\n",
725 			    nextptr);
726 			return;
727 		}
728 		/* Find the next entry */
729 		ptr = nextptr;
730 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
731 
732 		/* Process this entry */
733 		switch (REG(ptr + PCICAP_ID, 1)) {
734 		case PCIY_PMG:		/* PCI power management */
			/* Only the first power management capability is kept. */
735 			if (cfg->pp.pp_cap == 0) {
736 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
737 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
738 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/*
				 * The data register offset is recorded only
				 * when the next capability starts more than
				 * PCIR_POWER_DATA bytes past this one; when
				 * this is the last entry (nextptr == 0) the
				 * difference is negative and pp_data stays 0.
				 */
739 				if ((nextptr - ptr) > PCIR_POWER_DATA)
740 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
741 			}
742 			break;
743 		case PCIY_HT:		/* HyperTransport */
744 			/* Determine HT-specific capability type. */
745 			val = REG(ptr + PCIR_HT_COMMAND, 2);
746 
747 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
748 				cfg->ht.ht_slave = ptr;
749 
750 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
751 			switch (val & PCIM_HTCMD_CAP_MASK) {
752 			case PCIM_HTCAP_MSI_MAPPING:
				/*
				 * Without the FIXED bit the window address
				 * is read from the capability (high word
				 * first) and a warning is printed if it is
				 * not MSI_INTEL_ADDR_BASE; with the bit set
				 * that default address is assumed.
				 */
753 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
754 					/* Sanity check the mapping window. */
755 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
756 					    4);
757 					addr <<= 32;
758 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
759 					    4);
760 					if (addr != MSI_INTEL_ADDR_BASE)
761 						device_printf(pcib,
762 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
763 						    cfg->domain, cfg->bus,
764 						    cfg->slot, cfg->func,
765 						    (long long)addr);
766 				} else
767 					addr = MSI_INTEL_ADDR_BASE;
768 
769 				cfg->ht.ht_msimap = ptr;
770 				cfg->ht.ht_msictrl = val;
771 				cfg->ht.ht_msiaddr = addr;
772 				break;
773 			}
774 #endif
775 			break;
776 		case PCIY_MSI:		/* PCI MSI */
777 			cfg->msi.msi_location = ptr;
778 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is 2^n, n taken from the MMC field. */
779 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
780 						     PCIM_MSICTRL_MMC_MASK)>>1);
781 			break;
782 		case PCIY_MSIX:		/* PCI MSI-X */
783 			cfg->msix.msix_location = ptr;
784 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* Message count is the table-size field plus one. */
785 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
786 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/*
			 * Table and PBA registers each pack a BAR index in
			 * the PCIM_MSIX_BIR_MASK bits and an offset in the
			 * remaining bits.
			 */
787 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
788 			cfg->msix.msix_table_bar = PCIR_BAR(val &
789 			    PCIM_MSIX_BIR_MASK);
790 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
791 			val = REG(ptr + PCIR_MSIX_PBA, 4);
792 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
793 			    PCIM_MSIX_BIR_MASK);
794 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
795 			break;
796 		case PCIY_VPD:		/* PCI Vital Product Data */
797 			cfg->vpd.vpd_reg = ptr;
798 			break;
799 		case PCIY_SUBVENDOR:
800 			/* Should always be true. */
801 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
802 			    PCIM_HDRTYPE_BRIDGE) {
				/* Low 16 bits: subvendor; high 16: subdevice. */
803 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
804 				cfg->subvendor = val & 0xffff;
805 				cfg->subdevice = val >> 16;
806 			}
807 			break;
808 		case PCIY_PCIX:		/* PCI-X */
809 			/*
810 			 * Assume we have a PCI-X chipset if we have
811 			 * at least one PCI-PCI bridge with a PCI-X
812 			 * capability.  Note that some systems with
813 			 * PCI-express or HT chipsets might match on
814 			 * this check as well.
815 			 */
816 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
817 			    PCIM_HDRTYPE_BRIDGE)
818 				pcix_chipset = 1;
819 			cfg->pcix.pcix_location = ptr;
820 			break;
821 		case PCIY_EXPRESS:	/* PCI-express */
822 			/*
823 			 * Assume we have a PCI-express chipset if we have
824 			 * at least one PCI-express device.
825 			 */
826 			pcie_chipset = 1;
827 			cfg->pcie.pcie_location = ptr;
828 			val = REG(ptr + PCIER_FLAGS, 2);
829 			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
830 			break;
831 		default:
832 			break;
833 		}
834 	}
835 
836 #if defined(__powerpc__)
837 	/*
838 	 * Enable the MSI mapping window for all HyperTransport
839 	 * slaves.  PCI-PCI bridges have their windows enabled via
840 	 * PCIB_MAP_MSI().
841 	 */
842 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
843 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
844 		device_printf(pcib,
845 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
846 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
847 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
848 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
849 		     2);
850 	}
851 #endif
852 /* REG and WREG use carry through to next functions */
853 }
854 
855 /*
856  * PCI Vital Product Data
857  */
858 
859 #define	PCI_VPD_TIMEOUT		1000000
860 
861 static int
862 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
863 {
864 	int count = PCI_VPD_TIMEOUT;
865 
866 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
867 
868 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
869 
870 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
871 		if (--count < 0)
872 			return (ENXIO);
873 		DELAY(1);	/* limit looping */
874 	}
875 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
876 
877 	return (0);
878 }
879 
/*
 * Compiled out (#if 0): write counterpart of pci_read_vpd_reg().
 *
 * As written, it stores 'data' in the VPD data register, writes 'reg' with
 * bit 15 set to the VPD address register, and polls (DELAY(1) between
 * polls) until bit 15 reads back clear.  Returns 0 on success or ENXIO when
 * the PCI_VPD_TIMEOUT budget runs out.  Uses the REG()/WREG() macros left
 * defined by pci_read_cap().
 */
880 #if 0
881 static int
882 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
883 {
884 	int count = PCI_VPD_TIMEOUT;
885 
886 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
887 
888 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
889 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
890 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
891 		if (--count < 0)
892 			return (ENXIO);
893 		DELAY(1);	/* limit looping */
894 	}
895 
896 	return (0);
897 }
898 #endif
899 
900 #undef PCI_VPD_TIMEOUT
901 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD data one byte at a time
 * while reading the device 32 bits at a time.
 */
902 struct vpd_readstate {
903 	device_t	pcib;		/* passed to pci_read_vpd_reg() */
904 	pcicfgregs	*cfg;		/* passed to pci_read_vpd_reg() */
905 	uint32_t	val;		/* current word; shifted right as bytes are consumed */
906 	int		bytesinval;	/* bytes still unread in val; 0 forces a new read */
907 	int		off;		/* VPD offset of the next word; advances by 4 */
908 	uint8_t		cksum;		/* running 8-bit sum of every byte returned */
909 };
910 
911 static int
912 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
913 {
914 	uint32_t reg;
915 	uint8_t byte;
916 
917 	if (vrs->bytesinval == 0) {
918 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
919 			return (ENXIO);
920 		vrs->val = le32toh(reg);
921 		vrs->off += 4;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval = 3;
924 	} else {
925 		vrs->val = vrs->val >> 8;
926 		byte = vrs->val & 0xff;
927 		vrs->bytesinval--;
928 	}
929 
930 	vrs->cksum += byte;
931 	*data = byte;
932 	return (0);
933 }
934 
935 static void
936 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
937 {
938 	struct vpd_readstate vrs;
939 	int state;
940 	int name;
941 	int remain;
942 	int i;
943 	int alloc, off;		/* alloc/off for RO/W arrays */
944 	int cksumvalid;
945 	int dflen;
946 	uint8_t byte;
947 	uint8_t byte2;
948 
949 	/* init vpd reader */
950 	vrs.bytesinval = 0;
951 	vrs.off = 0;
952 	vrs.pcib = pcib;
953 	vrs.cfg = cfg;
954 	vrs.cksum = 0;
955 
956 	state = 0;
957 	name = remain = i = 0;	/* shut up stupid gcc */
958 	alloc = off = 0;	/* shut up stupid gcc */
959 	dflen = 0;		/* shut up stupid gcc */
960 	cksumvalid = -1;
961 	while (state >= 0) {
962 		if (vpd_nextbyte(&vrs, &byte)) {
963 			state = -2;
964 			break;
965 		}
966 #if 0
967 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
968 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
969 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
970 #endif
971 		switch (state) {
972 		case 0:		/* item name */
973 			if (byte & 0x80) {
974 				if (vpd_nextbyte(&vrs, &byte2)) {
975 					state = -2;
976 					break;
977 				}
978 				remain = byte2;
979 				if (vpd_nextbyte(&vrs, &byte2)) {
980 					state = -2;
981 					break;
982 				}
983 				remain |= byte2 << 8;
984 				if (remain > (0x7f*4 - vrs.off)) {
985 					state = -1;
986 					pci_printf(cfg,
987 					    "invalid VPD data, remain %#x\n",
988 					    remain);
989 				}
990 				name = byte & 0x7f;
991 			} else {
992 				remain = byte & 0x7;
993 				name = (byte >> 3) & 0xf;
994 			}
995 			switch (name) {
996 			case 0x2:	/* String */
997 				cfg->vpd.vpd_ident = malloc(remain + 1,
998 				    M_DEVBUF, M_WAITOK);
999 				i = 0;
1000 				state = 1;
1001 				break;
1002 			case 0xf:	/* End */
1003 				state = -1;
1004 				break;
1005 			case 0x10:	/* VPD-R */
1006 				alloc = 8;
1007 				off = 0;
1008 				cfg->vpd.vpd_ros = malloc(alloc *
1009 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1010 				    M_WAITOK | M_ZERO);
1011 				state = 2;
1012 				break;
1013 			case 0x11:	/* VPD-W */
1014 				alloc = 8;
1015 				off = 0;
1016 				cfg->vpd.vpd_w = malloc(alloc *
1017 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1018 				    M_WAITOK | M_ZERO);
1019 				state = 5;
1020 				break;
1021 			default:	/* Invalid data, abort */
1022 				state = -1;
1023 				break;
1024 			}
1025 			break;
1026 
1027 		case 1:	/* Identifier String */
1028 			cfg->vpd.vpd_ident[i++] = byte;
1029 			remain--;
1030 			if (remain == 0)  {
1031 				cfg->vpd.vpd_ident[i] = '\0';
1032 				state = 0;
1033 			}
1034 			break;
1035 
1036 		case 2:	/* VPD-R Keyword Header */
1037 			if (off == alloc) {
1038 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1039 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1040 				    M_DEVBUF, M_WAITOK | M_ZERO);
1041 			}
1042 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1043 			if (vpd_nextbyte(&vrs, &byte2)) {
1044 				state = -2;
1045 				break;
1046 			}
1047 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1048 			if (vpd_nextbyte(&vrs, &byte2)) {
1049 				state = -2;
1050 				break;
1051 			}
1052 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1053 			if (dflen == 0 &&
1054 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1055 			    2) == 0) {
1056 				/*
1057 				 * if this happens, we can't trust the rest
1058 				 * of the VPD.
1059 				 */
1060 				pci_printf(cfg, "bad keyword length: %d\n",
1061 				    dflen);
1062 				cksumvalid = 0;
1063 				state = -1;
1064 				break;
1065 			} else if (dflen == 0) {
1066 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1067 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1068 				    M_DEVBUF, M_WAITOK);
1069 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1070 			} else
1071 				cfg->vpd.vpd_ros[off].value = malloc(
1072 				    (dflen + 1) *
1073 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1074 				    M_DEVBUF, M_WAITOK);
1075 			remain -= 3;
1076 			i = 0;
1077 			/* keep in sync w/ state 3's transistions */
1078 			if (dflen == 0 && remain == 0)
1079 				state = 0;
1080 			else if (dflen == 0)
1081 				state = 2;
1082 			else
1083 				state = 3;
1084 			break;
1085 
1086 		case 3:	/* VPD-R Keyword Value */
1087 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1088 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1089 			    "RV", 2) == 0 && cksumvalid == -1) {
1090 				if (vrs.cksum == 0)
1091 					cksumvalid = 1;
1092 				else {
1093 					if (bootverbose)
1094 						pci_printf(cfg,
1095 					    "bad VPD cksum, remain %hhu\n",
1096 						    vrs.cksum);
1097 					cksumvalid = 0;
1098 					state = -1;
1099 					break;
1100 				}
1101 			}
1102 			dflen--;
1103 			remain--;
1104 			/* keep in sync w/ state 2's transistions */
1105 			if (dflen == 0)
1106 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1107 			if (dflen == 0 && remain == 0) {
1108 				cfg->vpd.vpd_rocnt = off;
1109 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1110 				    off * sizeof(*cfg->vpd.vpd_ros),
1111 				    M_DEVBUF, M_WAITOK | M_ZERO);
1112 				state = 0;
1113 			} else if (dflen == 0)
1114 				state = 2;
1115 			break;
1116 
1117 		case 4:
1118 			remain--;
1119 			if (remain == 0)
1120 				state = 0;
1121 			break;
1122 
1123 		case 5:	/* VPD-W Keyword Header */
1124 			if (off == alloc) {
1125 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1126 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1127 				    M_DEVBUF, M_WAITOK | M_ZERO);
1128 			}
1129 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1130 			if (vpd_nextbyte(&vrs, &byte2)) {
1131 				state = -2;
1132 				break;
1133 			}
1134 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1135 			if (vpd_nextbyte(&vrs, &byte2)) {
1136 				state = -2;
1137 				break;
1138 			}
1139 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1140 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1141 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1142 			    sizeof(*cfg->vpd.vpd_w[off].value),
1143 			    M_DEVBUF, M_WAITOK);
1144 			remain -= 3;
1145 			i = 0;
1146 			/* keep in sync w/ state 6's transistions */
1147 			if (dflen == 0 && remain == 0)
1148 				state = 0;
1149 			else if (dflen == 0)
1150 				state = 5;
1151 			else
1152 				state = 6;
1153 			break;
1154 
1155 		case 6:	/* VPD-W Keyword Value */
1156 			cfg->vpd.vpd_w[off].value[i++] = byte;
1157 			dflen--;
1158 			remain--;
1159 			/* keep in sync w/ state 5's transistions */
1160 			if (dflen == 0)
1161 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1162 			if (dflen == 0 && remain == 0) {
1163 				cfg->vpd.vpd_wcnt = off;
1164 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1165 				    off * sizeof(*cfg->vpd.vpd_w),
1166 				    M_DEVBUF, M_WAITOK | M_ZERO);
1167 				state = 0;
1168 			} else if (dflen == 0)
1169 				state = 5;
1170 			break;
1171 
1172 		default:
1173 			pci_printf(cfg, "invalid state: %d\n", state);
1174 			state = -1;
1175 			break;
1176 		}
1177 	}
1178 
1179 	if (cksumvalid == 0 || state < -1) {
1180 		/* read-only data bad, clean up */
1181 		if (cfg->vpd.vpd_ros != NULL) {
1182 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1183 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1184 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1185 			cfg->vpd.vpd_ros = NULL;
1186 		}
1187 	}
1188 	if (state < -1) {
1189 		/* I/O error, clean up */
1190 		pci_printf(cfg, "failed to read VPD data.\n");
1191 		if (cfg->vpd.vpd_ident != NULL) {
1192 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1193 			cfg->vpd.vpd_ident = NULL;
1194 		}
1195 		if (cfg->vpd.vpd_w != NULL) {
1196 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1197 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1198 			free(cfg->vpd.vpd_w, M_DEVBUF);
1199 			cfg->vpd.vpd_w = NULL;
1200 		}
1201 	}
1202 	cfg->vpd.vpd_cached = 1;
1203 #undef REG
1204 #undef WREG
1205 }
1206 
1207 int
1208 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1209 {
1210 	struct pci_devinfo *dinfo = device_get_ivars(child);
1211 	pcicfgregs *cfg = &dinfo->cfg;
1212 
1213 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1214 		pci_read_vpd(device_get_parent(dev), cfg);
1215 
1216 	*identptr = cfg->vpd.vpd_ident;
1217 
1218 	if (*identptr == NULL)
1219 		return (ENXIO);
1220 
1221 	return (0);
1222 }
1223 
1224 int
1225 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1226 	const char **vptr)
1227 {
1228 	struct pci_devinfo *dinfo = device_get_ivars(child);
1229 	pcicfgregs *cfg = &dinfo->cfg;
1230 	int i;
1231 
1232 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1233 		pci_read_vpd(device_get_parent(dev), cfg);
1234 
1235 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1236 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1237 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1238 			*vptr = cfg->vpd.vpd_ros[i].value;
1239 			return (0);
1240 		}
1241 
1242 	*vptr = NULL;
1243 	return (ENXIO);
1244 }
1245 
1246 struct pcicfg_vpd *
1247 pci_fetch_vpd_list(device_t dev)
1248 {
1249 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1250 	pcicfgregs *cfg = &dinfo->cfg;
1251 
1252 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1253 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1254 	return (&cfg->vpd);
1255 }
1256 
1257 /*
1258  * Find the requested HyperTransport capability and return the offset
1259  * in configuration space via the pointer provided.  The function
1260  * returns 0 on success and an error code otherwise.
1261  */
1262 int
1263 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1264 {
1265 	int ptr, error;
1266 	uint16_t val;
1267 
1268 	error = pci_find_cap(child, PCIY_HT, &ptr);
1269 	if (error)
1270 		return (error);
1271 
1272 	/*
1273 	 * Traverse the capabilities list checking each HT capability
1274 	 * to see if it matches the requested HT capability.
1275 	 */
1276 	while (ptr != 0) {
1277 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1278 		if (capability == PCIM_HTCAP_SLAVE ||
1279 		    capability == PCIM_HTCAP_HOST)
1280 			val &= 0xe000;
1281 		else
1282 			val &= PCIM_HTCMD_CAP_MASK;
1283 		if (val == capability) {
1284 			if (capreg != NULL)
1285 				*capreg = ptr;
1286 			return (0);
1287 		}
1288 
1289 		/* Skip to the next HT capability. */
1290 		while (ptr != 0) {
1291 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1292 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1293 			    PCIY_HT)
1294 				break;
1295 		}
1296 	}
1297 	return (ENOENT);
1298 }
1299 
1300 /*
1301  * Find the requested capability and return the offset in
1302  * configuration space via the pointer provided.  The function returns
1303  * 0 on success and an error code otherwise.
1304  */
1305 int
1306 pci_find_cap_method(device_t dev, device_t child, int capability,
1307     int *capreg)
1308 {
1309 	struct pci_devinfo *dinfo = device_get_ivars(child);
1310 	pcicfgregs *cfg = &dinfo->cfg;
1311 	u_int32_t status;
1312 	u_int8_t ptr;
1313 
1314 	/*
1315 	 * Check the CAP_LIST bit of the PCI status register first.
1316 	 */
1317 	status = pci_read_config(child, PCIR_STATUS, 2);
1318 	if (!(status & PCIM_STATUS_CAPPRESENT))
1319 		return (ENXIO);
1320 
1321 	/*
1322 	 * Determine the start pointer of the capabilities list.
1323 	 */
1324 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1325 	case PCIM_HDRTYPE_NORMAL:
1326 	case PCIM_HDRTYPE_BRIDGE:
1327 		ptr = PCIR_CAP_PTR;
1328 		break;
1329 	case PCIM_HDRTYPE_CARDBUS:
1330 		ptr = PCIR_CAP_PTR_2;
1331 		break;
1332 	default:
1333 		/* XXX: panic? */
1334 		return (ENXIO);		/* no extended capabilities support */
1335 	}
1336 	ptr = pci_read_config(child, ptr, 1);
1337 
1338 	/*
1339 	 * Traverse the capabilities list.
1340 	 */
1341 	while (ptr != 0) {
1342 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1343 			if (capreg != NULL)
1344 				*capreg = ptr;
1345 			return (0);
1346 		}
1347 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1348 	}
1349 
1350 	return (ENOENT);
1351 }
1352 
1353 /*
1354  * Find the requested extended capability and return the offset in
1355  * configuration space via the pointer provided.  The function returns
1356  * 0 on success and an error code otherwise.
1357  */
1358 int
1359 pci_find_extcap_method(device_t dev, device_t child, int capability,
1360     int *capreg)
1361 {
1362 	struct pci_devinfo *dinfo = device_get_ivars(child);
1363 	pcicfgregs *cfg = &dinfo->cfg;
1364 	uint32_t ecap;
1365 	uint16_t ptr;
1366 
1367 	/* Only supported for PCI-express devices. */
1368 	if (cfg->pcie.pcie_location == 0)
1369 		return (ENXIO);
1370 
1371 	ptr = PCIR_EXTCAP;
1372 	ecap = pci_read_config(child, ptr, 4);
1373 	if (ecap == 0xffffffff || ecap == 0)
1374 		return (ENOENT);
1375 	for (;;) {
1376 		if (PCI_EXTCAP_ID(ecap) == capability) {
1377 			if (capreg != NULL)
1378 				*capreg = ptr;
1379 			return (0);
1380 		}
1381 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1382 		if (ptr == 0)
1383 			break;
1384 		ecap = pci_read_config(child, ptr, 4);
1385 	}
1386 
1387 	return (ENOENT);
1388 }
1389 
1390 /*
1391  * Support for MSI-X message interrupts.
1392  */
1393 void
1394 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1395     uint64_t address, uint32_t data)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(child);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	uint32_t offset;
1400 
1401 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1402 	offset = msix->msix_table_offset + index * 16;
1403 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1404 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1405 	bus_write_4(msix->msix_table_res, offset + 8, data);
1406 
1407 	/* Enable MSI -> HT mapping. */
1408 	pci_ht_map_msi(child, address);
1409 }
1410 
1411 void
1412 pci_mask_msix(device_t dev, u_int index)
1413 {
1414 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1415 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1416 	uint32_t offset, val;
1417 
1418 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1419 	offset = msix->msix_table_offset + index * 16 + 12;
1420 	val = bus_read_4(msix->msix_table_res, offset);
1421 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1422 		val |= PCIM_MSIX_VCTRL_MASK;
1423 		bus_write_4(msix->msix_table_res, offset, val);
1424 	}
1425 }
1426 
1427 void
1428 pci_unmask_msix(device_t dev, u_int index)
1429 {
1430 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1431 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1432 	uint32_t offset, val;
1433 
1434 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1435 	offset = msix->msix_table_offset + index * 16 + 12;
1436 	val = bus_read_4(msix->msix_table_res, offset);
1437 	if (val & PCIM_MSIX_VCTRL_MASK) {
1438 		val &= ~PCIM_MSIX_VCTRL_MASK;
1439 		bus_write_4(msix->msix_table_res, offset, val);
1440 	}
1441 }
1442 
1443 int
1444 pci_pending_msix(device_t dev, u_int index)
1445 {
1446 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1447 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1448 	uint32_t offset, bit;
1449 
1450 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1451 	offset = msix->msix_pba_offset + (index / 32) * 4;
1452 	bit = 1 << index % 32;
1453 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1454 }
1455 
1456 /*
1457  * Restore MSI-X registers and table during resume.  If MSI-X is
1458  * enabled then walk the virtual table to restore the actual MSI-X
1459  * table.
1460  */
1461 static void
1462 pci_resume_msix(device_t dev)
1463 {
1464 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1465 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1466 	struct msix_table_entry *mte;
1467 	struct msix_vector *mv;
1468 	int i;
1469 
1470 	if (msix->msix_alloc > 0) {
1471 		/* First, mask all vectors. */
1472 		for (i = 0; i < msix->msix_msgnum; i++)
1473 			pci_mask_msix(dev, i);
1474 
1475 		/* Second, program any messages with at least one handler. */
1476 		for (i = 0; i < msix->msix_table_len; i++) {
1477 			mte = &msix->msix_table[i];
1478 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1479 				continue;
1480 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1481 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1482 			pci_unmask_msix(dev, i);
1483 		}
1484 	}
1485 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1486 	    msix->msix_ctrl, 2);
1487 }
1488 
1489 /*
1490  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1491  * returned in *count.  After this function returns, each message will be
1492  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1493  */
1494 int
1495 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1496 {
1497 	struct pci_devinfo *dinfo = device_get_ivars(child);
1498 	pcicfgregs *cfg = &dinfo->cfg;
1499 	struct resource_list_entry *rle;
1500 	int actual, error, i, irq, max;
1501 
1502 	/* Don't let count == 0 get us into trouble. */
1503 	if (*count == 0)
1504 		return (EINVAL);
1505 
1506 	/* If rid 0 is allocated, then fail. */
1507 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1508 	if (rle != NULL && rle->res != NULL)
1509 		return (ENXIO);
1510 
1511 	/* Already have allocated messages? */
1512 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1513 		return (ENXIO);
1514 
1515 	/* If MSI-X is blacklisted for this system, fail. */
1516 	if (pci_msix_blacklisted())
1517 		return (ENXIO);
1518 
1519 	/* MSI-X capability present? */
1520 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1521 		return (ENODEV);
1522 
1523 	/* Make sure the appropriate BARs are mapped. */
1524 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1525 	    cfg->msix.msix_table_bar);
1526 	if (rle == NULL || rle->res == NULL ||
1527 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1528 		return (ENXIO);
1529 	cfg->msix.msix_table_res = rle->res;
1530 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1531 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1532 		    cfg->msix.msix_pba_bar);
1533 		if (rle == NULL || rle->res == NULL ||
1534 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1535 			return (ENXIO);
1536 	}
1537 	cfg->msix.msix_pba_res = rle->res;
1538 
1539 	if (bootverbose)
1540 		device_printf(child,
1541 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1542 		    *count, cfg->msix.msix_msgnum);
1543 	max = min(*count, cfg->msix.msix_msgnum);
1544 	for (i = 0; i < max; i++) {
1545 		/* Allocate a message. */
1546 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1547 		if (error) {
1548 			if (i == 0)
1549 				return (error);
1550 			break;
1551 		}
1552 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1553 		    irq, 1);
1554 	}
1555 	actual = i;
1556 
1557 	if (bootverbose) {
1558 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1559 		if (actual == 1)
1560 			device_printf(child, "using IRQ %lu for MSI-X\n",
1561 			    rle->start);
1562 		else {
1563 			int run;
1564 
1565 			/*
1566 			 * Be fancy and try to print contiguous runs of
1567 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1568 			 * 'run' is true if we are in a range.
1569 			 */
1570 			device_printf(child, "using IRQs %lu", rle->start);
1571 			irq = rle->start;
1572 			run = 0;
1573 			for (i = 1; i < actual; i++) {
1574 				rle = resource_list_find(&dinfo->resources,
1575 				    SYS_RES_IRQ, i + 1);
1576 
1577 				/* Still in a run? */
1578 				if (rle->start == irq + 1) {
1579 					run = 1;
1580 					irq++;
1581 					continue;
1582 				}
1583 
1584 				/* Finish previous range. */
1585 				if (run) {
1586 					printf("-%d", irq);
1587 					run = 0;
1588 				}
1589 
1590 				/* Start new range. */
1591 				printf(",%lu", rle->start);
1592 				irq = rle->start;
1593 			}
1594 
1595 			/* Unfinished range? */
1596 			if (run)
1597 				printf("-%d", irq);
1598 			printf(" for MSI-X\n");
1599 		}
1600 	}
1601 
1602 	/* Mask all vectors. */
1603 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1604 		pci_mask_msix(child, i);
1605 
1606 	/* Allocate and initialize vector data and virtual table. */
1607 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1608 	    M_DEVBUF, M_WAITOK | M_ZERO);
1609 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1610 	    M_DEVBUF, M_WAITOK | M_ZERO);
1611 	for (i = 0; i < actual; i++) {
1612 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1613 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1614 		cfg->msix.msix_table[i].mte_vector = i + 1;
1615 	}
1616 
1617 	/* Update control register to enable MSI-X. */
1618 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1619 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1620 	    cfg->msix.msix_ctrl, 2);
1621 
1622 	/* Update counts of alloc'd messages. */
1623 	cfg->msix.msix_alloc = actual;
1624 	cfg->msix.msix_table_len = actual;
1625 	*count = actual;
1626 	return (0);
1627 }
1628 
1629 /*
1630  * By default, pci_alloc_msix() will assign the allocated IRQ
1631  * resources consecutively to the first N messages in the MSI-X table.
1632  * However, device drivers may want to use different layouts if they
1633  * either receive fewer messages than they asked for, or they wish to
1634  * populate the MSI-X table sparsely.  This method allows the driver
1635  * to specify what layout it wants.  It must be called after a
1636  * successful pci_alloc_msix() but before any of the associated
1637  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1638  *
1639  * The 'vectors' array contains 'count' message vectors.  The array
1640  * maps directly to the MSI-X table in that index 0 in the array
1641  * specifies the vector for the first message in the MSI-X table, etc.
1642  * The vector value in each array index can either be 0 to indicate
1643  * that no vector should be assigned to a message slot, or it can be a
1644  * number from 1 to N (where N is the count returned from a
1645  * succcessful call to pci_alloc_msix()) to indicate which message
1646  * vector (IRQ) to be used for the corresponding message.
1647  *
1648  * On successful return, each message with a non-zero vector will have
1649  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1650  * 1.  Additionally, if any of the IRQs allocated via the previous
1651  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1652  * will be freed back to the system automatically.
1653  *
1654  * For example, suppose a driver has a MSI-X table with 6 messages and
1655  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1656  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1657  * C.  After the call to pci_alloc_msix(), the device will be setup to
1658  * have an MSI-X table of ABC--- (where - means no vector assigned).
1659  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1660  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1661  * be freed back to the system.  This device will also have valid
1662  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1663  *
1664  * In any case, the SYS_RES_IRQ rid X will always map to the message
1665  * at MSI-X table index X - 1 and will only be valid if a vector is
1666  * assigned to that table entry.
1667  */
1668 int
1669 pci_remap_msix_method(device_t dev, device_t child, int count,
1670     const u_int *vectors)
1671 {
1672 	struct pci_devinfo *dinfo = device_get_ivars(child);
1673 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1674 	struct resource_list_entry *rle;
1675 	int i, irq, j, *used;
1676 
1677 	/*
1678 	 * Have to have at least one message in the table but the
1679 	 * table can't be bigger than the actual MSI-X table in the
1680 	 * device.
1681 	 */
1682 	if (count == 0 || count > msix->msix_msgnum)
1683 		return (EINVAL);
1684 
1685 	/* Sanity check the vectors. */
1686 	for (i = 0; i < count; i++)
1687 		if (vectors[i] > msix->msix_alloc)
1688 			return (EINVAL);
1689 
1690 	/*
1691 	 * Make sure there aren't any holes in the vectors to be used.
1692 	 * It's a big pain to support it, and it doesn't really make
1693 	 * sense anyway.  Also, at least one vector must be used.
1694 	 */
1695 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1696 	    M_ZERO);
1697 	for (i = 0; i < count; i++)
1698 		if (vectors[i] != 0)
1699 			used[vectors[i] - 1] = 1;
1700 	for (i = 0; i < msix->msix_alloc - 1; i++)
1701 		if (used[i] == 0 && used[i + 1] == 1) {
1702 			free(used, M_DEVBUF);
1703 			return (EINVAL);
1704 		}
1705 	if (used[0] != 1) {
1706 		free(used, M_DEVBUF);
1707 		return (EINVAL);
1708 	}
1709 
1710 	/* Make sure none of the resources are allocated. */
1711 	for (i = 0; i < msix->msix_table_len; i++) {
1712 		if (msix->msix_table[i].mte_vector == 0)
1713 			continue;
1714 		if (msix->msix_table[i].mte_handlers > 0) {
1715 			free(used, M_DEVBUF);
1716 			return (EBUSY);
1717 		}
1718 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1719 		KASSERT(rle != NULL, ("missing resource"));
1720 		if (rle->res != NULL) {
1721 			free(used, M_DEVBUF);
1722 			return (EBUSY);
1723 		}
1724 	}
1725 
1726 	/* Free the existing resource list entries. */
1727 	for (i = 0; i < msix->msix_table_len; i++) {
1728 		if (msix->msix_table[i].mte_vector == 0)
1729 			continue;
1730 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1731 	}
1732 
1733 	/*
1734 	 * Build the new virtual table keeping track of which vectors are
1735 	 * used.
1736 	 */
1737 	free(msix->msix_table, M_DEVBUF);
1738 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1739 	    M_DEVBUF, M_WAITOK | M_ZERO);
1740 	for (i = 0; i < count; i++)
1741 		msix->msix_table[i].mte_vector = vectors[i];
1742 	msix->msix_table_len = count;
1743 
1744 	/* Free any unused IRQs and resize the vectors array if necessary. */
1745 	j = msix->msix_alloc - 1;
1746 	if (used[j] == 0) {
1747 		struct msix_vector *vec;
1748 
1749 		while (used[j] == 0) {
1750 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1751 			    msix->msix_vectors[j].mv_irq);
1752 			j--;
1753 		}
1754 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1755 		    M_WAITOK);
1756 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1757 		    (j + 1));
1758 		free(msix->msix_vectors, M_DEVBUF);
1759 		msix->msix_vectors = vec;
1760 		msix->msix_alloc = j + 1;
1761 	}
1762 	free(used, M_DEVBUF);
1763 
1764 	/* Map the IRQs onto the rids. */
1765 	for (i = 0; i < count; i++) {
1766 		if (vectors[i] == 0)
1767 			continue;
1768 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1769 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1770 		    irq, 1);
1771 	}
1772 
1773 	if (bootverbose) {
1774 		device_printf(child, "Remapped MSI-X IRQs as: ");
1775 		for (i = 0; i < count; i++) {
1776 			if (i != 0)
1777 				printf(", ");
1778 			if (vectors[i] == 0)
1779 				printf("---");
1780 			else
1781 				printf("%d",
1782 				    msix->msix_vectors[vectors[i]].mv_irq);
1783 		}
1784 		printf("\n");
1785 	}
1786 
1787 	return (0);
1788 }
1789 
1790 static int
1791 pci_release_msix(device_t dev, device_t child)
1792 {
1793 	struct pci_devinfo *dinfo = device_get_ivars(child);
1794 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1795 	struct resource_list_entry *rle;
1796 	int i;
1797 
1798 	/* Do we have any messages to release? */
1799 	if (msix->msix_alloc == 0)
1800 		return (ENODEV);
1801 
1802 	/* Make sure none of the resources are allocated. */
1803 	for (i = 0; i < msix->msix_table_len; i++) {
1804 		if (msix->msix_table[i].mte_vector == 0)
1805 			continue;
1806 		if (msix->msix_table[i].mte_handlers > 0)
1807 			return (EBUSY);
1808 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1809 		KASSERT(rle != NULL, ("missing resource"));
1810 		if (rle->res != NULL)
1811 			return (EBUSY);
1812 	}
1813 
1814 	/* Update control register to disable MSI-X. */
1815 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1816 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1817 	    msix->msix_ctrl, 2);
1818 
1819 	/* Free the resource list entries. */
1820 	for (i = 0; i < msix->msix_table_len; i++) {
1821 		if (msix->msix_table[i].mte_vector == 0)
1822 			continue;
1823 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1824 	}
1825 	free(msix->msix_table, M_DEVBUF);
1826 	msix->msix_table_len = 0;
1827 
1828 	/* Release the IRQs. */
1829 	for (i = 0; i < msix->msix_alloc; i++)
1830 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1831 		    msix->msix_vectors[i].mv_irq);
1832 	free(msix->msix_vectors, M_DEVBUF);
1833 	msix->msix_alloc = 0;
1834 	return (0);
1835 }
1836 
1837 /*
1838  * Return the max supported MSI-X messages this device supports.
1839  * Basically, assuming the MD code can alloc messages, this function
1840  * should return the maximum value that pci_alloc_msix() can return.
1841  * Thus, it is subject to the tunables, etc.
1842  */
1843 int
1844 pci_msix_count_method(device_t dev, device_t child)
1845 {
1846 	struct pci_devinfo *dinfo = device_get_ivars(child);
1847 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1848 
1849 	if (pci_do_msix && msix->msix_location != 0)
1850 		return (msix->msix_msgnum);
1851 	return (0);
1852 }
1853 
1854 /*
1855  * HyperTransport MSI mapping control
1856  */
1857 void
1858 pci_ht_map_msi(device_t dev, uint64_t addr)
1859 {
1860 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1861 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1862 
1863 	if (!ht->ht_msimap)
1864 		return;
1865 
1866 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1867 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1868 		/* Enable MSI -> HT mapping. */
1869 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1870 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1871 		    ht->ht_msictrl, 2);
1872 	}
1873 
1874 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1875 		/* Disable MSI -> HT mapping. */
1876 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1877 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1878 		    ht->ht_msictrl, 2);
1879 	}
1880 }
1881 
1882 int
1883 pci_get_max_read_req(device_t dev)
1884 {
1885 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1886 	int cap;
1887 	uint16_t val;
1888 
1889 	cap = dinfo->cfg.pcie.pcie_location;
1890 	if (cap == 0)
1891 		return (0);
1892 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1893 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1894 	val >>= 12;
1895 	return (1 << (val + 7));
1896 }
1897 
1898 int
1899 pci_set_max_read_req(device_t dev, int size)
1900 {
1901 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1902 	int cap;
1903 	uint16_t val;
1904 
1905 	cap = dinfo->cfg.pcie.pcie_location;
1906 	if (cap == 0)
1907 		return (0);
1908 	if (size < 128)
1909 		size = 128;
1910 	if (size > 4096)
1911 		size = 4096;
1912 	size = (1 << (fls(size) - 1));
1913 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1914 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1915 	val |= (fls(size) - 8) << 12;
1916 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1917 	return (size);
1918 }
1919 
1920 uint32_t
1921 pcie_read_config(device_t dev, int reg, int width)
1922 {
1923 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1924 	int cap;
1925 
1926 	cap = dinfo->cfg.pcie.pcie_location;
1927 	if (cap == 0) {
1928 		if (width == 2)
1929 			return (0xffff);
1930 		return (0xffffffff);
1931 	}
1932 
1933 	return (pci_read_config(dev, cap + reg, width));
1934 }
1935 
1936 void
1937 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
1938 {
1939 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1940 	int cap;
1941 
1942 	cap = dinfo->cfg.pcie.pcie_location;
1943 	if (cap == 0)
1944 		return;
1945 	pci_write_config(dev, cap + reg, value, width);
1946 }
1947 
1948 /*
1949  * Adjusts a PCI-e capability register by clearing the bits in mask
1950  * and setting the bits in (value & mask).  Bits not set in mask are
1951  * not adjusted.
1952  *
1953  * Returns the old value on success or all ones on failure.
1954  */
1955 uint32_t
1956 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
1957     int width)
1958 {
1959 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1960 	uint32_t old, new;
1961 	int cap;
1962 
1963 	cap = dinfo->cfg.pcie.pcie_location;
1964 	if (cap == 0) {
1965 		if (width == 2)
1966 			return (0xffff);
1967 		return (0xffffffff);
1968 	}
1969 
1970 	old = pci_read_config(dev, cap + reg, width);
1971 	new = old & ~mask;
1972 	new |= (value & mask);
1973 	pci_write_config(dev, cap + reg, new, width);
1974 	return (old);
1975 }
1976 
1977 /*
1978  * Support for MSI message signalled interrupts.
1979  */
1980 void
1981 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1982     uint16_t data)
1983 {
1984 	struct pci_devinfo *dinfo = device_get_ivars(child);
1985 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1986 
1987 	/* Write data and address values. */
1988 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
1989 	    address & 0xffffffff, 4);
1990 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1991 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1992 		    address >> 32, 4);
1993 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
1994 		    data, 2);
1995 	} else
1996 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
1997 		    2);
1998 
1999 	/* Enable MSI in the control register. */
2000 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2001 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2002 	    msi->msi_ctrl, 2);
2003 
2004 	/* Enable MSI -> HT mapping. */
2005 	pci_ht_map_msi(child, address);
2006 }
2007 
2008 void
2009 pci_disable_msi_method(device_t dev, device_t child)
2010 {
2011 	struct pci_devinfo *dinfo = device_get_ivars(child);
2012 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2013 
2014 	/* Disable MSI -> HT mapping. */
2015 	pci_ht_map_msi(child, 0);
2016 
2017 	/* Disable MSI in the control register. */
2018 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2019 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2020 	    msi->msi_ctrl, 2);
2021 }
2022 
2023 /*
2024  * Restore MSI registers during resume.  If MSI is enabled then
2025  * restore the data and address registers in addition to the control
2026  * register.
2027  */
2028 static void
2029 pci_resume_msi(device_t dev)
2030 {
2031 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2032 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2033 	uint64_t address;
2034 	uint16_t data;
2035 
2036 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2037 		address = msi->msi_addr;
2038 		data = msi->msi_data;
2039 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2040 		    address & 0xffffffff, 4);
2041 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2042 			pci_write_config(dev, msi->msi_location +
2043 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2044 			pci_write_config(dev, msi->msi_location +
2045 			    PCIR_MSI_DATA_64BIT, data, 2);
2046 		} else
2047 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2048 			    data, 2);
2049 	}
2050 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2051 	    2);
2052 }
2053 
2054 static int
2055 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2056 {
2057 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2058 	pcicfgregs *cfg = &dinfo->cfg;
2059 	struct resource_list_entry *rle;
2060 	struct msix_table_entry *mte;
2061 	struct msix_vector *mv;
2062 	uint64_t addr;
2063 	uint32_t data;
2064 	int error, i, j;
2065 
2066 	/*
2067 	 * Handle MSI first.  We try to find this IRQ among our list
2068 	 * of MSI IRQs.  If we find it, we request updated address and
2069 	 * data registers and apply the results.
2070 	 */
2071 	if (cfg->msi.msi_alloc > 0) {
2072 
2073 		/* If we don't have any active handlers, nothing to do. */
2074 		if (cfg->msi.msi_handlers == 0)
2075 			return (0);
2076 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2077 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2078 			    i + 1);
2079 			if (rle->start == irq) {
2080 				error = PCIB_MAP_MSI(device_get_parent(bus),
2081 				    dev, irq, &addr, &data);
2082 				if (error)
2083 					return (error);
2084 				pci_disable_msi(dev);
2085 				dinfo->cfg.msi.msi_addr = addr;
2086 				dinfo->cfg.msi.msi_data = data;
2087 				pci_enable_msi(dev, addr, data);
2088 				return (0);
2089 			}
2090 		}
2091 		return (ENOENT);
2092 	}
2093 
2094 	/*
2095 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2096 	 * we request the updated mapping info.  If that works, we go
2097 	 * through all the slots that use this IRQ and update them.
2098 	 */
2099 	if (cfg->msix.msix_alloc > 0) {
2100 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2101 			mv = &cfg->msix.msix_vectors[i];
2102 			if (mv->mv_irq == irq) {
2103 				error = PCIB_MAP_MSI(device_get_parent(bus),
2104 				    dev, irq, &addr, &data);
2105 				if (error)
2106 					return (error);
2107 				mv->mv_address = addr;
2108 				mv->mv_data = data;
2109 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2110 					mte = &cfg->msix.msix_table[j];
2111 					if (mte->mte_vector != i + 1)
2112 						continue;
2113 					if (mte->mte_handlers == 0)
2114 						continue;
2115 					pci_mask_msix(dev, j);
2116 					pci_enable_msix(dev, j, addr, data);
2117 					pci_unmask_msix(dev, j);
2118 				}
2119 			}
2120 		}
2121 		return (ENOENT);
2122 	}
2123 
2124 	return (ENOENT);
2125 }
2126 
2127 /*
2128  * Returns true if the specified device is blacklisted because MSI
2129  * doesn't work.
2130  */
2131 int
2132 pci_msi_device_blacklisted(device_t dev)
2133 {
2134 
2135 	if (!pci_honor_msi_blacklist)
2136 		return (0);
2137 
2138 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2139 }
2140 
2141 /*
2142  * Determine if MSI is blacklisted globally on this system.  Currently,
2143  * we just check for blacklisted chipsets as represented by the
2144  * host-PCI bridge at device 0:0:0.  In the future, it may become
2145  * necessary to check other system attributes, such as the kenv values
2146  * that give the motherboard manufacturer and model number.
2147  */
2148 static int
2149 pci_msi_blacklisted(void)
2150 {
2151 	device_t dev;
2152 
2153 	if (!pci_honor_msi_blacklist)
2154 		return (0);
2155 
2156 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2157 	if (!(pcie_chipset || pcix_chipset)) {
2158 		if (vm_guest != VM_GUEST_NO) {
2159 			/*
2160 			 * Whitelist older chipsets in virtual
2161 			 * machines known to support MSI.
2162 			 */
2163 			dev = pci_find_bsf(0, 0, 0);
2164 			if (dev != NULL)
2165 				return (!pci_has_quirk(pci_get_devid(dev),
2166 					PCI_QUIRK_ENABLE_MSI_VM));
2167 		}
2168 		return (1);
2169 	}
2170 
2171 	dev = pci_find_bsf(0, 0, 0);
2172 	if (dev != NULL)
2173 		return (pci_msi_device_blacklisted(dev));
2174 	return (0);
2175 }
2176 
2177 /*
2178  * Returns true if the specified device is blacklisted because MSI-X
2179  * doesn't work.  Note that this assumes that if MSI doesn't work,
2180  * MSI-X doesn't either.
2181  */
2182 int
2183 pci_msix_device_blacklisted(device_t dev)
2184 {
2185 
2186 	if (!pci_honor_msi_blacklist)
2187 		return (0);
2188 
2189 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2190 		return (1);
2191 
2192 	return (pci_msi_device_blacklisted(dev));
2193 }
2194 
2195 /*
2196  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2197  * is blacklisted, assume that MSI-X is as well.  Check for additional
2198  * chipsets where MSI works but MSI-X does not.
2199  */
2200 static int
2201 pci_msix_blacklisted(void)
2202 {
2203 	device_t dev;
2204 
2205 	if (!pci_honor_msi_blacklist)
2206 		return (0);
2207 
2208 	dev = pci_find_bsf(0, 0, 0);
2209 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2210 	    PCI_QUIRK_DISABLE_MSIX))
2211 		return (1);
2212 
2213 	return (pci_msi_blacklisted());
2214 }
2215 
2216 /*
2217  * Attempt to allocate *count MSI messages.  The actual number allocated is
2218  * returned in *count.  After this function returns, each message will be
2219  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2220  */
2221 int
2222 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2223 {
2224 	struct pci_devinfo *dinfo = device_get_ivars(child);
2225 	pcicfgregs *cfg = &dinfo->cfg;
2226 	struct resource_list_entry *rle;
2227 	int actual, error, i, irqs[32];
2228 	uint16_t ctrl;
2229 
2230 	/* Don't let count == 0 get us into trouble. */
2231 	if (*count == 0)
2232 		return (EINVAL);
2233 
2234 	/* If rid 0 is allocated, then fail. */
2235 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2236 	if (rle != NULL && rle->res != NULL)
2237 		return (ENXIO);
2238 
2239 	/* Already have allocated messages? */
2240 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2241 		return (ENXIO);
2242 
2243 	/* If MSI is blacklisted for this system, fail. */
2244 	if (pci_msi_blacklisted())
2245 		return (ENXIO);
2246 
2247 	/* MSI capability present? */
2248 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2249 		return (ENODEV);
2250 
2251 	if (bootverbose)
2252 		device_printf(child,
2253 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2254 		    *count, cfg->msi.msi_msgnum);
2255 
2256 	/* Don't ask for more than the device supports. */
2257 	actual = min(*count, cfg->msi.msi_msgnum);
2258 
2259 	/* Don't ask for more than 32 messages. */
2260 	actual = min(actual, 32);
2261 
2262 	/* MSI requires power of 2 number of messages. */
2263 	if (!powerof2(actual))
2264 		return (EINVAL);
2265 
2266 	for (;;) {
2267 		/* Try to allocate N messages. */
2268 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2269 		    actual, irqs);
2270 		if (error == 0)
2271 			break;
2272 		if (actual == 1)
2273 			return (error);
2274 
2275 		/* Try N / 2. */
2276 		actual >>= 1;
2277 	}
2278 
2279 	/*
2280 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2281 	 * resources in the irqs[] array, so add new resources
2282 	 * starting at rid 1.
2283 	 */
2284 	for (i = 0; i < actual; i++)
2285 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2286 		    irqs[i], irqs[i], 1);
2287 
2288 	if (bootverbose) {
2289 		if (actual == 1)
2290 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2291 		else {
2292 			int run;
2293 
2294 			/*
2295 			 * Be fancy and try to print contiguous runs
2296 			 * of IRQ values as ranges.  'run' is true if
2297 			 * we are in a range.
2298 			 */
2299 			device_printf(child, "using IRQs %d", irqs[0]);
2300 			run = 0;
2301 			for (i = 1; i < actual; i++) {
2302 
2303 				/* Still in a run? */
2304 				if (irqs[i] == irqs[i - 1] + 1) {
2305 					run = 1;
2306 					continue;
2307 				}
2308 
2309 				/* Finish previous range. */
2310 				if (run) {
2311 					printf("-%d", irqs[i - 1]);
2312 					run = 0;
2313 				}
2314 
2315 				/* Start new range. */
2316 				printf(",%d", irqs[i]);
2317 			}
2318 
2319 			/* Unfinished range? */
2320 			if (run)
2321 				printf("-%d", irqs[actual - 1]);
2322 			printf(" for MSI\n");
2323 		}
2324 	}
2325 
2326 	/* Update control register with actual count. */
2327 	ctrl = cfg->msi.msi_ctrl;
2328 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2329 	ctrl |= (ffs(actual) - 1) << 4;
2330 	cfg->msi.msi_ctrl = ctrl;
2331 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2332 
2333 	/* Update counts of alloc'd messages. */
2334 	cfg->msi.msi_alloc = actual;
2335 	cfg->msi.msi_handlers = 0;
2336 	*count = actual;
2337 	return (0);
2338 }
2339 
2340 /* Release the MSI messages associated with this device. */
2341 int
2342 pci_release_msi_method(device_t dev, device_t child)
2343 {
2344 	struct pci_devinfo *dinfo = device_get_ivars(child);
2345 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2346 	struct resource_list_entry *rle;
2347 	int error, i, irqs[32];
2348 
2349 	/* Try MSI-X first. */
2350 	error = pci_release_msix(dev, child);
2351 	if (error != ENODEV)
2352 		return (error);
2353 
2354 	/* Do we have any messages to release? */
2355 	if (msi->msi_alloc == 0)
2356 		return (ENODEV);
2357 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2358 
2359 	/* Make sure none of the resources are allocated. */
2360 	if (msi->msi_handlers > 0)
2361 		return (EBUSY);
2362 	for (i = 0; i < msi->msi_alloc; i++) {
2363 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2364 		KASSERT(rle != NULL, ("missing MSI resource"));
2365 		if (rle->res != NULL)
2366 			return (EBUSY);
2367 		irqs[i] = rle->start;
2368 	}
2369 
2370 	/* Update control register with 0 count. */
2371 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2372 	    ("%s: MSI still enabled", __func__));
2373 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2374 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2375 	    msi->msi_ctrl, 2);
2376 
2377 	/* Release the messages. */
2378 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2379 	for (i = 0; i < msi->msi_alloc; i++)
2380 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2381 
2382 	/* Update alloc count. */
2383 	msi->msi_alloc = 0;
2384 	msi->msi_addr = 0;
2385 	msi->msi_data = 0;
2386 	return (0);
2387 }
2388 
2389 /*
2390  * Return the max supported MSI messages this device supports.
2391  * Basically, assuming the MD code can alloc messages, this function
2392  * should return the maximum value that pci_alloc_msi() can return.
2393  * Thus, it is subject to the tunables, etc.
2394  */
2395 int
2396 pci_msi_count_method(device_t dev, device_t child)
2397 {
2398 	struct pci_devinfo *dinfo = device_get_ivars(child);
2399 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2400 
2401 	if (pci_do_msi && msi->msi_location != 0)
2402 		return (msi->msi_msgnum);
2403 	return (0);
2404 }
2405 
2406 /* free pcicfgregs structure and all depending data structures */
2407 
2408 int
2409 pci_freecfg(struct pci_devinfo *dinfo)
2410 {
2411 	struct devlist *devlist_head;
2412 	struct pci_map *pm, *next;
2413 	int i;
2414 
2415 	devlist_head = &pci_devq;
2416 
2417 	if (dinfo->cfg.vpd.vpd_reg) {
2418 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2419 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2420 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2421 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2422 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2423 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2424 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2425 	}
2426 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2427 		free(pm, M_DEVBUF);
2428 	}
2429 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2430 	free(dinfo, M_DEVBUF);
2431 
2432 	/* increment the generation count */
2433 	pci_generation++;
2434 
2435 	/* we're losing one device */
2436 	pci_numdevs--;
2437 	return (0);
2438 }
2439 
/*
 * PCI power management
 */
2443 int
2444 pci_set_powerstate_method(device_t dev, device_t child, int state)
2445 {
2446 	struct pci_devinfo *dinfo = device_get_ivars(child);
2447 	pcicfgregs *cfg = &dinfo->cfg;
2448 	uint16_t status;
2449 	int oldstate, highest, delay;
2450 
2451 	if (cfg->pp.pp_cap == 0)
2452 		return (EOPNOTSUPP);
2453 
2454 	/*
2455 	 * Optimize a no state change request away.  While it would be OK to
2456 	 * write to the hardware in theory, some devices have shown odd
2457 	 * behavior when going from D3 -> D3.
2458 	 */
2459 	oldstate = pci_get_powerstate(child);
2460 	if (oldstate == state)
2461 		return (0);
2462 
2463 	/*
2464 	 * The PCI power management specification states that after a state
2465 	 * transition between PCI power states, system software must
2466 	 * guarantee a minimal delay before the function accesses the device.
2467 	 * Compute the worst case delay that we need to guarantee before we
2468 	 * access the device.  Many devices will be responsive much more
2469 	 * quickly than this delay, but there are some that don't respond
2470 	 * instantly to state changes.  Transitions to/from D3 state require
2471 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2472 	 * is done below with DELAY rather than a sleeper function because
2473 	 * this function can be called from contexts where we cannot sleep.
2474 	 */
2475 	highest = (oldstate > state) ? oldstate : state;
2476 	if (highest == PCI_POWERSTATE_D3)
2477 	    delay = 10000;
2478 	else if (highest == PCI_POWERSTATE_D2)
2479 	    delay = 200;
2480 	else
2481 	    delay = 0;
2482 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2483 	    & ~PCIM_PSTAT_DMASK;
2484 	switch (state) {
2485 	case PCI_POWERSTATE_D0:
2486 		status |= PCIM_PSTAT_D0;
2487 		break;
2488 	case PCI_POWERSTATE_D1:
2489 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2490 			return (EOPNOTSUPP);
2491 		status |= PCIM_PSTAT_D1;
2492 		break;
2493 	case PCI_POWERSTATE_D2:
2494 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2495 			return (EOPNOTSUPP);
2496 		status |= PCIM_PSTAT_D2;
2497 		break;
2498 	case PCI_POWERSTATE_D3:
2499 		status |= PCIM_PSTAT_D3;
2500 		break;
2501 	default:
2502 		return (EINVAL);
2503 	}
2504 
2505 	if (bootverbose)
2506 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2507 		    state);
2508 
2509 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2510 	if (delay)
2511 		DELAY(delay);
2512 	return (0);
2513 }
2514 
2515 int
2516 pci_get_powerstate_method(device_t dev, device_t child)
2517 {
2518 	struct pci_devinfo *dinfo = device_get_ivars(child);
2519 	pcicfgregs *cfg = &dinfo->cfg;
2520 	uint16_t status;
2521 	int result;
2522 
2523 	if (cfg->pp.pp_cap != 0) {
2524 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2525 		switch (status & PCIM_PSTAT_DMASK) {
2526 		case PCIM_PSTAT_D0:
2527 			result = PCI_POWERSTATE_D0;
2528 			break;
2529 		case PCIM_PSTAT_D1:
2530 			result = PCI_POWERSTATE_D1;
2531 			break;
2532 		case PCIM_PSTAT_D2:
2533 			result = PCI_POWERSTATE_D2;
2534 			break;
2535 		case PCIM_PSTAT_D3:
2536 			result = PCI_POWERSTATE_D3;
2537 			break;
2538 		default:
2539 			result = PCI_POWERSTATE_UNKNOWN;
2540 			break;
2541 		}
2542 	} else {
2543 		/* No support, device is always at D0 */
2544 		result = PCI_POWERSTATE_D0;
2545 	}
2546 	return (result);
2547 }
2548 
2549 /*
2550  * Some convenience functions for PCI device drivers.
2551  */
2552 
2553 static __inline void
2554 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2555 {
2556 	uint16_t	command;
2557 
2558 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2559 	command |= bit;
2560 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2561 }
2562 
2563 static __inline void
2564 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2565 {
2566 	uint16_t	command;
2567 
2568 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2569 	command &= ~bit;
2570 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2571 }
2572 
2573 int
2574 pci_enable_busmaster_method(device_t dev, device_t child)
2575 {
2576 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2577 	return (0);
2578 }
2579 
2580 int
2581 pci_disable_busmaster_method(device_t dev, device_t child)
2582 {
2583 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2584 	return (0);
2585 }
2586 
2587 int
2588 pci_enable_io_method(device_t dev, device_t child, int space)
2589 {
2590 	uint16_t bit;
2591 
2592 	switch(space) {
2593 	case SYS_RES_IOPORT:
2594 		bit = PCIM_CMD_PORTEN;
2595 		break;
2596 	case SYS_RES_MEMORY:
2597 		bit = PCIM_CMD_MEMEN;
2598 		break;
2599 	default:
2600 		return (EINVAL);
2601 	}
2602 	pci_set_command_bit(dev, child, bit);
2603 	return (0);
2604 }
2605 
2606 int
2607 pci_disable_io_method(device_t dev, device_t child, int space)
2608 {
2609 	uint16_t bit;
2610 
2611 	switch(space) {
2612 	case SYS_RES_IOPORT:
2613 		bit = PCIM_CMD_PORTEN;
2614 		break;
2615 	case SYS_RES_MEMORY:
2616 		bit = PCIM_CMD_MEMEN;
2617 		break;
2618 	default:
2619 		return (EINVAL);
2620 	}
2621 	pci_clear_command_bit(dev, child, bit);
2622 	return (0);
2623 }
2624 
2625 /*
2626  * New style pci driver.  Parent device is either a pci-host-bridge or a
2627  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2628  */
2629 
2630 void
2631 pci_print_verbose(struct pci_devinfo *dinfo)
2632 {
2633 
2634 	if (bootverbose) {
2635 		pcicfgregs *cfg = &dinfo->cfg;
2636 
2637 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2638 		    cfg->vendor, cfg->device, cfg->revid);
2639 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2640 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2641 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2642 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2643 		    cfg->mfdev);
2644 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2645 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2646 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2647 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2648 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2649 		if (cfg->intpin > 0)
2650 			printf("\tintpin=%c, irq=%d\n",
2651 			    cfg->intpin +'a' -1, cfg->intline);
2652 		if (cfg->pp.pp_cap) {
2653 			uint16_t status;
2654 
2655 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2656 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2657 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2658 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2659 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2660 			    status & PCIM_PSTAT_DMASK);
2661 		}
2662 		if (cfg->msi.msi_location) {
2663 			int ctrl;
2664 
2665 			ctrl = cfg->msi.msi_ctrl;
2666 			printf("\tMSI supports %d message%s%s%s\n",
2667 			    cfg->msi.msi_msgnum,
2668 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2669 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2670 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2671 		}
2672 		if (cfg->msix.msix_location) {
2673 			printf("\tMSI-X supports %d message%s ",
2674 			    cfg->msix.msix_msgnum,
2675 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2676 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2677 				printf("in map 0x%x\n",
2678 				    cfg->msix.msix_table_bar);
2679 			else
2680 				printf("in maps 0x%x and 0x%x\n",
2681 				    cfg->msix.msix_table_bar,
2682 				    cfg->msix.msix_pba_bar);
2683 		}
2684 	}
2685 }
2686 
2687 static int
2688 pci_porten(device_t dev)
2689 {
2690 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2691 }
2692 
2693 static int
2694 pci_memen(device_t dev)
2695 {
2696 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2697 }
2698 
2699 void
2700 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2701     int *bar64)
2702 {
2703 	struct pci_devinfo *dinfo;
2704 	pci_addr_t map, testval;
2705 	int ln2range;
2706 	uint16_t cmd;
2707 
2708 	/*
2709 	 * The device ROM BAR is special.  It is always a 32-bit
2710 	 * memory BAR.  Bit 0 is special and should not be set when
2711 	 * sizing the BAR.
2712 	 */
2713 	dinfo = device_get_ivars(dev);
2714 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2715 		map = pci_read_config(dev, reg, 4);
2716 		pci_write_config(dev, reg, 0xfffffffe, 4);
2717 		testval = pci_read_config(dev, reg, 4);
2718 		pci_write_config(dev, reg, map, 4);
2719 		*mapp = map;
2720 		*testvalp = testval;
2721 		if (bar64 != NULL)
2722 			*bar64 = 0;
2723 		return;
2724 	}
2725 
2726 	map = pci_read_config(dev, reg, 4);
2727 	ln2range = pci_maprange(map);
2728 	if (ln2range == 64)
2729 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2730 
2731 	/*
2732 	 * Disable decoding via the command register before
2733 	 * determining the BAR's length since we will be placing it in
2734 	 * a weird state.
2735 	 */
2736 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2737 	pci_write_config(dev, PCIR_COMMAND,
2738 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2739 
2740 	/*
2741 	 * Determine the BAR's length by writing all 1's.  The bottom
2742 	 * log_2(size) bits of the BAR will stick as 0 when we read
2743 	 * the value back.
2744 	 */
2745 	pci_write_config(dev, reg, 0xffffffff, 4);
2746 	testval = pci_read_config(dev, reg, 4);
2747 	if (ln2range == 64) {
2748 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2749 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2750 	}
2751 
2752 	/*
2753 	 * Restore the original value of the BAR.  We may have reprogrammed
2754 	 * the BAR of the low-level console device and when booting verbose,
2755 	 * we need the console device addressable.
2756 	 */
2757 	pci_write_config(dev, reg, map, 4);
2758 	if (ln2range == 64)
2759 		pci_write_config(dev, reg + 4, map >> 32, 4);
2760 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2761 
2762 	*mapp = map;
2763 	*testvalp = testval;
2764 	if (bar64 != NULL)
2765 		*bar64 = (ln2range == 64);
2766 }
2767 
2768 static void
2769 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2770 {
2771 	struct pci_devinfo *dinfo;
2772 	int ln2range;
2773 
2774 	/* The device ROM BAR is always a 32-bit memory BAR. */
2775 	dinfo = device_get_ivars(dev);
2776 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2777 		ln2range = 32;
2778 	else
2779 		ln2range = pci_maprange(pm->pm_value);
2780 	pci_write_config(dev, pm->pm_reg, base, 4);
2781 	if (ln2range == 64)
2782 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2783 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2784 	if (ln2range == 64)
2785 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2786 		    pm->pm_reg + 4, 4) << 32;
2787 }
2788 
2789 struct pci_map *
2790 pci_find_bar(device_t dev, int reg)
2791 {
2792 	struct pci_devinfo *dinfo;
2793 	struct pci_map *pm;
2794 
2795 	dinfo = device_get_ivars(dev);
2796 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2797 		if (pm->pm_reg == reg)
2798 			return (pm);
2799 	}
2800 	return (NULL);
2801 }
2802 
2803 int
2804 pci_bar_enabled(device_t dev, struct pci_map *pm)
2805 {
2806 	struct pci_devinfo *dinfo;
2807 	uint16_t cmd;
2808 
2809 	dinfo = device_get_ivars(dev);
2810 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2811 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2812 		return (0);
2813 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2814 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2815 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2816 	else
2817 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2818 }
2819 
2820 struct pci_map *
2821 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2822 {
2823 	struct pci_devinfo *dinfo;
2824 	struct pci_map *pm, *prev;
2825 
2826 	dinfo = device_get_ivars(dev);
2827 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2828 	pm->pm_reg = reg;
2829 	pm->pm_value = value;
2830 	pm->pm_size = size;
2831 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2832 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2833 		    reg));
2834 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2835 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2836 			break;
2837 	}
2838 	if (prev != NULL)
2839 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2840 	else
2841 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2842 	return (pm);
2843 }
2844 
2845 static void
2846 pci_restore_bars(device_t dev)
2847 {
2848 	struct pci_devinfo *dinfo;
2849 	struct pci_map *pm;
2850 	int ln2range;
2851 
2852 	dinfo = device_get_ivars(dev);
2853 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2854 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2855 			ln2range = 32;
2856 		else
2857 			ln2range = pci_maprange(pm->pm_value);
2858 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2859 		if (ln2range == 64)
2860 			pci_write_config(dev, pm->pm_reg + 4,
2861 			    pm->pm_value >> 32, 4);
2862 	}
2863 }
2864 
2865 /*
2866  * Add a resource based on a pci map register. Return 1 if the map
2867  * register is a 32bit map register or 2 if it is a 64bit register.
2868  */
2869 static int
2870 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2871     int force, int prefetch)
2872 {
2873 	struct pci_map *pm;
2874 	pci_addr_t base, map, testval;
2875 	pci_addr_t start, end, count;
2876 	int barlen, basezero, flags, maprange, mapsize, type;
2877 	uint16_t cmd;
2878 	struct resource *res;
2879 
2880 	/*
2881 	 * The BAR may already exist if the device is a CardBus card
2882 	 * whose CIS is stored in this BAR.
2883 	 */
2884 	pm = pci_find_bar(dev, reg);
2885 	if (pm != NULL) {
2886 		maprange = pci_maprange(pm->pm_value);
2887 		barlen = maprange == 64 ? 2 : 1;
2888 		return (barlen);
2889 	}
2890 
2891 	pci_read_bar(dev, reg, &map, &testval, NULL);
2892 	if (PCI_BAR_MEM(map)) {
2893 		type = SYS_RES_MEMORY;
2894 		if (map & PCIM_BAR_MEM_PREFETCH)
2895 			prefetch = 1;
2896 	} else
2897 		type = SYS_RES_IOPORT;
2898 	mapsize = pci_mapsize(testval);
2899 	base = pci_mapbase(map);
2900 #ifdef __PCI_BAR_ZERO_VALID
2901 	basezero = 0;
2902 #else
2903 	basezero = base == 0;
2904 #endif
2905 	maprange = pci_maprange(map);
2906 	barlen = maprange == 64 ? 2 : 1;
2907 
2908 	/*
2909 	 * For I/O registers, if bottom bit is set, and the next bit up
2910 	 * isn't clear, we know we have a BAR that doesn't conform to the
2911 	 * spec, so ignore it.  Also, sanity check the size of the data
2912 	 * areas to the type of memory involved.  Memory must be at least
2913 	 * 16 bytes in size, while I/O ranges must be at least 4.
2914 	 */
2915 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2916 		return (barlen);
2917 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2918 	    (type == SYS_RES_IOPORT && mapsize < 2))
2919 		return (barlen);
2920 
2921 	/* Save a record of this BAR. */
2922 	pm = pci_add_bar(dev, reg, map, mapsize);
2923 	if (bootverbose) {
2924 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2925 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2926 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2927 			printf(", port disabled\n");
2928 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2929 			printf(", memory disabled\n");
2930 		else
2931 			printf(", enabled\n");
2932 	}
2933 
2934 	/*
2935 	 * If base is 0, then we have problems if this architecture does
2936 	 * not allow that.  It is best to ignore such entries for the
2937 	 * moment.  These will be allocated later if the driver specifically
2938 	 * requests them.  However, some removable busses look better when
2939 	 * all resources are allocated, so allow '0' to be overriden.
2940 	 *
2941 	 * Similarly treat maps whose values is the same as the test value
2942 	 * read back.  These maps have had all f's written to them by the
2943 	 * BIOS in an attempt to disable the resources.
2944 	 */
2945 	if (!force && (basezero || map == testval))
2946 		return (barlen);
2947 	if ((u_long)base != base) {
2948 		device_printf(bus,
2949 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2950 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2951 		    pci_get_function(dev), reg);
2952 		return (barlen);
2953 	}
2954 
2955 	/*
2956 	 * This code theoretically does the right thing, but has
2957 	 * undesirable side effects in some cases where peripherals
2958 	 * respond oddly to having these bits enabled.  Let the user
2959 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2960 	 * default).
2961 	 */
2962 	if (pci_enable_io_modes) {
2963 		/* Turn on resources that have been left off by a lazy BIOS */
2964 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2965 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2966 			cmd |= PCIM_CMD_PORTEN;
2967 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2968 		}
2969 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2970 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2971 			cmd |= PCIM_CMD_MEMEN;
2972 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2973 		}
2974 	} else {
2975 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2976 			return (barlen);
2977 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2978 			return (barlen);
2979 	}
2980 
2981 	count = (pci_addr_t)1 << mapsize;
2982 	flags = RF_ALIGNMENT_LOG2(mapsize);
2983 	if (prefetch)
2984 		flags |= RF_PREFETCHABLE;
2985 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2986 		start = 0;	/* Let the parent decide. */
2987 		end = ~0ul;
2988 	} else {
2989 		start = base;
2990 		end = base + count - 1;
2991 	}
2992 	resource_list_add(rl, type, reg, start, end, count);
2993 
2994 	/*
2995 	 * Try to allocate the resource for this BAR from our parent
2996 	 * so that this resource range is already reserved.  The
2997 	 * driver for this device will later inherit this resource in
2998 	 * pci_alloc_resource().
2999 	 */
3000 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3001 	    flags);
3002 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
3003 		/*
3004 		 * If the allocation fails, try to allocate a resource for
3005 		 * this BAR using any available range.  The firmware felt
3006 		 * it was important enough to assign a resource, so don't
3007 		 * disable decoding if we can help it.
3008 		 */
3009 		resource_list_delete(rl, type, reg);
3010 		resource_list_add(rl, type, reg, 0, ~0ul, count);
3011 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
3012 		    count, flags);
3013 	}
3014 	if (res == NULL) {
3015 		/*
3016 		 * If the allocation fails, delete the resource list entry
3017 		 * and disable decoding for this device.
3018 		 *
3019 		 * If the driver requests this resource in the future,
3020 		 * pci_reserve_map() will try to allocate a fresh
3021 		 * resource range.
3022 		 */
3023 		resource_list_delete(rl, type, reg);
3024 		pci_disable_io(dev, type);
3025 		if (bootverbose)
3026 			device_printf(bus,
3027 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3028 			    pci_get_domain(dev), pci_get_bus(dev),
3029 			    pci_get_slot(dev), pci_get_function(dev), reg);
3030 	} else {
3031 		start = rman_get_start(res);
3032 		pci_write_bar(dev, pm, start);
3033 	}
3034 	return (barlen);
3035 }
3036 
3037 /*
3038  * For ATA devices we need to decide early what addressing mode to use.
3039  * Legacy demands that the primary and secondary ATA ports sits on the
3040  * same addresses that old ISA hardware did. This dictates that we use
3041  * those addresses and ignore the BAR's if we cannot set PCI native
3042  * addressing mode.
3043  */
3044 static void
3045 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3046     uint32_t prefetchmask)
3047 {
3048 	int rid, type, progif;
3049 #if 0
3050 	/* if this device supports PCI native addressing use it */
3051 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3052 	if ((progif & 0x8a) == 0x8a) {
3053 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3054 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3055 			printf("Trying ATA native PCI addressing mode\n");
3056 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3057 		}
3058 	}
3059 #endif
3060 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3061 	type = SYS_RES_IOPORT;
3062 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3063 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3064 		    prefetchmask & (1 << 0));
3065 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3066 		    prefetchmask & (1 << 1));
3067 	} else {
3068 		rid = PCIR_BAR(0);
3069 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3070 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3071 		    0x1f7, 8, 0);
3072 		rid = PCIR_BAR(1);
3073 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3074 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3075 		    0x3f6, 1, 0);
3076 	}
3077 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3078 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3079 		    prefetchmask & (1 << 2));
3080 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3081 		    prefetchmask & (1 << 3));
3082 	} else {
3083 		rid = PCIR_BAR(2);
3084 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3085 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3086 		    0x177, 8, 0);
3087 		rid = PCIR_BAR(3);
3088 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3089 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3090 		    0x376, 1, 0);
3091 	}
3092 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3093 	    prefetchmask & (1 << 4));
3094 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3095 	    prefetchmask & (1 << 5));
3096 }
3097 
3098 static void
3099 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3100 {
3101 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3102 	pcicfgregs *cfg = &dinfo->cfg;
3103 	char tunable_name[64];
3104 	int irq;
3105 
3106 	/* Has to have an intpin to have an interrupt. */
3107 	if (cfg->intpin == 0)
3108 		return;
3109 
3110 	/* Let the user override the IRQ with a tunable. */
3111 	irq = PCI_INVALID_IRQ;
3112 	snprintf(tunable_name, sizeof(tunable_name),
3113 	    "hw.pci%d.%d.%d.INT%c.irq",
3114 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3115 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3116 		irq = PCI_INVALID_IRQ;
3117 
3118 	/*
3119 	 * If we didn't get an IRQ via the tunable, then we either use the
3120 	 * IRQ value in the intline register or we ask the bus to route an
3121 	 * interrupt for us.  If force_route is true, then we only use the
3122 	 * value in the intline register if the bus was unable to assign an
3123 	 * IRQ.
3124 	 */
3125 	if (!PCI_INTERRUPT_VALID(irq)) {
3126 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3127 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3128 		if (!PCI_INTERRUPT_VALID(irq))
3129 			irq = cfg->intline;
3130 	}
3131 
3132 	/* If after all that we don't have an IRQ, just bail. */
3133 	if (!PCI_INTERRUPT_VALID(irq))
3134 		return;
3135 
3136 	/* Update the config register if it changed. */
3137 	if (irq != cfg->intline) {
3138 		cfg->intline = irq;
3139 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3140 	}
3141 
3142 	/* Add this IRQ as rid 0 interrupt resource. */
3143 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3144 }
3145 
3146 /* Perform early OHCI takeover from SMM. */
3147 static void
3148 ohci_early_takeover(device_t self)
3149 {
3150 	struct resource *res;
3151 	uint32_t ctl;
3152 	int rid;
3153 	int i;
3154 
3155 	rid = PCIR_BAR(0);
3156 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3157 	if (res == NULL)
3158 		return;
3159 
3160 	ctl = bus_read_4(res, OHCI_CONTROL);
3161 	if (ctl & OHCI_IR) {
3162 		if (bootverbose)
3163 			printf("ohci early: "
3164 			    "SMM active, request owner change\n");
3165 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3166 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3167 			DELAY(1000);
3168 			ctl = bus_read_4(res, OHCI_CONTROL);
3169 		}
3170 		if (ctl & OHCI_IR) {
3171 			if (bootverbose)
3172 				printf("ohci early: "
3173 				    "SMM does not respond, resetting\n");
3174 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3175 		}
3176 		/* Disable interrupts */
3177 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3178 	}
3179 
3180 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3181 }
3182 
3183 /* Perform early UHCI takeover from SMM. */
3184 static void
3185 uhci_early_takeover(device_t self)
3186 {
3187 	struct resource *res;
3188 	int rid;
3189 
3190 	/*
3191 	 * Set the PIRQD enable bit and switch off all the others. We don't
3192 	 * want legacy support to interfere with us XXX Does this also mean
3193 	 * that the BIOS won't touch the keyboard anymore if it is connected
3194 	 * to the ports of the root hub?
3195 	 */
3196 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3197 
3198 	/* Disable interrupts */
3199 	rid = PCI_UHCI_BASE_REG;
3200 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3201 	if (res != NULL) {
3202 		bus_write_2(res, UHCI_INTR, 0);
3203 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3204 	}
3205 }
3206 
3207 /* Perform early EHCI takeover from SMM. */
3208 static void
3209 ehci_early_takeover(device_t self)
3210 {
3211 	struct resource *res;
3212 	uint32_t cparams;
3213 	uint32_t eec;
3214 	uint8_t eecp;
3215 	uint8_t bios_sem;
3216 	uint8_t offs;
3217 	int rid;
3218 	int i;
3219 
3220 	rid = PCIR_BAR(0);
3221 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3222 	if (res == NULL)
3223 		return;
3224 
3225 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3226 
3227 	/* Synchronise with the BIOS if it owns the controller. */
3228 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3229 	    eecp = EHCI_EECP_NEXT(eec)) {
3230 		eec = pci_read_config(self, eecp, 4);
3231 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3232 			continue;
3233 		}
3234 		bios_sem = pci_read_config(self, eecp +
3235 		    EHCI_LEGSUP_BIOS_SEM, 1);
3236 		if (bios_sem == 0) {
3237 			continue;
3238 		}
3239 		if (bootverbose)
3240 			printf("ehci early: "
3241 			    "SMM active, request owner change\n");
3242 
3243 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3244 
3245 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3246 			DELAY(1000);
3247 			bios_sem = pci_read_config(self, eecp +
3248 			    EHCI_LEGSUP_BIOS_SEM, 1);
3249 		}
3250 
3251 		if (bios_sem != 0) {
3252 			if (bootverbose)
3253 				printf("ehci early: "
3254 				    "SMM does not respond\n");
3255 		}
3256 		/* Disable interrupts */
3257 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3258 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3259 	}
3260 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3261 }
3262 
3263 /* Perform early XHCI takeover from SMM. */
3264 static void
3265 xhci_early_takeover(device_t self)
3266 {
3267 	struct resource *res;
3268 	uint32_t cparams;
3269 	uint32_t eec;
3270 	uint8_t eecp;
3271 	uint8_t bios_sem;
3272 	uint8_t offs;
3273 	int rid;
3274 	int i;
3275 
3276 	rid = PCIR_BAR(0);
3277 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3278 	if (res == NULL)
3279 		return;
3280 
3281 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3282 
3283 	eec = -1;
3284 
3285 	/* Synchronise with the BIOS if it owns the controller. */
3286 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3287 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3288 		eec = bus_read_4(res, eecp);
3289 
3290 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3291 			continue;
3292 
3293 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3294 		if (bios_sem == 0)
3295 			continue;
3296 
3297 		if (bootverbose)
3298 			printf("xhci early: "
3299 			    "SMM active, request owner change\n");
3300 
3301 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3302 
3303 		/* wait a maximum of 5 second */
3304 
3305 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3306 			DELAY(1000);
3307 			bios_sem = bus_read_1(res, eecp +
3308 			    XHCI_XECP_BIOS_SEM);
3309 		}
3310 
3311 		if (bios_sem != 0) {
3312 			if (bootverbose)
3313 				printf("xhci early: "
3314 				    "SMM does not respond\n");
3315 		}
3316 
3317 		/* Disable interrupts */
3318 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3319 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3320 		bus_read_4(res, offs + XHCI_USBSTS);
3321 	}
3322 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3323 }
3324 
3325 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3326 static void
3327 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3328     struct resource_list *rl)
3329 {
3330 	struct resource *res;
3331 	char *cp;
3332 	u_long start, end, count;
3333 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3334 
3335 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3336 	case PCIM_HDRTYPE_BRIDGE:
3337 		sec_reg = PCIR_SECBUS_1;
3338 		sub_reg = PCIR_SUBBUS_1;
3339 		break;
3340 	case PCIM_HDRTYPE_CARDBUS:
3341 		sec_reg = PCIR_SECBUS_2;
3342 		sub_reg = PCIR_SUBBUS_2;
3343 		break;
3344 	default:
3345 		return;
3346 	}
3347 
3348 	/*
3349 	 * If the existing bus range is valid, attempt to reserve it
3350 	 * from our parent.  If this fails for any reason, clear the
3351 	 * secbus and subbus registers.
3352 	 *
3353 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3354 	 * This would at least preserve the existing sec_bus if it is
3355 	 * valid.
3356 	 */
3357 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3358 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3359 
3360 	/* Quirk handling. */
3361 	switch (pci_get_devid(dev)) {
3362 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3363 		sup_bus = pci_read_config(dev, 0x41, 1);
3364 		if (sup_bus != 0xff) {
3365 			sec_bus = sup_bus + 1;
3366 			sub_bus = sup_bus + 1;
3367 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3368 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3369 		}
3370 		break;
3371 
3372 	case 0x00dd10de:
3373 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3374 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3375 			break;
3376 		if (strncmp(cp, "Compal", 6) != 0) {
3377 			freeenv(cp);
3378 			break;
3379 		}
3380 		freeenv(cp);
3381 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3382 			break;
3383 		if (strncmp(cp, "08A0", 4) != 0) {
3384 			freeenv(cp);
3385 			break;
3386 		}
3387 		freeenv(cp);
3388 		if (sub_bus < 0xa) {
3389 			sub_bus = 0xa;
3390 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3391 		}
3392 		break;
3393 	}
3394 
3395 	if (bootverbose)
3396 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3397 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3398 		start = sec_bus;
3399 		end = sub_bus;
3400 		count = end - start + 1;
3401 
3402 		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);
3403 
3404 		/*
3405 		 * If requested, clear secondary bus registers in
3406 		 * bridge devices to force a complete renumbering
3407 		 * rather than reserving the existing range.  However,
3408 		 * preserve the existing size.
3409 		 */
3410 		if (pci_clear_buses)
3411 			goto clear;
3412 
3413 		rid = 0;
3414 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3415 		    start, end, count, 0);
3416 		if (res != NULL)
3417 			return;
3418 
3419 		if (bootverbose)
3420 			device_printf(bus,
3421 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3422 			    pci_get_domain(dev), pci_get_bus(dev),
3423 			    pci_get_slot(dev), pci_get_function(dev));
3424 	}
3425 
3426 clear:
3427 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3428 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3429 }
3430 
3431 static struct resource *
3432 pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
3433     u_long end, u_long count, u_int flags)
3434 {
3435 	struct pci_devinfo *dinfo;
3436 	pcicfgregs *cfg;
3437 	struct resource_list *rl;
3438 	struct resource *res;
3439 	int sec_reg, sub_reg;
3440 
3441 	dinfo = device_get_ivars(child);
3442 	cfg = &dinfo->cfg;
3443 	rl = &dinfo->resources;
3444 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3445 	case PCIM_HDRTYPE_BRIDGE:
3446 		sec_reg = PCIR_SECBUS_1;
3447 		sub_reg = PCIR_SUBBUS_1;
3448 		break;
3449 	case PCIM_HDRTYPE_CARDBUS:
3450 		sec_reg = PCIR_SECBUS_2;
3451 		sub_reg = PCIR_SUBBUS_2;
3452 		break;
3453 	default:
3454 		return (NULL);
3455 	}
3456 
3457 	if (*rid != 0)
3458 		return (NULL);
3459 
3460 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3461 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3462 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3463 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3464 		    start, end, count, flags & ~RF_ACTIVE);
3465 		if (res == NULL) {
3466 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3467 			device_printf(child, "allocating %lu bus%s failed\n",
3468 			    count, count == 1 ? "" : "es");
3469 			return (NULL);
3470 		}
3471 		if (bootverbose)
3472 			device_printf(child,
3473 			    "Lazy allocation of %lu bus%s at %lu\n", count,
3474 			    count == 1 ? "" : "es", rman_get_start(res));
3475 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3476 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3477 	}
3478 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3479 	    end, count, flags));
3480 }
3481 #endif
3482 
3483 void
3484 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3485 {
3486 	struct pci_devinfo *dinfo;
3487 	pcicfgregs *cfg;
3488 	struct resource_list *rl;
3489 	const struct pci_quirk *q;
3490 	uint32_t devid;
3491 	int i;
3492 
3493 	dinfo = device_get_ivars(dev);
3494 	cfg = &dinfo->cfg;
3495 	rl = &dinfo->resources;
3496 	devid = (cfg->device << 16) | cfg->vendor;
3497 
3498 	/* ATA devices needs special map treatment */
3499 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3500 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3501 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3502 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3503 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3504 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3505 	else
3506 		for (i = 0; i < cfg->nummaps;) {
3507 			/*
3508 			 * Skip quirked resources.
3509 			 */
3510 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3511 				if (q->devid == devid &&
3512 				    q->type == PCI_QUIRK_UNMAP_REG &&
3513 				    q->arg1 == PCIR_BAR(i))
3514 					break;
3515 			if (q->devid != 0) {
3516 				i++;
3517 				continue;
3518 			}
3519 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3520 			    prefetchmask & (1 << i));
3521 		}
3522 
3523 	/*
3524 	 * Add additional, quirked resources.
3525 	 */
3526 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3527 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3528 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3529 
3530 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3531 #ifdef __PCI_REROUTE_INTERRUPT
3532 		/*
3533 		 * Try to re-route interrupts. Sometimes the BIOS or
3534 		 * firmware may leave bogus values in these registers.
3535 		 * If the re-route fails, then just stick with what we
3536 		 * have.
3537 		 */
3538 		pci_assign_interrupt(bus, dev, 1);
3539 #else
3540 		pci_assign_interrupt(bus, dev, 0);
3541 #endif
3542 	}
3543 
3544 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3545 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3546 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3547 			xhci_early_takeover(dev);
3548 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3549 			ehci_early_takeover(dev);
3550 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3551 			ohci_early_takeover(dev);
3552 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3553 			uhci_early_takeover(dev);
3554 	}
3555 
3556 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3557 	/*
3558 	 * Reserve resources for secondary bus ranges behind bridge
3559 	 * devices.
3560 	 */
3561 	pci_reserve_secbus(bus, dev, cfg, rl);
3562 #endif
3563 }
3564 
3565 static struct pci_devinfo *
3566 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3567     int slot, int func, size_t dinfo_size)
3568 {
3569 	struct pci_devinfo *dinfo;
3570 
3571 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3572 	if (dinfo != NULL)
3573 		pci_add_child(dev, dinfo);
3574 
3575 	return (dinfo);
3576 }
3577 
3578 void
3579 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3580 {
3581 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3582 	device_t pcib = device_get_parent(dev);
3583 	struct pci_devinfo *dinfo;
3584 	int maxslots;
3585 	int s, f, pcifunchigh;
3586 	uint8_t hdrtype;
3587 	int first_func;
3588 
3589 	/*
3590 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3591 	 * enable ARI.  We must enable ARI before detecting the rest of the
3592 	 * functions on this bus as ARI changes the set of slots and functions
3593 	 * that are legal on this bus.
3594 	 */
3595 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
3596 	    dinfo_size);
3597 	if (dinfo != NULL && pci_enable_ari)
3598 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3599 
3600 	/*
3601 	 * Start looking for new devices on slot 0 at function 1 because we
3602 	 * just identified the device at slot 0, function 0.
3603 	 */
3604 	first_func = 1;
3605 
3606 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3607 	    ("dinfo_size too small"));
3608 	maxslots = PCIB_MAXSLOTS(pcib);
3609 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3610 		pcifunchigh = 0;
3611 		f = 0;
3612 		DELAY(1);
3613 		hdrtype = REG(PCIR_HDRTYPE, 1);
3614 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3615 			continue;
3616 		if (hdrtype & PCIM_MFDEV)
3617 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3618 		for (f = first_func; f <= pcifunchigh; f++)
3619 			pci_identify_function(pcib, dev, domain, busno, s, f,
3620 			    dinfo_size);
3621 	}
3622 #undef REG
3623 }
3624 
3625 #ifdef PCI_IOV
3626 device_t
3627 pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
3628     uint16_t vid, uint16_t did)
3629 {
3630 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3631 	device_t pcib;
3632 	int busno, slot, func;
3633 
3634 	pf_dinfo = device_get_ivars(pf);
3635 
3636 	/*
3637 	 * Do a sanity check that we have been passed the correct size.  If this
3638 	 * test fails then likely the pci subclass hasn't implemented the
3639 	 * pci_create_iov_child method like it's supposed it.
3640 	 */
3641 	if (size != pf_dinfo->cfg.devinfo_size) {
3642 		device_printf(pf,
3643 		    "PCI subclass does not properly implement PCI_IOV\n");
3644 		return (NULL);
3645 	}
3646 
3647 	pcib = device_get_parent(bus);
3648 
3649 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3650 
3651 	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
3652 	    vid, did, size);
3653 
3654 	vf_dinfo->cfg.flags |= PCICFG_VF;
3655 	pci_add_child(bus, vf_dinfo);
3656 
3657 	return (vf_dinfo->cfg.dev);
3658 }
3659 
3660 device_t
3661 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
3662     uint16_t vid, uint16_t did)
3663 {
3664 
3665 	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
3666 	    did));
3667 }
3668 #endif
3669 
3670 void
3671 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3672 {
3673 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3674 	device_set_ivars(dinfo->cfg.dev, dinfo);
3675 	resource_list_init(&dinfo->resources);
3676 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3677 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3678 	pci_print_verbose(dinfo);
3679 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3680 	pci_child_added(dinfo->cfg.dev);
3681 }
3682 
3683 void
3684 pci_child_added_method(device_t dev, device_t child)
3685 {
3686 
3687 }
3688 
3689 static int
3690 pci_probe(device_t dev)
3691 {
3692 
3693 	device_set_desc(dev, "PCI bus");
3694 
3695 	/* Allow other subclasses to override this driver. */
3696 	return (BUS_PROBE_GENERIC);
3697 }
3698 
3699 int
3700 pci_attach_common(device_t dev)
3701 {
3702 	struct pci_softc *sc;
3703 	int busno, domain;
3704 #ifdef PCI_DMA_BOUNDARY
3705 	int error, tag_valid;
3706 #endif
3707 #ifdef PCI_RES_BUS
3708 	int rid;
3709 #endif
3710 
3711 	sc = device_get_softc(dev);
3712 	domain = pcib_get_domain(dev);
3713 	busno = pcib_get_bus(dev);
3714 #ifdef PCI_RES_BUS
3715 	rid = 0;
3716 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
3717 	    1, 0);
3718 	if (sc->sc_bus == NULL) {
3719 		device_printf(dev, "failed to allocate bus number\n");
3720 		return (ENXIO);
3721 	}
3722 #endif
3723 	if (bootverbose)
3724 		device_printf(dev, "domain=%d, physical bus=%d\n",
3725 		    domain, busno);
3726 #ifdef PCI_DMA_BOUNDARY
3727 	tag_valid = 0;
3728 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
3729 	    devclass_find("pci")) {
3730 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
3731 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3732 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
3733 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
3734 		if (error)
3735 			device_printf(dev, "Failed to create DMA tag: %d\n",
3736 			    error);
3737 		else
3738 			tag_valid = 1;
3739 	}
3740 	if (!tag_valid)
3741 #endif
3742 		sc->sc_dma_tag = bus_get_dma_tag(dev);
3743 	return (0);
3744 }
3745 
3746 static int
3747 pci_attach(device_t dev)
3748 {
3749 	int busno, domain, error;
3750 
3751 	error = pci_attach_common(dev);
3752 	if (error)
3753 		return (error);
3754 
3755 	/*
3756 	 * Since there can be multiple independantly numbered PCI
3757 	 * busses on systems with multiple PCI domains, we can't use
3758 	 * the unit number to decide which bus we are probing. We ask
3759 	 * the parent pcib what our domain and bus numbers are.
3760 	 */
3761 	domain = pcib_get_domain(dev);
3762 	busno = pcib_get_bus(dev);
3763 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3764 	return (bus_generic_attach(dev));
3765 }
3766 
3767 #ifdef PCI_RES_BUS
3768 static int
3769 pci_detach(device_t dev)
3770 {
3771 	struct pci_softc *sc;
3772 	int error;
3773 
3774 	error = bus_generic_detach(dev);
3775 	if (error)
3776 		return (error);
3777 	sc = device_get_softc(dev);
3778 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3779 }
3780 #endif
3781 
3782 static void
3783 pci_set_power_child(device_t dev, device_t child, int state)
3784 {
3785 	device_t pcib;
3786 	int dstate;
3787 
3788 	/*
3789 	 * Set the device to the given state.  If the firmware suggests
3790 	 * a different power state, use it instead.  If power management
3791 	 * is not present, the firmware is responsible for managing
3792 	 * device power.  Skip children who aren't attached since they
3793 	 * are handled separately.
3794 	 */
3795 	pcib = device_get_parent(dev);
3796 	dstate = state;
3797 	if (device_is_attached(child) &&
3798 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
3799 		pci_set_powerstate(child, dstate);
3800 }
3801 
3802 int
3803 pci_suspend_child(device_t dev, device_t child)
3804 {
3805 	struct pci_devinfo *dinfo;
3806 	int error;
3807 
3808 	dinfo = device_get_ivars(child);
3809 
3810 	/*
3811 	 * Save the PCI configuration space for the child and set the
3812 	 * device in the appropriate power state for this sleep state.
3813 	 */
3814 	pci_cfg_save(child, dinfo, 0);
3815 
3816 	/* Suspend devices before potentially powering them down. */
3817 	error = bus_generic_suspend_child(dev, child);
3818 
3819 	if (error)
3820 		return (error);
3821 
3822 	if (pci_do_power_suspend)
3823 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
3824 
3825 	return (0);
3826 }
3827 
3828 int
3829 pci_resume_child(device_t dev, device_t child)
3830 {
3831 	struct pci_devinfo *dinfo;
3832 
3833 	if (pci_do_power_resume)
3834 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
3835 
3836 	dinfo = device_get_ivars(child);
3837 	pci_cfg_restore(child, dinfo);
3838 	if (!device_is_attached(child))
3839 		pci_cfg_save(child, dinfo, 1);
3840 
3841 	bus_generic_resume_child(dev, child);
3842 
3843 	return (0);
3844 }
3845 
3846 int
3847 pci_resume(device_t dev)
3848 {
3849 	device_t child, *devlist;
3850 	int error, i, numdevs;
3851 
3852 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3853 		return (error);
3854 
3855 	/*
3856 	 * Resume critical devices first, then everything else later.
3857 	 */
3858 	for (i = 0; i < numdevs; i++) {
3859 		child = devlist[i];
3860 		switch (pci_get_class(child)) {
3861 		case PCIC_DISPLAY:
3862 		case PCIC_MEMORY:
3863 		case PCIC_BRIDGE:
3864 		case PCIC_BASEPERIPH:
3865 			BUS_RESUME_CHILD(dev, child);
3866 			break;
3867 		}
3868 	}
3869 	for (i = 0; i < numdevs; i++) {
3870 		child = devlist[i];
3871 		switch (pci_get_class(child)) {
3872 		case PCIC_DISPLAY:
3873 		case PCIC_MEMORY:
3874 		case PCIC_BRIDGE:
3875 		case PCIC_BASEPERIPH:
3876 			break;
3877 		default:
3878 			BUS_RESUME_CHILD(dev, child);
3879 		}
3880 	}
3881 	free(devlist, M_TEMP);
3882 	return (0);
3883 }
3884 
3885 static void
3886 pci_load_vendor_data(void)
3887 {
3888 	caddr_t data;
3889 	void *ptr;
3890 	size_t sz;
3891 
3892 	data = preload_search_by_type("pci_vendor_data");
3893 	if (data != NULL) {
3894 		ptr = preload_fetch_addr(data);
3895 		sz = preload_fetch_size(data);
3896 		if (ptr != NULL && sz != 0) {
3897 			pci_vendordata = ptr;
3898 			pci_vendordata_size = sz;
3899 			/* terminate the database */
3900 			pci_vendordata[pci_vendordata_size] = '\n';
3901 		}
3902 	}
3903 }
3904 
3905 void
3906 pci_driver_added(device_t dev, driver_t *driver)
3907 {
3908 	int numdevs;
3909 	device_t *devlist;
3910 	device_t child;
3911 	struct pci_devinfo *dinfo;
3912 	int i;
3913 
3914 	if (bootverbose)
3915 		device_printf(dev, "driver added\n");
3916 	DEVICE_IDENTIFY(driver, dev);
3917 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3918 		return;
3919 	for (i = 0; i < numdevs; i++) {
3920 		child = devlist[i];
3921 		if (device_get_state(child) != DS_NOTPRESENT)
3922 			continue;
3923 		dinfo = device_get_ivars(child);
3924 		pci_print_verbose(dinfo);
3925 		if (bootverbose)
3926 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3927 		pci_cfg_restore(child, dinfo);
3928 		if (device_probe_and_attach(child) != 0)
3929 			pci_child_detached(dev, child);
3930 	}
3931 	free(devlist, M_TEMP);
3932 }
3933 
3934 int
3935 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3936     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3937 {
3938 	struct pci_devinfo *dinfo;
3939 	struct msix_table_entry *mte;
3940 	struct msix_vector *mv;
3941 	uint64_t addr;
3942 	uint32_t data;
3943 	void *cookie;
3944 	int error, rid;
3945 
3946 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3947 	    arg, &cookie);
3948 	if (error)
3949 		return (error);
3950 
3951 	/* If this is not a direct child, just bail out. */
3952 	if (device_get_parent(child) != dev) {
3953 		*cookiep = cookie;
3954 		return(0);
3955 	}
3956 
3957 	rid = rman_get_rid(irq);
3958 	if (rid == 0) {
3959 		/* Make sure that INTx is enabled */
3960 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3961 	} else {
3962 		/*
3963 		 * Check to see if the interrupt is MSI or MSI-X.
3964 		 * Ask our parent to map the MSI and give
3965 		 * us the address and data register values.
3966 		 * If we fail for some reason, teardown the
3967 		 * interrupt handler.
3968 		 */
3969 		dinfo = device_get_ivars(child);
3970 		if (dinfo->cfg.msi.msi_alloc > 0) {
3971 			if (dinfo->cfg.msi.msi_addr == 0) {
3972 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3973 			    ("MSI has handlers, but vectors not mapped"));
3974 				error = PCIB_MAP_MSI(device_get_parent(dev),
3975 				    child, rman_get_start(irq), &addr, &data);
3976 				if (error)
3977 					goto bad;
3978 				dinfo->cfg.msi.msi_addr = addr;
3979 				dinfo->cfg.msi.msi_data = data;
3980 			}
3981 			if (dinfo->cfg.msi.msi_handlers == 0)
3982 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3983 				    dinfo->cfg.msi.msi_data);
3984 			dinfo->cfg.msi.msi_handlers++;
3985 		} else {
3986 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3987 			    ("No MSI or MSI-X interrupts allocated"));
3988 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3989 			    ("MSI-X index too high"));
3990 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3991 			KASSERT(mte->mte_vector != 0, ("no message vector"));
3992 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3993 			KASSERT(mv->mv_irq == rman_get_start(irq),
3994 			    ("IRQ mismatch"));
3995 			if (mv->mv_address == 0) {
3996 				KASSERT(mte->mte_handlers == 0,
3997 		    ("MSI-X table entry has handlers, but vector not mapped"));
3998 				error = PCIB_MAP_MSI(device_get_parent(dev),
3999 				    child, rman_get_start(irq), &addr, &data);
4000 				if (error)
4001 					goto bad;
4002 				mv->mv_address = addr;
4003 				mv->mv_data = data;
4004 			}
4005 			if (mte->mte_handlers == 0) {
4006 				pci_enable_msix(child, rid - 1, mv->mv_address,
4007 				    mv->mv_data);
4008 				pci_unmask_msix(child, rid - 1);
4009 			}
4010 			mte->mte_handlers++;
4011 		}
4012 
4013 		/*
4014 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
4015 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
4016 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
4017 		 */
4018 		if (!pci_has_quirk(pci_get_devid(child),
4019 		    PCI_QUIRK_MSI_INTX_BUG))
4020 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4021 		else
4022 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4023 	bad:
4024 		if (error) {
4025 			(void)bus_generic_teardown_intr(dev, child, irq,
4026 			    cookie);
4027 			return (error);
4028 		}
4029 	}
4030 	*cookiep = cookie;
4031 	return (0);
4032 }
4033 
4034 int
4035 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4036     void *cookie)
4037 {
4038 	struct msix_table_entry *mte;
4039 	struct resource_list_entry *rle;
4040 	struct pci_devinfo *dinfo;
4041 	int error, rid;
4042 
4043 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4044 		return (EINVAL);
4045 
4046 	/* If this isn't a direct child, just bail out */
4047 	if (device_get_parent(child) != dev)
4048 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4049 
4050 	rid = rman_get_rid(irq);
4051 	if (rid == 0) {
4052 		/* Mask INTx */
4053 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4054 	} else {
4055 		/*
4056 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4057 		 * decrement the appropriate handlers count and mask the
4058 		 * MSI-X message, or disable MSI messages if the count
4059 		 * drops to 0.
4060 		 */
4061 		dinfo = device_get_ivars(child);
4062 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4063 		if (rle->res != irq)
4064 			return (EINVAL);
4065 		if (dinfo->cfg.msi.msi_alloc > 0) {
4066 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4067 			    ("MSI-X index too high"));
4068 			if (dinfo->cfg.msi.msi_handlers == 0)
4069 				return (EINVAL);
4070 			dinfo->cfg.msi.msi_handlers--;
4071 			if (dinfo->cfg.msi.msi_handlers == 0)
4072 				pci_disable_msi(child);
4073 		} else {
4074 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4075 			    ("No MSI or MSI-X interrupts allocated"));
4076 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4077 			    ("MSI-X index too high"));
4078 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4079 			if (mte->mte_handlers == 0)
4080 				return (EINVAL);
4081 			mte->mte_handlers--;
4082 			if (mte->mte_handlers == 0)
4083 				pci_mask_msix(child, rid - 1);
4084 		}
4085 	}
4086 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4087 	if (rid > 0)
4088 		KASSERT(error == 0,
4089 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4090 	return (error);
4091 }
4092 
4093 int
4094 pci_print_child(device_t dev, device_t child)
4095 {
4096 	struct pci_devinfo *dinfo;
4097 	struct resource_list *rl;
4098 	int retval = 0;
4099 
4100 	dinfo = device_get_ivars(child);
4101 	rl = &dinfo->resources;
4102 
4103 	retval += bus_print_child_header(dev, child);
4104 
4105 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4106 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4107 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4108 	if (device_get_flags(dev))
4109 		retval += printf(" flags %#x", device_get_flags(dev));
4110 
4111 	retval += printf(" at device %d.%d", pci_get_slot(child),
4112 	    pci_get_function(child));
4113 
4114 	retval += bus_print_child_domain(dev, child);
4115 	retval += bus_print_child_footer(dev, child);
4116 
4117 	return (retval);
4118 }
4119 
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() when
 * no device database entry is available.
 *
 * An entry with subclass -1 supplies the description for the whole
 * class; other entries describe one specific subclass of that class.
 * 'report' selects whether an unmatched device of this kind is always
 * announced (1) or only when bootverbose is set (0).  The table is
 * terminated by an entry whose desc is NULL.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	/* Terminator: scanning stops at the first NULL desc. */
	{0, 0, 0,		NULL}
};
4214 
4215 void
4216 pci_probe_nomatch(device_t dev, device_t child)
4217 {
4218 	int i, report;
4219 	const char *cp, *scp;
4220 	char *device;
4221 
4222 	/*
4223 	 * Look for a listing for this device in a loaded device database.
4224 	 */
4225 	report = 1;
4226 	if ((device = pci_describe_device(child)) != NULL) {
4227 		device_printf(dev, "<%s>", device);
4228 		free(device, M_DEVBUF);
4229 	} else {
4230 		/*
4231 		 * Scan the class/subclass descriptions for a general
4232 		 * description.
4233 		 */
4234 		cp = "unknown";
4235 		scp = NULL;
4236 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4237 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4238 				if (pci_nomatch_tab[i].subclass == -1) {
4239 					cp = pci_nomatch_tab[i].desc;
4240 					report = pci_nomatch_tab[i].report;
4241 				} else if (pci_nomatch_tab[i].subclass ==
4242 				    pci_get_subclass(child)) {
4243 					scp = pci_nomatch_tab[i].desc;
4244 					report = pci_nomatch_tab[i].report;
4245 				}
4246 			}
4247 		}
4248 		if (report || bootverbose) {
4249 			device_printf(dev, "<%s%s%s>",
4250 			    cp ? cp : "",
4251 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4252 			    scp ? scp : "");
4253 		}
4254 	}
4255 	if (report || bootverbose) {
4256 		printf(" at device %d.%d (no driver attached)\n",
4257 		    pci_get_slot(child), pci_get_function(child));
4258 	}
4259 	pci_cfg_save(child, device_get_ivars(child), 1);
4260 }
4261 
4262 void
4263 pci_child_detached(device_t dev, device_t child)
4264 {
4265 	struct pci_devinfo *dinfo;
4266 	struct resource_list *rl;
4267 
4268 	dinfo = device_get_ivars(child);
4269 	rl = &dinfo->resources;
4270 
4271 	/*
4272 	 * Have to deallocate IRQs before releasing any MSI messages and
4273 	 * have to release MSI messages before deallocating any memory
4274 	 * BARs.
4275 	 */
4276 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4277 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4278 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4279 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4280 		(void)pci_release_msi(child);
4281 	}
4282 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4283 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4284 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4285 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4286 #ifdef PCI_RES_BUS
4287 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4288 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4289 #endif
4290 
4291 	pci_cfg_save(child, dinfo, 1);
4292 }
4293 
4294 /*
4295  * Parse the PCI device database, if loaded, and return a pointer to a
4296  * description of the device.
4297  *
4298  * The database is flat text formatted as follows:
4299  *
4300  * Any line not in a valid format is ignored.
4301  * Lines are terminated with newline '\n' characters.
4302  *
4303  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4304  * the vendor name.
4305  *
4306  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4307  * - devices cannot be listed without a corresponding VENDOR line.
4308  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4309  * another TAB, then the device name.
4310  */
4311 
4312 /*
4313  * Assuming (ptr) points to the beginning of a line in the database,
4314  * return the vendor or device and description of the next entry.
4315  * The value of (vendor) or (device) inappropriate for the entry type
4316  * is set to -1.  Returns nonzero at the end of the database.
4317  *
4318  * Note that this is slightly unrobust in the face of corrupt data;
4319  * we attempt to safeguard against this by spamming the end of the
4320  * database with a newline when we initialise.
4321  */
4322 static int
4323 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4324 {
4325 	char	*cp = *ptr;
4326 	int	left;
4327 
4328 	*device = -1;
4329 	*vendor = -1;
4330 	**desc = '\0';
4331 	for (;;) {
4332 		left = pci_vendordata_size - (cp - pci_vendordata);
4333 		if (left <= 0) {
4334 			*ptr = cp;
4335 			return(1);
4336 		}
4337 
4338 		/* vendor entry? */
4339 		if (*cp != '\t' &&
4340 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4341 			break;
4342 		/* device entry? */
4343 		if (*cp == '\t' &&
4344 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4345 			break;
4346 
4347 		/* skip to next line */
4348 		while (*cp != '\n' && left > 0) {
4349 			cp++;
4350 			left--;
4351 		}
4352 		if (*cp == '\n') {
4353 			cp++;
4354 			left--;
4355 		}
4356 	}
4357 	/* skip to next line */
4358 	while (*cp != '\n' && left > 0) {
4359 		cp++;
4360 		left--;
4361 	}
4362 	if (*cp == '\n' && left > 0)
4363 		cp++;
4364 	*ptr = cp;
4365 	return(0);
4366 }
4367 
4368 static char *
4369 pci_describe_device(device_t dev)
4370 {
4371 	int	vendor, device;
4372 	char	*desc, *vp, *dp, *line;
4373 
4374 	desc = vp = dp = NULL;
4375 
4376 	/*
4377 	 * If we have no vendor data, we can't do anything.
4378 	 */
4379 	if (pci_vendordata == NULL)
4380 		goto out;
4381 
4382 	/*
4383 	 * Scan the vendor data looking for this device
4384 	 */
4385 	line = pci_vendordata;
4386 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4387 		goto out;
4388 	for (;;) {
4389 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4390 			goto out;
4391 		if (vendor == pci_get_vendor(dev))
4392 			break;
4393 	}
4394 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4395 		goto out;
4396 	for (;;) {
4397 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4398 			*dp = 0;
4399 			break;
4400 		}
4401 		if (vendor != -1) {
4402 			*dp = 0;
4403 			break;
4404 		}
4405 		if (device == pci_get_device(dev))
4406 			break;
4407 	}
4408 	if (dp[0] == '\0')
4409 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4410 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4411 	    NULL)
4412 		sprintf(desc, "%s, %s", vp, dp);
4413 out:
4414 	if (vp != NULL)
4415 		free(vp, M_DEVBUF);
4416 	if (dp != NULL)
4417 		free(dp, M_DEVBUF);
4418 	return(desc);
4419 }
4420 
4421 int
4422 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4423 {
4424 	struct pci_devinfo *dinfo;
4425 	pcicfgregs *cfg;
4426 
4427 	dinfo = device_get_ivars(child);
4428 	cfg = &dinfo->cfg;
4429 
4430 	switch (which) {
4431 	case PCI_IVAR_ETHADDR:
4432 		/*
4433 		 * The generic accessor doesn't deal with failure, so
4434 		 * we set the return value, then return an error.
4435 		 */
4436 		*((uint8_t **) result) = NULL;
4437 		return (EINVAL);
4438 	case PCI_IVAR_SUBVENDOR:
4439 		*result = cfg->subvendor;
4440 		break;
4441 	case PCI_IVAR_SUBDEVICE:
4442 		*result = cfg->subdevice;
4443 		break;
4444 	case PCI_IVAR_VENDOR:
4445 		*result = cfg->vendor;
4446 		break;
4447 	case PCI_IVAR_DEVICE:
4448 		*result = cfg->device;
4449 		break;
4450 	case PCI_IVAR_DEVID:
4451 		*result = (cfg->device << 16) | cfg->vendor;
4452 		break;
4453 	case PCI_IVAR_CLASS:
4454 		*result = cfg->baseclass;
4455 		break;
4456 	case PCI_IVAR_SUBCLASS:
4457 		*result = cfg->subclass;
4458 		break;
4459 	case PCI_IVAR_PROGIF:
4460 		*result = cfg->progif;
4461 		break;
4462 	case PCI_IVAR_REVID:
4463 		*result = cfg->revid;
4464 		break;
4465 	case PCI_IVAR_INTPIN:
4466 		*result = cfg->intpin;
4467 		break;
4468 	case PCI_IVAR_IRQ:
4469 		*result = cfg->intline;
4470 		break;
4471 	case PCI_IVAR_DOMAIN:
4472 		*result = cfg->domain;
4473 		break;
4474 	case PCI_IVAR_BUS:
4475 		*result = cfg->bus;
4476 		break;
4477 	case PCI_IVAR_SLOT:
4478 		*result = cfg->slot;
4479 		break;
4480 	case PCI_IVAR_FUNCTION:
4481 		*result = cfg->func;
4482 		break;
4483 	case PCI_IVAR_CMDREG:
4484 		*result = cfg->cmdreg;
4485 		break;
4486 	case PCI_IVAR_CACHELNSZ:
4487 		*result = cfg->cachelnsz;
4488 		break;
4489 	case PCI_IVAR_MINGNT:
4490 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4491 			*result = -1;
4492 			return (EINVAL);
4493 		}
4494 		*result = cfg->mingnt;
4495 		break;
4496 	case PCI_IVAR_MAXLAT:
4497 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4498 			*result = -1;
4499 			return (EINVAL);
4500 		}
4501 		*result = cfg->maxlat;
4502 		break;
4503 	case PCI_IVAR_LATTIMER:
4504 		*result = cfg->lattimer;
4505 		break;
4506 	default:
4507 		return (ENOENT);
4508 	}
4509 	return (0);
4510 }
4511 
4512 int
4513 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4514 {
4515 	struct pci_devinfo *dinfo;
4516 
4517 	dinfo = device_get_ivars(child);
4518 
4519 	switch (which) {
4520 	case PCI_IVAR_INTPIN:
4521 		dinfo->cfg.intpin = value;
4522 		return (0);
4523 	case PCI_IVAR_ETHADDR:
4524 	case PCI_IVAR_SUBVENDOR:
4525 	case PCI_IVAR_SUBDEVICE:
4526 	case PCI_IVAR_VENDOR:
4527 	case PCI_IVAR_DEVICE:
4528 	case PCI_IVAR_DEVID:
4529 	case PCI_IVAR_CLASS:
4530 	case PCI_IVAR_SUBCLASS:
4531 	case PCI_IVAR_PROGIF:
4532 	case PCI_IVAR_REVID:
4533 	case PCI_IVAR_IRQ:
4534 	case PCI_IVAR_DOMAIN:
4535 	case PCI_IVAR_BUS:
4536 	case PCI_IVAR_SLOT:
4537 	case PCI_IVAR_FUNCTION:
4538 		return (EINVAL);	/* disallow for now */
4539 
4540 	default:
4541 		return (ENOENT);
4542 	}
4543 }
4544 
4545 #include "opt_ddb.h"
4546 #ifdef DDB
4547 #include <ddb/ddb.h>
4548 #include <sys/cons.h>
4549 
4550 /*
4551  * List resources based on pci map registers, used for within ddb
4552  */
4553 
4554 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4555 {
4556 	struct pci_devinfo *dinfo;
4557 	struct devlist *devlist_head;
4558 	struct pci_conf *p;
4559 	const char *name;
4560 	int i, error, none_count;
4561 
4562 	none_count = 0;
4563 	/* get the head of the device queue */
4564 	devlist_head = &pci_devq;
4565 
4566 	/*
4567 	 * Go through the list of devices and print out devices
4568 	 */
4569 	for (error = 0, i = 0,
4570 	     dinfo = STAILQ_FIRST(devlist_head);
4571 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4572 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4573 
4574 		/* Populate pd_name and pd_unit */
4575 		name = NULL;
4576 		if (dinfo->cfg.dev)
4577 			name = device_get_name(dinfo->cfg.dev);
4578 
4579 		p = &dinfo->conf;
4580 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4581 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4582 			(name && *name) ? name : "none",
4583 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4584 			none_count++,
4585 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4586 			p->pc_sel.pc_func, (p->pc_class << 16) |
4587 			(p->pc_subclass << 8) | p->pc_progif,
4588 			(p->pc_subdevice << 16) | p->pc_subvendor,
4589 			(p->pc_device << 16) | p->pc_vendor,
4590 			p->pc_revid, p->pc_hdr);
4591 	}
4592 }
4593 #endif /* DDB */
4594 
/*
 * Reserve (without activating) the address range backing the BAR named
 * by *rid on 'child', and program the BAR with the reserved address.
 *
 * The BAR is sized either from an existing pci_map record or by probing
 * it with pci_read_bar().  The requested 'count' is overridden with the
 * BAR's own size multiplied by 'num', and the alignment in 'flags' is
 * raised to the BAR size if it is smaller.
 *
 * Returns the reserved resource, or NULL if the BAR has size 0, if
 * 'type' does not match the kind of BAR (memory vs. I/O port), or if
 * the reservation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int num, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must agree with the kind of BAR. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	/* Reserve only: RF_ACTIVE is stripped from the flags passed on. */
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Undo the list entry added just above. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4692 
4693 struct resource *
4694 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
4695     u_long start, u_long end, u_long count, u_long num, u_int flags)
4696 {
4697 	struct pci_devinfo *dinfo;
4698 	struct resource_list *rl;
4699 	struct resource_list_entry *rle;
4700 	struct resource *res;
4701 	pcicfgregs *cfg;
4702 
4703 	/*
4704 	 * Perform lazy resource allocation
4705 	 */
4706 	dinfo = device_get_ivars(child);
4707 	rl = &dinfo->resources;
4708 	cfg = &dinfo->cfg;
4709 	switch (type) {
4710 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
4711 	case PCI_RES_BUS:
4712 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
4713 		    flags));
4714 #endif
4715 	case SYS_RES_IRQ:
4716 		/*
4717 		 * Can't alloc legacy interrupt once MSI messages have
4718 		 * been allocated.
4719 		 */
4720 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4721 		    cfg->msix.msix_alloc > 0))
4722 			return (NULL);
4723 
4724 		/*
4725 		 * If the child device doesn't have an interrupt
4726 		 * routed and is deserving of an interrupt, try to
4727 		 * assign it one.
4728 		 */
4729 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4730 		    (cfg->intpin != 0))
4731 			pci_assign_interrupt(dev, child, 0);
4732 		break;
4733 	case SYS_RES_IOPORT:
4734 	case SYS_RES_MEMORY:
4735 #ifdef NEW_PCIB
4736 		/*
4737 		 * PCI-PCI bridge I/O window resources are not BARs.
4738 		 * For those allocations just pass the request up the
4739 		 * tree.
4740 		 */
4741 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4742 			switch (*rid) {
4743 			case PCIR_IOBASEL_1:
4744 			case PCIR_MEMBASE_1:
4745 			case PCIR_PMBASEL_1:
4746 				/*
4747 				 * XXX: Should we bother creating a resource
4748 				 * list entry?
4749 				 */
4750 				return (bus_generic_alloc_resource(dev, child,
4751 				    type, rid, start, end, count, flags));
4752 			}
4753 		}
4754 #endif
4755 		/* Reserve resources for this BAR if needed. */
4756 		rle = resource_list_find(rl, type, *rid);
4757 		if (rle == NULL) {
4758 			res = pci_reserve_map(dev, child, type, rid, start, end,
4759 			    count, num, flags);
4760 			if (res == NULL)
4761 				return (NULL);
4762 		}
4763 	}
4764 	return (resource_list_alloc(rl, dev, child, type, rid,
4765 	    start, end, count, flags));
4766 }
4767 
4768 struct resource *
4769 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4770     u_long start, u_long end, u_long count, u_int flags)
4771 {
4772 #ifdef PCI_IOV
4773 	struct pci_devinfo *dinfo;
4774 #endif
4775 
4776 	if (device_get_parent(child) != dev)
4777 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4778 		    type, rid, start, end, count, flags));
4779 
4780 #ifdef PCI_IOV
4781 	dinfo = device_get_ivars(child);
4782 	if (dinfo->cfg.flags & PCICFG_VF) {
4783 		switch (type) {
4784 		/* VFs can't have I/O BARs. */
4785 		case SYS_RES_IOPORT:
4786 			return (NULL);
4787 		case SYS_RES_MEMORY:
4788 			return (pci_vf_alloc_mem_resource(dev, child, rid,
4789 			    start, end, count, flags));
4790 		}
4791 
4792 		/* Fall through for other types of resource allocations. */
4793 	}
4794 #endif
4795 
4796 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
4797 	    count, 1, flags));
4798 }
4799 
4800 int
4801 pci_release_resource(device_t dev, device_t child, int type, int rid,
4802     struct resource *r)
4803 {
4804 	struct pci_devinfo *dinfo;
4805 	struct resource_list *rl;
4806 	pcicfgregs *cfg;
4807 
4808 	if (device_get_parent(child) != dev)
4809 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4810 		    type, rid, r));
4811 
4812 	dinfo = device_get_ivars(child);
4813 	cfg = &dinfo->cfg;
4814 
4815 #ifdef PCI_IOV
4816 	if (dinfo->cfg.flags & PCICFG_VF) {
4817 		switch (type) {
4818 		/* VFs can't have I/O BARs. */
4819 		case SYS_RES_IOPORT:
4820 			return (EDOOFUS);
4821 		case SYS_RES_MEMORY:
4822 			return (pci_vf_release_mem_resource(dev, child, rid,
4823 			    r));
4824 		}
4825 
4826 		/* Fall through for other types of resource allocations. */
4827 	}
4828 #endif
4829 
4830 #ifdef NEW_PCIB
4831 	/*
4832 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4833 	 * those allocations just pass the request up the tree.
4834 	 */
4835 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4836 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4837 		switch (rid) {
4838 		case PCIR_IOBASEL_1:
4839 		case PCIR_MEMBASE_1:
4840 		case PCIR_PMBASEL_1:
4841 			return (bus_generic_release_resource(dev, child, type,
4842 			    rid, r));
4843 		}
4844 	}
4845 #endif
4846 
4847 	rl = &dinfo->resources;
4848 	return (resource_list_release(rl, dev, child, type, rid, r));
4849 }
4850 
4851 int
4852 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4853     struct resource *r)
4854 {
4855 	struct pci_devinfo *dinfo;
4856 	int error;
4857 
4858 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4859 	if (error)
4860 		return (error);
4861 
4862 	/* Enable decoding in the command register when activating BARs. */
4863 	if (device_get_parent(child) == dev) {
4864 		/* Device ROMs need their decoding explicitly enabled. */
4865 		dinfo = device_get_ivars(child);
4866 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4867 			pci_write_bar(child, pci_find_bar(child, rid),
4868 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4869 		switch (type) {
4870 		case SYS_RES_IOPORT:
4871 		case SYS_RES_MEMORY:
4872 			error = PCI_ENABLE_IO(dev, child, type);
4873 			break;
4874 		}
4875 	}
4876 	return (error);
4877 }
4878 
4879 int
4880 pci_deactivate_resource(device_t dev, device_t child, int type,
4881     int rid, struct resource *r)
4882 {
4883 	struct pci_devinfo *dinfo;
4884 	int error;
4885 
4886 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4887 	if (error)
4888 		return (error);
4889 
4890 	/* Disable decoding for device ROMs. */
4891 	if (device_get_parent(child) == dev) {
4892 		dinfo = device_get_ivars(child);
4893 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4894 			pci_write_bar(child, pci_find_bar(child, rid),
4895 			    rman_get_start(r));
4896 	}
4897 	return (0);
4898 }
4899 
4900 void
4901 pci_delete_child(device_t dev, device_t child)
4902 {
4903 	struct resource_list_entry *rle;
4904 	struct resource_list *rl;
4905 	struct pci_devinfo *dinfo;
4906 
4907 	dinfo = device_get_ivars(child);
4908 	rl = &dinfo->resources;
4909 
4910 	if (device_is_attached(child))
4911 		device_detach(child);
4912 
4913 	/* Turn off access to resources we're about to free */
4914 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4915 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4916 
4917 	/* Free all allocated resources */
4918 	STAILQ_FOREACH(rle, rl, link) {
4919 		if (rle->res) {
4920 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4921 			    resource_list_busy(rl, rle->type, rle->rid)) {
4922 				pci_printf(&dinfo->cfg,
4923 				    "Resource still owned, oops. "
4924 				    "(type=%d, rid=%d, addr=%lx)\n",
4925 				    rle->type, rle->rid,
4926 				    rman_get_start(rle->res));
4927 				bus_release_resource(child, rle->type, rle->rid,
4928 				    rle->res);
4929 			}
4930 			resource_list_unreserve(rl, dev, child, rle->type,
4931 			    rle->rid);
4932 		}
4933 	}
4934 	resource_list_free(rl);
4935 
4936 	device_delete_child(dev, child);
4937 	pci_freecfg(dinfo);
4938 }
4939 
4940 void
4941 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4942 {
4943 	struct pci_devinfo *dinfo;
4944 	struct resource_list *rl;
4945 	struct resource_list_entry *rle;
4946 
4947 	if (device_get_parent(child) != dev)
4948 		return;
4949 
4950 	dinfo = device_get_ivars(child);
4951 	rl = &dinfo->resources;
4952 	rle = resource_list_find(rl, type, rid);
4953 	if (rle == NULL)
4954 		return;
4955 
4956 	if (rle->res) {
4957 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4958 		    resource_list_busy(rl, type, rid)) {
4959 			device_printf(dev, "delete_resource: "
4960 			    "Resource still owned by child, oops. "
4961 			    "(type=%d, rid=%d, addr=%lx)\n",
4962 			    type, rid, rman_get_start(rle->res));
4963 			return;
4964 		}
4965 		resource_list_unreserve(rl, dev, child, type, rid);
4966 	}
4967 	resource_list_delete(rl, type, rid);
4968 }
4969 
4970 struct resource_list *
4971 pci_get_resource_list (device_t dev, device_t child)
4972 {
4973 	struct pci_devinfo *dinfo = device_get_ivars(child);
4974 
4975 	return (&dinfo->resources);
4976 }
4977 
4978 bus_dma_tag_t
4979 pci_get_dma_tag(device_t bus, device_t dev)
4980 {
4981 	struct pci_softc *sc = device_get_softc(bus);
4982 
4983 	return (sc->sc_dma_tag);
4984 }
4985 
4986 uint32_t
4987 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4988 {
4989 	struct pci_devinfo *dinfo = device_get_ivars(child);
4990 	pcicfgregs *cfg = &dinfo->cfg;
4991 
4992 #ifdef PCI_IOV
4993 	/*
4994 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
4995 	 * emulate them here.
4996 	 */
4997 	if (cfg->flags & PCICFG_VF) {
4998 		if (reg == PCIR_VENDOR) {
4999 			switch (width) {
5000 			case 4:
5001 				return (cfg->device << 16 | cfg->vendor);
5002 			case 2:
5003 				return (cfg->vendor);
5004 			case 1:
5005 				return (cfg->vendor & 0xff);
5006 			default:
5007 				return (0xffffffff);
5008 			}
5009 		} else if (reg == PCIR_DEVICE) {
5010 			switch (width) {
5011 			/* Note that an unaligned 4-byte read is an error. */
5012 			case 2:
5013 				return (cfg->device);
5014 			case 1:
5015 				return (cfg->device & 0xff);
5016 			default:
5017 				return (0xffffffff);
5018 			}
5019 		}
5020 	}
5021 #endif
5022 
5023 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5024 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5025 }
5026 
5027 void
5028 pci_write_config_method(device_t dev, device_t child, int reg,
5029     uint32_t val, int width)
5030 {
5031 	struct pci_devinfo *dinfo = device_get_ivars(child);
5032 	pcicfgregs *cfg = &dinfo->cfg;
5033 
5034 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5035 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5036 }
5037 
5038 int
5039 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5040     size_t buflen)
5041 {
5042 
5043 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5044 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5045 	return (0);
5046 }
5047 
5048 int
5049 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5050     size_t buflen)
5051 {
5052 	struct pci_devinfo *dinfo;
5053 	pcicfgregs *cfg;
5054 
5055 	dinfo = device_get_ivars(child);
5056 	cfg = &dinfo->cfg;
5057 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5058 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5059 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5060 	    cfg->progif);
5061 	return (0);
5062 }
5063 
5064 int
5065 pci_assign_interrupt_method(device_t dev, device_t child)
5066 {
5067 	struct pci_devinfo *dinfo = device_get_ivars(child);
5068 	pcicfgregs *cfg = &dinfo->cfg;
5069 
5070 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5071 	    cfg->intpin));
5072 }
5073 
5074 static void
5075 pci_lookup(void *arg, const char *name, device_t *dev)
5076 {
5077 	long val;
5078 	char *end;
5079 	int domain, bus, slot, func;
5080 
5081 	if (*dev != NULL)
5082 		return;
5083 
5084 	/*
5085 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5086 	 * pciB:S:F.  In the latter case, the domain is assumed to
5087 	 * be zero.
5088 	 */
5089 	if (strncmp(name, "pci", 3) != 0)
5090 		return;
5091 	val = strtol(name + 3, &end, 10);
5092 	if (val < 0 || val > INT_MAX || *end != ':')
5093 		return;
5094 	domain = val;
5095 	val = strtol(end + 1, &end, 10);
5096 	if (val < 0 || val > INT_MAX || *end != ':')
5097 		return;
5098 	bus = val;
5099 	val = strtol(end + 1, &end, 10);
5100 	if (val < 0 || val > INT_MAX)
5101 		return;
5102 	slot = val;
5103 	if (*end == ':') {
5104 		val = strtol(end + 1, &end, 10);
5105 		if (val < 0 || val > INT_MAX || *end != '\0')
5106 			return;
5107 		func = val;
5108 	} else if (*end == '\0') {
5109 		func = slot;
5110 		slot = bus;
5111 		bus = domain;
5112 		domain = 0;
5113 	} else
5114 		return;
5115 
5116 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5117 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5118 		return;
5119 
5120 	*dev = pci_find_dbsf(domain, bus, slot, func);
5121 }
5122 
5123 static int
5124 pci_modevent(module_t mod, int what, void *arg)
5125 {
5126 	static struct cdev *pci_cdev;
5127 	static eventhandler_tag tag;
5128 
5129 	switch (what) {
5130 	case MOD_LOAD:
5131 		STAILQ_INIT(&pci_devq);
5132 		pci_generation = 0;
5133 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5134 		    "pci");
5135 		pci_load_vendor_data();
5136 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5137 		    1000);
5138 		break;
5139 
5140 	case MOD_UNLOAD:
5141 		if (tag != NULL)
5142 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5143 		destroy_dev(pci_cdev);
5144 		break;
5145 	}
5146 
5147 	return (0);
5148 }
5149 
5150 static void
5151 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5152 {
5153 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5154 	struct pcicfg_pcie *cfg;
5155 	int version, pos;
5156 
5157 	cfg = &dinfo->cfg.pcie;
5158 	pos = cfg->pcie_location;
5159 
5160 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5161 
5162 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5163 
5164 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5165 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5166 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5167 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5168 
5169 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5170 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5171 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5172 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5173 
5174 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5175 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5176 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5177 
5178 	if (version > 1) {
5179 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5180 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5181 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5182 	}
5183 #undef WREG
5184 }
5185 
5186 static void
5187 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5188 {
5189 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5190 	    dinfo->cfg.pcix.pcix_command,  2);
5191 }
5192 
5193 void
5194 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5195 {
5196 
5197 	/*
5198 	 * Restore the device to full power mode.  We must do this
5199 	 * before we restore the registers because moving from D3 to
5200 	 * D0 will cause the chip's BARs and some other registers to
5201 	 * be reset to some unknown power on reset values.  Cut down
5202 	 * the noise on boot by doing nothing if we are already in
5203 	 * state D0.
5204 	 */
5205 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5206 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5207 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5208 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5209 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5210 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5211 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5212 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5213 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5214 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5215 	case PCIM_HDRTYPE_NORMAL:
5216 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5217 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5218 		break;
5219 	case PCIM_HDRTYPE_BRIDGE:
5220 		pci_write_config(dev, PCIR_SECLAT_1,
5221 		    dinfo->cfg.bridge.br_seclat, 1);
5222 		pci_write_config(dev, PCIR_SUBBUS_1,
5223 		    dinfo->cfg.bridge.br_subbus, 1);
5224 		pci_write_config(dev, PCIR_SECBUS_1,
5225 		    dinfo->cfg.bridge.br_secbus, 1);
5226 		pci_write_config(dev, PCIR_PRIBUS_1,
5227 		    dinfo->cfg.bridge.br_pribus, 1);
5228 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5229 		    dinfo->cfg.bridge.br_control, 2);
5230 		break;
5231 	case PCIM_HDRTYPE_CARDBUS:
5232 		pci_write_config(dev, PCIR_SECLAT_2,
5233 		    dinfo->cfg.bridge.br_seclat, 1);
5234 		pci_write_config(dev, PCIR_SUBBUS_2,
5235 		    dinfo->cfg.bridge.br_subbus, 1);
5236 		pci_write_config(dev, PCIR_SECBUS_2,
5237 		    dinfo->cfg.bridge.br_secbus, 1);
5238 		pci_write_config(dev, PCIR_PRIBUS_2,
5239 		    dinfo->cfg.bridge.br_pribus, 1);
5240 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5241 		    dinfo->cfg.bridge.br_control, 2);
5242 		break;
5243 	}
5244 	pci_restore_bars(dev);
5245 
5246 	/*
5247 	 * Restore extended capabilities for PCI-Express and PCI-X
5248 	 */
5249 	if (dinfo->cfg.pcie.pcie_location != 0)
5250 		pci_cfg_restore_pcie(dev, dinfo);
5251 	if (dinfo->cfg.pcix.pcix_location != 0)
5252 		pci_cfg_restore_pcix(dev, dinfo);
5253 
5254 	/* Restore MSI and MSI-X configurations if they are present. */
5255 	if (dinfo->cfg.msi.msi_location != 0)
5256 		pci_resume_msi(dev);
5257 	if (dinfo->cfg.msix.msix_location != 0)
5258 		pci_resume_msix(dev);
5259 }
5260 
5261 static void
5262 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5263 {
5264 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5265 	struct pcicfg_pcie *cfg;
5266 	int version, pos;
5267 
5268 	cfg = &dinfo->cfg.pcie;
5269 	pos = cfg->pcie_location;
5270 
5271 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5272 
5273 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5274 
5275 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5276 
5277 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5278 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5279 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5280 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5281 
5282 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5283 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5284 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5285 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5286 
5287 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5288 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5289 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5290 
5291 	if (version > 1) {
5292 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5293 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5294 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5295 	}
5296 #undef RREG
5297 }
5298 
5299 static void
5300 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5301 {
5302 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5303 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5304 }
5305 
5306 void
5307 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5308 {
5309 	uint32_t cls;
5310 	int ps;
5311 
5312 	/*
5313 	 * Some drivers apparently write to these registers w/o updating our
5314 	 * cached copy.  No harm happens if we update the copy, so do so here
5315 	 * so we can restore them.  The COMMAND register is modified by the
5316 	 * bus w/o updating the cache.  This should represent the normally
5317 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5318 	 */
5319 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5320 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5321 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5322 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5323 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5324 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5325 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5326 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5327 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5328 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5329 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5330 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5331 	case PCIM_HDRTYPE_NORMAL:
5332 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5333 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5334 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5335 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5336 		break;
5337 	case PCIM_HDRTYPE_BRIDGE:
5338 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5339 		    PCIR_SECLAT_1, 1);
5340 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5341 		    PCIR_SUBBUS_1, 1);
5342 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5343 		    PCIR_SECBUS_1, 1);
5344 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5345 		    PCIR_PRIBUS_1, 1);
5346 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5347 		    PCIR_BRIDGECTL_1, 2);
5348 		break;
5349 	case PCIM_HDRTYPE_CARDBUS:
5350 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5351 		    PCIR_SECLAT_2, 1);
5352 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5353 		    PCIR_SUBBUS_2, 1);
5354 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5355 		    PCIR_SECBUS_2, 1);
5356 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5357 		    PCIR_PRIBUS_2, 1);
5358 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5359 		    PCIR_BRIDGECTL_2, 2);
5360 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5361 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5362 		break;
5363 	}
5364 
5365 	if (dinfo->cfg.pcie.pcie_location != 0)
5366 		pci_cfg_save_pcie(dev, dinfo);
5367 
5368 	if (dinfo->cfg.pcix.pcix_location != 0)
5369 		pci_cfg_save_pcix(dev, dinfo);
5370 
5371 	/*
5372 	 * don't set the state for display devices, base peripherals and
5373 	 * memory devices since bad things happen when they are powered down.
5374 	 * We should (a) have drivers that can easily detach and (b) use
5375 	 * generic drivers for these devices so that some device actually
5376 	 * attaches.  We need to make sure that when we implement (a) we don't
5377 	 * power the device down on a reattach.
5378 	 */
5379 	cls = pci_get_class(dev);
5380 	if (!setstate)
5381 		return;
5382 	switch (pci_do_power_nodriver)
5383 	{
5384 		case 0:		/* NO powerdown at all */
5385 			return;
5386 		case 1:		/* Conservative about what to power down */
5387 			if (cls == PCIC_STORAGE)
5388 				return;
5389 			/*FALLTHROUGH*/
5390 		case 2:		/* Agressive about what to power down */
5391 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5392 			    cls == PCIC_BASEPERIPH)
5393 				return;
5394 			/*FALLTHROUGH*/
5395 		case 3:		/* Power down everything */
5396 			break;
5397 	}
5398 	/*
5399 	 * PCI spec says we can only go into D3 state from D0 state.
5400 	 * Transition from D[12] into D0 before going to D3 state.
5401 	 */
5402 	ps = pci_get_powerstate(dev);
5403 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5404 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5405 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5406 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5407 }
5408 
5409 /* Wrapper APIs suitable for device driver use. */
5410 void
5411 pci_save_state(device_t dev)
5412 {
5413 	struct pci_devinfo *dinfo;
5414 
5415 	dinfo = device_get_ivars(dev);
5416 	pci_cfg_save(dev, dinfo, 0);
5417 }
5418 
5419 void
5420 pci_restore_state(device_t dev)
5421 {
5422 	struct pci_devinfo *dinfo;
5423 
5424 	dinfo = device_get_ivars(dev);
5425 	pci_cfg_restore(dev, dinfo);
5426 }
5427 
5428 static uint16_t
5429 pci_get_rid_method(device_t dev, device_t child)
5430 {
5431 
5432 	return (PCIB_GET_RID(device_get_parent(dev), child));
5433 }
5434 
5435 /* Find the upstream port of a given PCI device in a root complex. */
5436 device_t
5437 pci_find_pcie_root_port(device_t dev)
5438 {
5439 	struct pci_devinfo *dinfo;
5440 	devclass_t pci_class;
5441 	device_t pcib, bus;
5442 
5443 	pci_class = devclass_find("pci");
5444 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5445 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5446 
5447 	/*
5448 	 * Walk the bridge hierarchy until we find a PCI-e root
5449 	 * port or a non-PCI device.
5450 	 */
5451 	for (;;) {
5452 		bus = device_get_parent(dev);
5453 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5454 		    device_get_nameunit(dev)));
5455 
5456 		pcib = device_get_parent(bus);
5457 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5458 		    device_get_nameunit(bus)));
5459 
5460 		/*
5461 		 * pcib's parent must be a PCI bus for this to be a
5462 		 * PCI-PCI bridge.
5463 		 */
5464 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5465 			return (NULL);
5466 
5467 		dinfo = device_get_ivars(pcib);
5468 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5469 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5470 			return (pcib);
5471 
5472 		dev = pcib;
5473 	}
5474 }
5475