xref: /freebsd/sys/dev/pci/pci.c (revision fafb1ee7bdc5d8a7d07cd03b2fb0bbb76f7a9d7c)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/limits.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58 #include <machine/intr_machdep.h>
59 #endif
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 
66 #include <dev/usb/controller/xhcireg.h>
67 #include <dev/usb/controller/ehcireg.h>
68 #include <dev/usb/controller/ohcireg.h>
69 #include <dev/usb/controller/uhcireg.h>
70 
71 #include "pcib_if.h"
72 #include "pci_if.h"
73 
/*
 * Evaluates to non-zero when config register offset 'reg' is the ROM
 * register that matches the header type stored in 'cfg': PCIR_BIOS for a
 * normal header, PCIR_BIOS_1 for a PCI-PCI bridge header.
 *
 * Both arguments are fully parenthesized so that callers may pass
 * arbitrary expressions (e.g. "base ^ off") without the comparison
 * binding to only part of the argument.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
77 
78 static int		pci_has_quirk(uint32_t devid, int quirk);
79 static pci_addr_t	pci_mapbase(uint64_t mapreg);
80 static const char	*pci_maptype(uint64_t mapreg);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static int		pci_msix_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
122 static uint16_t		pci_get_rid_method(device_t dev, device_t child);
123 
124 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
125     int f, uint16_t vid, uint16_t did, size_t size);
126 
127 static device_method_t pci_methods[] = {
128 	/* Device interface */
129 	DEVMETHOD(device_probe,		pci_probe),
130 	DEVMETHOD(device_attach,	pci_attach),
131 #ifdef PCI_RES_BUS
132 	DEVMETHOD(device_detach,	pci_detach),
133 #else
134 	DEVMETHOD(device_detach,	bus_generic_detach),
135 #endif
136 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
137 	DEVMETHOD(device_suspend,	bus_generic_suspend),
138 	DEVMETHOD(device_resume,	pci_resume),
139 
140 	/* Bus interface */
141 	DEVMETHOD(bus_print_child,	pci_print_child),
142 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
143 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
144 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
145 	DEVMETHOD(bus_driver_added,	pci_driver_added),
146 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
147 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
148 
149 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
150 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
151 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
152 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
153 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
154 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
155 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
156 	DEVMETHOD(bus_release_resource,	pci_release_resource),
157 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
158 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
159 	DEVMETHOD(bus_child_detached,	pci_child_detached),
160 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
161 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
162 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
163 	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
164 	DEVMETHOD(bus_resume_child,	pci_resume_child),
165 
166 	/* PCI interface */
167 	DEVMETHOD(pci_read_config,	pci_read_config_method),
168 	DEVMETHOD(pci_write_config,	pci_write_config_method),
169 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
170 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
171 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
172 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
173 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
174 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
175 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
176 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
177 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
178 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
179 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
180 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
181 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
182 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
183 	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
184 	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
185 	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
186 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
187 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
188 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
189 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
190 	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
191 	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
192 	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
193 	DEVMETHOD(pci_child_added,	pci_child_added_method),
194 #ifdef PCI_IOV
195 	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
196 	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
197 	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
198 #endif
199 
200 	DEVMETHOD_END
201 };
202 
203 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
204 
205 static devclass_t pci_devclass;
206 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
207 MODULE_VERSION(pci, 1);
208 
209 static char	*pci_vendordata;
210 static size_t	pci_vendordata_size;
211 
/* Values for pci_quirk.type. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */

/*
 * One entry of the quirk table: a device identified by its combined
 * device/vendor ID together with the kind of workaround it needs.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values */
	int	arg1;	/* Quirk argument; the map-register quirks in the
			   table put a config register offset here */
	int	arg2;	/* Second argument; zero in every table entry */
};
224 
225 static const struct pci_quirk pci_quirks[] = {
226 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
227 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
228 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
229 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
230 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
231 
232 	/*
233 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
234 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
235 	 */
236 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
237 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
238 
239 	/*
240 	 * MSI doesn't work on earlier Intel chipsets including
241 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
242 	 */
243 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
248 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
249 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
250 
251 	/*
252 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
253 	 * bridge.
254 	 */
255 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
256 
257 	/*
258 	 * MSI-X allocation doesn't work properly for devices passed through
259 	 * by VMware up to at least ESXi 5.1.
260 	 */
261 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
262 	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
263 
264 	/*
265 	 * Some virtualization environments emulate an older chipset
266 	 * but support MSI just fine.  QEMU uses the Intel 82440.
267 	 */
268 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
269 
270 	/*
271 	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
272 	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
273 	 * It prevents us from attaching hpet(4) when the bit is unset.
274 	 * Note this quirk only affects SB600 revision A13 and earlier.
275 	 * For SB600 A21 and later, firmware must set the bit to hide it.
276 	 * For SB700 and later, it is unused and hardcoded to zero.
277 	 */
278 	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
279 
280 	/*
281 	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
282 	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
283 	 * command register is set.
284 	 */
285 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
286 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
287 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
288 
289 	/*
290 	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
291 	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
292 	 */
293 	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
294 	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
295 	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
296 	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
297 	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
298 	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
299 
300 	{ 0 }
301 };
302 
303 /* map register information */
304 #define	PCI_MAPMEM	0x01	/* memory map */
305 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
306 #define	PCI_MAPPORT	0x04	/* port map */
307 
308 struct devlist pci_devq;
309 uint32_t pci_generation;
310 uint32_t pci_numdevs = 0;
311 static int pcie_chipset, pcix_chipset;
312 
313 /* sysctl vars */
314 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
315 
316 static int pci_enable_io_modes = 1;
317 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
318     &pci_enable_io_modes, 1,
319     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
320 enable these bits correctly.  We'd like to do this all the time, but there\n\
321 are some peripherals that this causes problems with.");
322 
323 static int pci_do_realloc_bars = 0;
324 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
325     &pci_do_realloc_bars, 0,
326     "Attempt to allocate a new range for any BARs whose original "
327     "firmware-assigned ranges fail to allocate during the initial device scan.");
328 
329 static int pci_do_power_nodriver = 0;
330 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
331     &pci_do_power_nodriver, 0,
332   "Place a function into D3 state when no driver attaches to it.  0 means\n\
333 disable.  1 means conservatively place devices into D3 state.  2 means\n\
334 agressively place devices into D3 state.  3 means put absolutely everything\n\
335 in D3 state.");
336 
337 int pci_do_power_resume = 1;
338 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
339     &pci_do_power_resume, 1,
340   "Transition from D3 -> D0 on resume.");
341 
342 int pci_do_power_suspend = 1;
343 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
344     &pci_do_power_suspend, 1,
345   "Transition from D0 -> D3 on suspend.");
346 
347 static int pci_do_msi = 1;
348 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
349     "Enable support for MSI interrupts");
350 
351 static int pci_do_msix = 1;
352 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
353     "Enable support for MSI-X interrupts");
354 
355 static int pci_honor_msi_blacklist = 1;
356 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
357     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
358 
359 #if defined(__i386__) || defined(__amd64__)
360 static int pci_usb_takeover = 1;
361 #else
362 static int pci_usb_takeover = 0;
363 #endif
364 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
365     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
366 Disable this if you depend on BIOS emulation of USB devices, that is\n\
367 you use USB devices (like keyboard or mouse) but do not load USB drivers");
368 
369 static int pci_clear_bars;
370 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
371     "Ignore firmware-assigned resources for BARs.");
372 
373 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
374 static int pci_clear_buses;
375 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
376     "Ignore firmware-assigned bus numbers.");
377 #endif
378 
379 static int pci_enable_ari = 1;
380 SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
381     0, "Enable support for PCIe Alternative RID Interpretation");
382 
383 static int
384 pci_has_quirk(uint32_t devid, int quirk)
385 {
386 	const struct pci_quirk *q;
387 
388 	for (q = &pci_quirks[0]; q->devid; q++) {
389 		if (q->devid == devid && q->type == quirk)
390 			return (1);
391 	}
392 	return (0);
393 }
394 
395 /* Find a device_t by bus/slot/function in domain 0 */
396 
397 device_t
398 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
399 {
400 
401 	return (pci_find_dbsf(0, bus, slot, func));
402 }
403 
404 /* Find a device_t by domain/bus/slot/function */
405 
406 device_t
407 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
408 {
409 	struct pci_devinfo *dinfo;
410 
411 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
412 		if ((dinfo->cfg.domain == domain) &&
413 		    (dinfo->cfg.bus == bus) &&
414 		    (dinfo->cfg.slot == slot) &&
415 		    (dinfo->cfg.func == func)) {
416 			return (dinfo->cfg.dev);
417 		}
418 	}
419 
420 	return (NULL);
421 }
422 
423 /* Find a device_t by vendor/device ID */
424 
425 device_t
426 pci_find_device(uint16_t vendor, uint16_t device)
427 {
428 	struct pci_devinfo *dinfo;
429 
430 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
431 		if ((dinfo->cfg.vendor == vendor) &&
432 		    (dinfo->cfg.device == device)) {
433 			return (dinfo->cfg.dev);
434 		}
435 	}
436 
437 	return (NULL);
438 }
439 
440 device_t
441 pci_find_class(uint8_t class, uint8_t subclass)
442 {
443 	struct pci_devinfo *dinfo;
444 
445 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
446 		if (dinfo->cfg.baseclass == class &&
447 		    dinfo->cfg.subclass == subclass) {
448 			return (dinfo->cfg.dev);
449 		}
450 	}
451 
452 	return (NULL);
453 }
454 
455 static int
456 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
457 {
458 	va_list ap;
459 	int retval;
460 
461 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
462 	    cfg->func);
463 	va_start(ap, fmt);
464 	retval += vprintf(fmt, ap);
465 	va_end(ap);
466 	return (retval);
467 }
468 
469 /* return base address of memory or port map */
470 
471 static pci_addr_t
472 pci_mapbase(uint64_t mapreg)
473 {
474 
475 	if (PCI_BAR_MEM(mapreg))
476 		return (mapreg & PCIM_BAR_MEM_BASE);
477 	else
478 		return (mapreg & PCIM_BAR_IO_BASE);
479 }
480 
481 /* return map type of memory or port map */
482 
483 static const char *
484 pci_maptype(uint64_t mapreg)
485 {
486 
487 	if (PCI_BAR_IO(mapreg))
488 		return ("I/O Port");
489 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
490 		return ("Prefetchable Memory");
491 	return ("Memory");
492 }
493 
/*
 * Return log2 of the map size decoded for a memory or port map.
 *
 * 'testval' is first reduced to its base-address bits with pci_mapbase();
 * the result is the index of the lowest set bit of that value, or 0 when
 * no address bit is set at all.
 */
int
pci_mapsize(uint64_t testval)
{
	uint64_t bits;
	int ln2size;

	bits = pci_mapbase(testval);
	if (bits == 0)
		return (0);

	/* Count the zero bits below the lowest set bit. */
	for (ln2size = 0; (bits & 1) == 0; bits >>= 1)
		ln2size++;
	return (ln2size);
}
512 
513 /* return base address of device ROM */
514 
515 static pci_addr_t
516 pci_rombase(uint64_t mapreg)
517 {
518 
519 	return (mapreg & PCIM_BIOS_ADDR_MASK);
520 }
521 
/*
 * Return log2 of the map size decoded for the device ROM.
 *
 * 'testval' is reduced to its address bits with pci_rombase(); the result
 * is the index of the lowest set bit, or 0 when no address bit is set.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t bits;
	int ln2size;

	bits = pci_rombase(testval);
	if (bits == 0)
		return (0);

	/* Count the zero bits below the lowest set bit. */
	for (ln2size = 0; (bits & 1) == 0; bits >>= 1)
		ln2size++;
	return (ln2size);
}
540 
541 /* return log2 of address range supported by map register */
542 
543 static int
544 pci_maprange(uint64_t mapreg)
545 {
546 	int ln2range = 0;
547 
548 	if (PCI_BAR_IO(mapreg))
549 		ln2range = 32;
550 	else
551 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
552 		case PCIM_BAR_MEM_32:
553 			ln2range = 32;
554 			break;
555 		case PCIM_BAR_MEM_1MB:
556 			ln2range = 20;
557 			break;
558 		case PCIM_BAR_MEM_64:
559 			ln2range = 64;
560 			break;
561 		}
562 	return (ln2range);
563 }
564 
565 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
566 
567 static void
568 pci_fixancient(pcicfgregs *cfg)
569 {
570 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
571 		return;
572 
573 	/* PCI to PCI bridges use header type 1 */
574 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
575 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
576 }
577 
578 /* extract header type specific config data */
579 
580 static void
581 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
582 {
583 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
584 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
585 	case PCIM_HDRTYPE_NORMAL:
586 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
587 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
588 		cfg->mingnt         = REG(PCIR_MINGNT, 1);
589 		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
590 		cfg->nummaps	    = PCI_MAXMAPS_0;
591 		break;
592 	case PCIM_HDRTYPE_BRIDGE:
593 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
594 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
595 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
596 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
597 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
598 		cfg->nummaps	    = PCI_MAXMAPS_1;
599 		break;
600 	case PCIM_HDRTYPE_CARDBUS:
601 		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
602 		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
603 		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
604 		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
605 		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
606 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
607 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
608 		cfg->nummaps	    = PCI_MAXMAPS_2;
609 		break;
610 	}
611 #undef REG
612 }
613 
614 /* read configuration header into pcicfgregs structure */
615 struct pci_devinfo *
616 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
617 {
618 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
619 	uint16_t vid, did;
620 
621 	vid = REG(PCIR_VENDOR, 2);
622 	did = REG(PCIR_DEVICE, 2);
623 	if (vid != 0xffff)
624 		return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
625 
626 	return (NULL);
627 }
628 
629 static struct pci_devinfo *
630 pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
631     uint16_t did, size_t size)
632 {
633 	struct pci_devinfo *devlist_entry;
634 	pcicfgregs *cfg;
635 
636 	devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
637 
638 	cfg = &devlist_entry->cfg;
639 
640 	cfg->domain		= d;
641 	cfg->bus		= b;
642 	cfg->slot		= s;
643 	cfg->func		= f;
644 	cfg->vendor		= vid;
645 	cfg->device		= did;
646 	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
647 	cfg->statreg		= REG(PCIR_STATUS, 2);
648 	cfg->baseclass		= REG(PCIR_CLASS, 1);
649 	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
650 	cfg->progif		= REG(PCIR_PROGIF, 1);
651 	cfg->revid		= REG(PCIR_REVID, 1);
652 	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
653 	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
654 	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
655 	cfg->intpin		= REG(PCIR_INTPIN, 1);
656 	cfg->intline		= REG(PCIR_INTLINE, 1);
657 
658 	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
659 	cfg->hdrtype		&= ~PCIM_MFDEV;
660 	STAILQ_INIT(&cfg->maps);
661 
662 	cfg->devinfo_size	= size;
663 	cfg->iov		= NULL;
664 
665 	pci_fixancient(cfg);
666 	pci_hdrtypedata(pcib, b, s, f, cfg);
667 
668 	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
669 		pci_read_cap(pcib, cfg);
670 
671 	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
672 
673 	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
674 	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
675 	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
676 	devlist_entry->conf.pc_sel.pc_func = cfg->func;
677 	devlist_entry->conf.pc_hdr = cfg->hdrtype;
678 
679 	devlist_entry->conf.pc_subvendor = cfg->subvendor;
680 	devlist_entry->conf.pc_subdevice = cfg->subdevice;
681 	devlist_entry->conf.pc_vendor = cfg->vendor;
682 	devlist_entry->conf.pc_device = cfg->device;
683 
684 	devlist_entry->conf.pc_class = cfg->baseclass;
685 	devlist_entry->conf.pc_subclass = cfg->subclass;
686 	devlist_entry->conf.pc_progif = cfg->progif;
687 	devlist_entry->conf.pc_revid = cfg->revid;
688 
689 	pci_numdevs++;
690 	pci_generation++;
691 
692 	return (devlist_entry);
693 }
694 #undef REG
695 
696 static void
697 pci_read_cap(device_t pcib, pcicfgregs *cfg)
698 {
699 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
700 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
701 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
702 	uint64_t addr;
703 #endif
704 	uint32_t val;
705 	int	ptr, nextptr, ptrptr;
706 
707 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
708 	case PCIM_HDRTYPE_NORMAL:
709 	case PCIM_HDRTYPE_BRIDGE:
710 		ptrptr = PCIR_CAP_PTR;
711 		break;
712 	case PCIM_HDRTYPE_CARDBUS:
713 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
714 		break;
715 	default:
716 		return;		/* no extended capabilities support */
717 	}
718 	nextptr = REG(ptrptr, 1);	/* sanity check? */
719 
720 	/*
721 	 * Read capability entries.
722 	 */
723 	while (nextptr != 0) {
724 		/* Sanity check */
725 		if (nextptr > 255) {
726 			printf("illegal PCI extended capability offset %d\n",
727 			    nextptr);
728 			return;
729 		}
730 		/* Find the next entry */
731 		ptr = nextptr;
732 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
733 
734 		/* Process this entry */
735 		switch (REG(ptr + PCICAP_ID, 1)) {
736 		case PCIY_PMG:		/* PCI power management */
737 			if (cfg->pp.pp_cap == 0) {
738 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
739 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
740 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
741 				if ((nextptr - ptr) > PCIR_POWER_DATA)
742 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
743 			}
744 			break;
745 		case PCIY_HT:		/* HyperTransport */
746 			/* Determine HT-specific capability type. */
747 			val = REG(ptr + PCIR_HT_COMMAND, 2);
748 
749 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
750 				cfg->ht.ht_slave = ptr;
751 
752 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
753 			switch (val & PCIM_HTCMD_CAP_MASK) {
754 			case PCIM_HTCAP_MSI_MAPPING:
755 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
756 					/* Sanity check the mapping window. */
757 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
758 					    4);
759 					addr <<= 32;
760 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
761 					    4);
762 					if (addr != MSI_INTEL_ADDR_BASE)
763 						device_printf(pcib,
764 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
765 						    cfg->domain, cfg->bus,
766 						    cfg->slot, cfg->func,
767 						    (long long)addr);
768 				} else
769 					addr = MSI_INTEL_ADDR_BASE;
770 
771 				cfg->ht.ht_msimap = ptr;
772 				cfg->ht.ht_msictrl = val;
773 				cfg->ht.ht_msiaddr = addr;
774 				break;
775 			}
776 #endif
777 			break;
778 		case PCIY_MSI:		/* PCI MSI */
779 			cfg->msi.msi_location = ptr;
780 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
781 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
782 						     PCIM_MSICTRL_MMC_MASK)>>1);
783 			break;
784 		case PCIY_MSIX:		/* PCI MSI-X */
785 			cfg->msix.msix_location = ptr;
786 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
787 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
788 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
789 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
790 			cfg->msix.msix_table_bar = PCIR_BAR(val &
791 			    PCIM_MSIX_BIR_MASK);
792 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
793 			val = REG(ptr + PCIR_MSIX_PBA, 4);
794 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
795 			    PCIM_MSIX_BIR_MASK);
796 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
797 			break;
798 		case PCIY_VPD:		/* PCI Vital Product Data */
799 			cfg->vpd.vpd_reg = ptr;
800 			break;
801 		case PCIY_SUBVENDOR:
802 			/* Should always be true. */
803 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
804 			    PCIM_HDRTYPE_BRIDGE) {
805 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
806 				cfg->subvendor = val & 0xffff;
807 				cfg->subdevice = val >> 16;
808 			}
809 			break;
810 		case PCIY_PCIX:		/* PCI-X */
811 			/*
812 			 * Assume we have a PCI-X chipset if we have
813 			 * at least one PCI-PCI bridge with a PCI-X
814 			 * capability.  Note that some systems with
815 			 * PCI-express or HT chipsets might match on
816 			 * this check as well.
817 			 */
818 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
819 			    PCIM_HDRTYPE_BRIDGE)
820 				pcix_chipset = 1;
821 			cfg->pcix.pcix_location = ptr;
822 			break;
823 		case PCIY_EXPRESS:	/* PCI-express */
824 			/*
825 			 * Assume we have a PCI-express chipset if we have
826 			 * at least one PCI-express device.
827 			 */
828 			pcie_chipset = 1;
829 			cfg->pcie.pcie_location = ptr;
830 			val = REG(ptr + PCIER_FLAGS, 2);
831 			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
832 			break;
833 		default:
834 			break;
835 		}
836 	}
837 
838 #if defined(__powerpc__)
839 	/*
840 	 * Enable the MSI mapping window for all HyperTransport
841 	 * slaves.  PCI-PCI bridges have their windows enabled via
842 	 * PCIB_MAP_MSI().
843 	 */
844 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
845 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
846 		device_printf(pcib,
847 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
848 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
849 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
850 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
851 		     2);
852 	}
853 #endif
854 /* REG and WREG use carry through to next functions */
855 }
856 
857 /*
858  * PCI Vital Product Data
859  */
860 
861 #define	PCI_VPD_TIMEOUT		1000000
862 
863 static int
864 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
865 {
866 	int count = PCI_VPD_TIMEOUT;
867 
868 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
869 
870 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
871 
872 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
873 		if (--count < 0)
874 			return (ENXIO);
875 		DELAY(1);	/* limit looping */
876 	}
877 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
878 
879 	return (0);
880 }
881 
#if 0
/*
 * Disabled: write the 32-bit word 'data' to VPD offset 'reg' (a multiple
 * of four).  The data register is written first, then the offset with
 * bit 15 set; the address register is polled until bit 15 clears.
 * Returns 0 on success or ENXIO after PCI_VPD_TIMEOUT polls.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int tries_left;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	tries_left = PCI_VPD_TIMEOUT;
	for (;;) {
		if ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) !=
		    0x8000)
			break;
		if (--tries_left < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
903 
904 struct vpd_readstate {
905 	device_t	pcib;
906 	pcicfgregs	*cfg;
907 	uint32_t	val;
908 	int		bytesinval;
909 	int		off;
910 	uint8_t		cksum;
911 };
912 
913 static int
914 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
915 {
916 	uint32_t reg;
917 	uint8_t byte;
918 
919 	if (vrs->bytesinval == 0) {
920 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
921 			return (ENXIO);
922 		vrs->val = le32toh(reg);
923 		vrs->off += 4;
924 		byte = vrs->val & 0xff;
925 		vrs->bytesinval = 3;
926 	} else {
927 		vrs->val = vrs->val >> 8;
928 		byte = vrs->val & 0xff;
929 		vrs->bytesinval--;
930 	}
931 
932 	vrs->cksum += byte;
933 	*data = byte;
934 	return (0);
935 }
936 
937 static void
938 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
939 {
940 	struct vpd_readstate vrs;
941 	int state;
942 	int name;
943 	int remain;
944 	int i;
945 	int alloc, off;		/* alloc/off for RO/W arrays */
946 	int cksumvalid;
947 	int dflen;
948 	uint8_t byte;
949 	uint8_t byte2;
950 
951 	/* init vpd reader */
952 	vrs.bytesinval = 0;
953 	vrs.off = 0;
954 	vrs.pcib = pcib;
955 	vrs.cfg = cfg;
956 	vrs.cksum = 0;
957 
958 	state = 0;
959 	name = remain = i = 0;	/* shut up stupid gcc */
960 	alloc = off = 0;	/* shut up stupid gcc */
961 	dflen = 0;		/* shut up stupid gcc */
962 	cksumvalid = -1;
963 	while (state >= 0) {
964 		if (vpd_nextbyte(&vrs, &byte)) {
965 			state = -2;
966 			break;
967 		}
968 #if 0
969 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
970 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
971 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
972 #endif
973 		switch (state) {
974 		case 0:		/* item name */
975 			if (byte & 0x80) {
976 				if (vpd_nextbyte(&vrs, &byte2)) {
977 					state = -2;
978 					break;
979 				}
980 				remain = byte2;
981 				if (vpd_nextbyte(&vrs, &byte2)) {
982 					state = -2;
983 					break;
984 				}
985 				remain |= byte2 << 8;
986 				if (remain > (0x7f*4 - vrs.off)) {
987 					state = -1;
988 					pci_printf(cfg,
989 					    "invalid VPD data, remain %#x\n",
990 					    remain);
991 				}
992 				name = byte & 0x7f;
993 			} else {
994 				remain = byte & 0x7;
995 				name = (byte >> 3) & 0xf;
996 			}
997 			switch (name) {
998 			case 0x2:	/* String */
999 				cfg->vpd.vpd_ident = malloc(remain + 1,
1000 				    M_DEVBUF, M_WAITOK);
1001 				i = 0;
1002 				state = 1;
1003 				break;
1004 			case 0xf:	/* End */
1005 				state = -1;
1006 				break;
1007 			case 0x10:	/* VPD-R */
1008 				alloc = 8;
1009 				off = 0;
1010 				cfg->vpd.vpd_ros = malloc(alloc *
1011 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1012 				    M_WAITOK | M_ZERO);
1013 				state = 2;
1014 				break;
1015 			case 0x11:	/* VPD-W */
1016 				alloc = 8;
1017 				off = 0;
1018 				cfg->vpd.vpd_w = malloc(alloc *
1019 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1020 				    M_WAITOK | M_ZERO);
1021 				state = 5;
1022 				break;
1023 			default:	/* Invalid data, abort */
1024 				state = -1;
1025 				break;
1026 			}
1027 			break;
1028 
1029 		case 1:	/* Identifier String */
1030 			cfg->vpd.vpd_ident[i++] = byte;
1031 			remain--;
1032 			if (remain == 0)  {
1033 				cfg->vpd.vpd_ident[i] = '\0';
1034 				state = 0;
1035 			}
1036 			break;
1037 
1038 		case 2:	/* VPD-R Keyword Header */
1039 			if (off == alloc) {
1040 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1041 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1042 				    M_DEVBUF, M_WAITOK | M_ZERO);
1043 			}
1044 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1045 			if (vpd_nextbyte(&vrs, &byte2)) {
1046 				state = -2;
1047 				break;
1048 			}
1049 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1050 			if (vpd_nextbyte(&vrs, &byte2)) {
1051 				state = -2;
1052 				break;
1053 			}
1054 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1055 			if (dflen == 0 &&
1056 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1057 			    2) == 0) {
1058 				/*
1059 				 * if this happens, we can't trust the rest
1060 				 * of the VPD.
1061 				 */
1062 				pci_printf(cfg, "bad keyword length: %d\n",
1063 				    dflen);
1064 				cksumvalid = 0;
1065 				state = -1;
1066 				break;
1067 			} else if (dflen == 0) {
1068 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1069 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1070 				    M_DEVBUF, M_WAITOK);
1071 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1072 			} else
1073 				cfg->vpd.vpd_ros[off].value = malloc(
1074 				    (dflen + 1) *
1075 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1076 				    M_DEVBUF, M_WAITOK);
1077 			remain -= 3;
1078 			i = 0;
1079 			/* keep in sync w/ state 3's transistions */
1080 			if (dflen == 0 && remain == 0)
1081 				state = 0;
1082 			else if (dflen == 0)
1083 				state = 2;
1084 			else
1085 				state = 3;
1086 			break;
1087 
1088 		case 3:	/* VPD-R Keyword Value */
1089 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1090 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1091 			    "RV", 2) == 0 && cksumvalid == -1) {
1092 				if (vrs.cksum == 0)
1093 					cksumvalid = 1;
1094 				else {
1095 					if (bootverbose)
1096 						pci_printf(cfg,
1097 					    "bad VPD cksum, remain %hhu\n",
1098 						    vrs.cksum);
1099 					cksumvalid = 0;
1100 					state = -1;
1101 					break;
1102 				}
1103 			}
1104 			dflen--;
1105 			remain--;
1106 			/* keep in sync w/ state 2's transistions */
1107 			if (dflen == 0)
1108 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1109 			if (dflen == 0 && remain == 0) {
1110 				cfg->vpd.vpd_rocnt = off;
1111 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1112 				    off * sizeof(*cfg->vpd.vpd_ros),
1113 				    M_DEVBUF, M_WAITOK | M_ZERO);
1114 				state = 0;
1115 			} else if (dflen == 0)
1116 				state = 2;
1117 			break;
1118 
1119 		case 4:
1120 			remain--;
1121 			if (remain == 0)
1122 				state = 0;
1123 			break;
1124 
1125 		case 5:	/* VPD-W Keyword Header */
1126 			if (off == alloc) {
1127 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1128 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1129 				    M_DEVBUF, M_WAITOK | M_ZERO);
1130 			}
1131 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1132 			if (vpd_nextbyte(&vrs, &byte2)) {
1133 				state = -2;
1134 				break;
1135 			}
1136 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1137 			if (vpd_nextbyte(&vrs, &byte2)) {
1138 				state = -2;
1139 				break;
1140 			}
1141 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1142 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1143 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1144 			    sizeof(*cfg->vpd.vpd_w[off].value),
1145 			    M_DEVBUF, M_WAITOK);
1146 			remain -= 3;
1147 			i = 0;
1148 			/* keep in sync w/ state 6's transistions */
1149 			if (dflen == 0 && remain == 0)
1150 				state = 0;
1151 			else if (dflen == 0)
1152 				state = 5;
1153 			else
1154 				state = 6;
1155 			break;
1156 
1157 		case 6:	/* VPD-W Keyword Value */
1158 			cfg->vpd.vpd_w[off].value[i++] = byte;
1159 			dflen--;
1160 			remain--;
1161 			/* keep in sync w/ state 5's transistions */
1162 			if (dflen == 0)
1163 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1164 			if (dflen == 0 && remain == 0) {
1165 				cfg->vpd.vpd_wcnt = off;
1166 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1167 				    off * sizeof(*cfg->vpd.vpd_w),
1168 				    M_DEVBUF, M_WAITOK | M_ZERO);
1169 				state = 0;
1170 			} else if (dflen == 0)
1171 				state = 5;
1172 			break;
1173 
1174 		default:
1175 			pci_printf(cfg, "invalid state: %d\n", state);
1176 			state = -1;
1177 			break;
1178 		}
1179 	}
1180 
1181 	if (cksumvalid == 0 || state < -1) {
1182 		/* read-only data bad, clean up */
1183 		if (cfg->vpd.vpd_ros != NULL) {
1184 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1185 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1186 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1187 			cfg->vpd.vpd_ros = NULL;
1188 		}
1189 	}
1190 	if (state < -1) {
1191 		/* I/O error, clean up */
1192 		pci_printf(cfg, "failed to read VPD data.\n");
1193 		if (cfg->vpd.vpd_ident != NULL) {
1194 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1195 			cfg->vpd.vpd_ident = NULL;
1196 		}
1197 		if (cfg->vpd.vpd_w != NULL) {
1198 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1199 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1200 			free(cfg->vpd.vpd_w, M_DEVBUF);
1201 			cfg->vpd.vpd_w = NULL;
1202 		}
1203 	}
1204 	cfg->vpd.vpd_cached = 1;
1205 #undef REG
1206 #undef WREG
1207 }
1208 
1209 int
1210 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1211 {
1212 	struct pci_devinfo *dinfo = device_get_ivars(child);
1213 	pcicfgregs *cfg = &dinfo->cfg;
1214 
1215 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1216 		pci_read_vpd(device_get_parent(dev), cfg);
1217 
1218 	*identptr = cfg->vpd.vpd_ident;
1219 
1220 	if (*identptr == NULL)
1221 		return (ENXIO);
1222 
1223 	return (0);
1224 }
1225 
1226 int
1227 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1228 	const char **vptr)
1229 {
1230 	struct pci_devinfo *dinfo = device_get_ivars(child);
1231 	pcicfgregs *cfg = &dinfo->cfg;
1232 	int i;
1233 
1234 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1235 		pci_read_vpd(device_get_parent(dev), cfg);
1236 
1237 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1238 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1239 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1240 			*vptr = cfg->vpd.vpd_ros[i].value;
1241 			return (0);
1242 		}
1243 
1244 	*vptr = NULL;
1245 	return (ENXIO);
1246 }
1247 
1248 struct pcicfg_vpd *
1249 pci_fetch_vpd_list(device_t dev)
1250 {
1251 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1252 	pcicfgregs *cfg = &dinfo->cfg;
1253 
1254 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1255 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1256 	return (&cfg->vpd);
1257 }
1258 
1259 /*
1260  * Find the requested HyperTransport capability and return the offset
1261  * in configuration space via the pointer provided.  The function
1262  * returns 0 on success and an error code otherwise.
1263  */
1264 int
1265 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1266 {
1267 	int ptr, error;
1268 	uint16_t val;
1269 
1270 	error = pci_find_cap(child, PCIY_HT, &ptr);
1271 	if (error)
1272 		return (error);
1273 
1274 	/*
1275 	 * Traverse the capabilities list checking each HT capability
1276 	 * to see if it matches the requested HT capability.
1277 	 */
1278 	while (ptr != 0) {
1279 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1280 		if (capability == PCIM_HTCAP_SLAVE ||
1281 		    capability == PCIM_HTCAP_HOST)
1282 			val &= 0xe000;
1283 		else
1284 			val &= PCIM_HTCMD_CAP_MASK;
1285 		if (val == capability) {
1286 			if (capreg != NULL)
1287 				*capreg = ptr;
1288 			return (0);
1289 		}
1290 
1291 		/* Skip to the next HT capability. */
1292 		while (ptr != 0) {
1293 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1294 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1295 			    PCIY_HT)
1296 				break;
1297 		}
1298 	}
1299 	return (ENOENT);
1300 }
1301 
1302 /*
1303  * Find the requested capability and return the offset in
1304  * configuration space via the pointer provided.  The function returns
1305  * 0 on success and an error code otherwise.
1306  */
1307 int
1308 pci_find_cap_method(device_t dev, device_t child, int capability,
1309     int *capreg)
1310 {
1311 	struct pci_devinfo *dinfo = device_get_ivars(child);
1312 	pcicfgregs *cfg = &dinfo->cfg;
1313 	u_int32_t status;
1314 	u_int8_t ptr;
1315 
1316 	/*
1317 	 * Check the CAP_LIST bit of the PCI status register first.
1318 	 */
1319 	status = pci_read_config(child, PCIR_STATUS, 2);
1320 	if (!(status & PCIM_STATUS_CAPPRESENT))
1321 		return (ENXIO);
1322 
1323 	/*
1324 	 * Determine the start pointer of the capabilities list.
1325 	 */
1326 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1327 	case PCIM_HDRTYPE_NORMAL:
1328 	case PCIM_HDRTYPE_BRIDGE:
1329 		ptr = PCIR_CAP_PTR;
1330 		break;
1331 	case PCIM_HDRTYPE_CARDBUS:
1332 		ptr = PCIR_CAP_PTR_2;
1333 		break;
1334 	default:
1335 		/* XXX: panic? */
1336 		return (ENXIO);		/* no extended capabilities support */
1337 	}
1338 	ptr = pci_read_config(child, ptr, 1);
1339 
1340 	/*
1341 	 * Traverse the capabilities list.
1342 	 */
1343 	while (ptr != 0) {
1344 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1345 			if (capreg != NULL)
1346 				*capreg = ptr;
1347 			return (0);
1348 		}
1349 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1350 	}
1351 
1352 	return (ENOENT);
1353 }
1354 
1355 /*
1356  * Find the requested extended capability and return the offset in
1357  * configuration space via the pointer provided.  The function returns
1358  * 0 on success and an error code otherwise.
1359  */
1360 int
1361 pci_find_extcap_method(device_t dev, device_t child, int capability,
1362     int *capreg)
1363 {
1364 	struct pci_devinfo *dinfo = device_get_ivars(child);
1365 	pcicfgregs *cfg = &dinfo->cfg;
1366 	uint32_t ecap;
1367 	uint16_t ptr;
1368 
1369 	/* Only supported for PCI-express devices. */
1370 	if (cfg->pcie.pcie_location == 0)
1371 		return (ENXIO);
1372 
1373 	ptr = PCIR_EXTCAP;
1374 	ecap = pci_read_config(child, ptr, 4);
1375 	if (ecap == 0xffffffff || ecap == 0)
1376 		return (ENOENT);
1377 	for (;;) {
1378 		if (PCI_EXTCAP_ID(ecap) == capability) {
1379 			if (capreg != NULL)
1380 				*capreg = ptr;
1381 			return (0);
1382 		}
1383 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1384 		if (ptr == 0)
1385 			break;
1386 		ecap = pci_read_config(child, ptr, 4);
1387 	}
1388 
1389 	return (ENOENT);
1390 }
1391 
1392 /*
1393  * Support for MSI-X message interrupts.
1394  */
1395 void
1396 pci_enable_msix_method(device_t dev, device_t child, u_int index,
1397     uint64_t address, uint32_t data)
1398 {
1399 	struct pci_devinfo *dinfo = device_get_ivars(child);
1400 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1401 	uint32_t offset;
1402 
1403 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1404 	offset = msix->msix_table_offset + index * 16;
1405 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1406 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1407 	bus_write_4(msix->msix_table_res, offset + 8, data);
1408 
1409 	/* Enable MSI -> HT mapping. */
1410 	pci_ht_map_msi(child, address);
1411 }
1412 
1413 void
1414 pci_mask_msix(device_t dev, u_int index)
1415 {
1416 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1417 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1418 	uint32_t offset, val;
1419 
1420 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1421 	offset = msix->msix_table_offset + index * 16 + 12;
1422 	val = bus_read_4(msix->msix_table_res, offset);
1423 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1424 		val |= PCIM_MSIX_VCTRL_MASK;
1425 		bus_write_4(msix->msix_table_res, offset, val);
1426 	}
1427 }
1428 
1429 void
1430 pci_unmask_msix(device_t dev, u_int index)
1431 {
1432 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1433 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1434 	uint32_t offset, val;
1435 
1436 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1437 	offset = msix->msix_table_offset + index * 16 + 12;
1438 	val = bus_read_4(msix->msix_table_res, offset);
1439 	if (val & PCIM_MSIX_VCTRL_MASK) {
1440 		val &= ~PCIM_MSIX_VCTRL_MASK;
1441 		bus_write_4(msix->msix_table_res, offset, val);
1442 	}
1443 }
1444 
1445 int
1446 pci_pending_msix(device_t dev, u_int index)
1447 {
1448 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1449 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1450 	uint32_t offset, bit;
1451 
1452 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1453 	offset = msix->msix_pba_offset + (index / 32) * 4;
1454 	bit = 1 << index % 32;
1455 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1456 }
1457 
1458 /*
1459  * Restore MSI-X registers and table during resume.  If MSI-X is
1460  * enabled then walk the virtual table to restore the actual MSI-X
1461  * table.
1462  */
1463 static void
1464 pci_resume_msix(device_t dev)
1465 {
1466 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1467 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1468 	struct msix_table_entry *mte;
1469 	struct msix_vector *mv;
1470 	int i;
1471 
1472 	if (msix->msix_alloc > 0) {
1473 		/* First, mask all vectors. */
1474 		for (i = 0; i < msix->msix_msgnum; i++)
1475 			pci_mask_msix(dev, i);
1476 
1477 		/* Second, program any messages with at least one handler. */
1478 		for (i = 0; i < msix->msix_table_len; i++) {
1479 			mte = &msix->msix_table[i];
1480 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1481 				continue;
1482 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1483 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1484 			pci_unmask_msix(dev, i);
1485 		}
1486 	}
1487 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1488 	    msix->msix_ctrl, 2);
1489 }
1490 
1491 /*
1492  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1493  * returned in *count.  After this function returns, each message will be
1494  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1495  */
1496 int
1497 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1498 {
1499 	struct pci_devinfo *dinfo = device_get_ivars(child);
1500 	pcicfgregs *cfg = &dinfo->cfg;
1501 	struct resource_list_entry *rle;
1502 	int actual, error, i, irq, max;
1503 
1504 	/* Don't let count == 0 get us into trouble. */
1505 	if (*count == 0)
1506 		return (EINVAL);
1507 
1508 	/* If rid 0 is allocated, then fail. */
1509 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1510 	if (rle != NULL && rle->res != NULL)
1511 		return (ENXIO);
1512 
1513 	/* Already have allocated messages? */
1514 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1515 		return (ENXIO);
1516 
1517 	/* If MSI-X is blacklisted for this system, fail. */
1518 	if (pci_msix_blacklisted())
1519 		return (ENXIO);
1520 
1521 	/* MSI-X capability present? */
1522 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1523 		return (ENODEV);
1524 
1525 	/* Make sure the appropriate BARs are mapped. */
1526 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1527 	    cfg->msix.msix_table_bar);
1528 	if (rle == NULL || rle->res == NULL ||
1529 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1530 		return (ENXIO);
1531 	cfg->msix.msix_table_res = rle->res;
1532 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1533 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1534 		    cfg->msix.msix_pba_bar);
1535 		if (rle == NULL || rle->res == NULL ||
1536 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1537 			return (ENXIO);
1538 	}
1539 	cfg->msix.msix_pba_res = rle->res;
1540 
1541 	if (bootverbose)
1542 		device_printf(child,
1543 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1544 		    *count, cfg->msix.msix_msgnum);
1545 	max = min(*count, cfg->msix.msix_msgnum);
1546 	for (i = 0; i < max; i++) {
1547 		/* Allocate a message. */
1548 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1549 		if (error) {
1550 			if (i == 0)
1551 				return (error);
1552 			break;
1553 		}
1554 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1555 		    irq, 1);
1556 	}
1557 	actual = i;
1558 
1559 	if (bootverbose) {
1560 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1561 		if (actual == 1)
1562 			device_printf(child, "using IRQ %lu for MSI-X\n",
1563 			    rle->start);
1564 		else {
1565 			int run;
1566 
1567 			/*
1568 			 * Be fancy and try to print contiguous runs of
1569 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1570 			 * 'run' is true if we are in a range.
1571 			 */
1572 			device_printf(child, "using IRQs %lu", rle->start);
1573 			irq = rle->start;
1574 			run = 0;
1575 			for (i = 1; i < actual; i++) {
1576 				rle = resource_list_find(&dinfo->resources,
1577 				    SYS_RES_IRQ, i + 1);
1578 
1579 				/* Still in a run? */
1580 				if (rle->start == irq + 1) {
1581 					run = 1;
1582 					irq++;
1583 					continue;
1584 				}
1585 
1586 				/* Finish previous range. */
1587 				if (run) {
1588 					printf("-%d", irq);
1589 					run = 0;
1590 				}
1591 
1592 				/* Start new range. */
1593 				printf(",%lu", rle->start);
1594 				irq = rle->start;
1595 			}
1596 
1597 			/* Unfinished range? */
1598 			if (run)
1599 				printf("-%d", irq);
1600 			printf(" for MSI-X\n");
1601 		}
1602 	}
1603 
1604 	/* Mask all vectors. */
1605 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1606 		pci_mask_msix(child, i);
1607 
1608 	/* Allocate and initialize vector data and virtual table. */
1609 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1610 	    M_DEVBUF, M_WAITOK | M_ZERO);
1611 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1612 	    M_DEVBUF, M_WAITOK | M_ZERO);
1613 	for (i = 0; i < actual; i++) {
1614 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1615 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1616 		cfg->msix.msix_table[i].mte_vector = i + 1;
1617 	}
1618 
1619 	/* Update control register to enable MSI-X. */
1620 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1621 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1622 	    cfg->msix.msix_ctrl, 2);
1623 
1624 	/* Update counts of alloc'd messages. */
1625 	cfg->msix.msix_alloc = actual;
1626 	cfg->msix.msix_table_len = actual;
1627 	*count = actual;
1628 	return (0);
1629 }
1630 
1631 /*
1632  * By default, pci_alloc_msix() will assign the allocated IRQ
1633  * resources consecutively to the first N messages in the MSI-X table.
1634  * However, device drivers may want to use different layouts if they
1635  * either receive fewer messages than they asked for, or they wish to
1636  * populate the MSI-X table sparsely.  This method allows the driver
1637  * to specify what layout it wants.  It must be called after a
1638  * successful pci_alloc_msix() but before any of the associated
1639  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1640  *
1641  * The 'vectors' array contains 'count' message vectors.  The array
1642  * maps directly to the MSI-X table in that index 0 in the array
1643  * specifies the vector for the first message in the MSI-X table, etc.
1644  * The vector value in each array index can either be 0 to indicate
1645  * that no vector should be assigned to a message slot, or it can be a
1646  * number from 1 to N (where N is the count returned from a
1647  * succcessful call to pci_alloc_msix()) to indicate which message
1648  * vector (IRQ) to be used for the corresponding message.
1649  *
1650  * On successful return, each message with a non-zero vector will have
1651  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1652  * 1.  Additionally, if any of the IRQs allocated via the previous
1653  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1654  * will be freed back to the system automatically.
1655  *
1656  * For example, suppose a driver has a MSI-X table with 6 messages and
1657  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1658  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1659  * C.  After the call to pci_alloc_msix(), the device will be setup to
1660  * have an MSI-X table of ABC--- (where - means no vector assigned).
1661  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1662  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1663  * be freed back to the system.  This device will also have valid
1664  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1665  *
1666  * In any case, the SYS_RES_IRQ rid X will always map to the message
1667  * at MSI-X table index X - 1 and will only be valid if a vector is
1668  * assigned to that table entry.
1669  */
1670 int
1671 pci_remap_msix_method(device_t dev, device_t child, int count,
1672     const u_int *vectors)
1673 {
1674 	struct pci_devinfo *dinfo = device_get_ivars(child);
1675 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1676 	struct resource_list_entry *rle;
1677 	int i, irq, j, *used;
1678 
1679 	/*
1680 	 * Have to have at least one message in the table but the
1681 	 * table can't be bigger than the actual MSI-X table in the
1682 	 * device.
1683 	 */
1684 	if (count == 0 || count > msix->msix_msgnum)
1685 		return (EINVAL);
1686 
1687 	/* Sanity check the vectors. */
1688 	for (i = 0; i < count; i++)
1689 		if (vectors[i] > msix->msix_alloc)
1690 			return (EINVAL);
1691 
1692 	/*
1693 	 * Make sure there aren't any holes in the vectors to be used.
1694 	 * It's a big pain to support it, and it doesn't really make
1695 	 * sense anyway.  Also, at least one vector must be used.
1696 	 */
1697 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1698 	    M_ZERO);
1699 	for (i = 0; i < count; i++)
1700 		if (vectors[i] != 0)
1701 			used[vectors[i] - 1] = 1;
1702 	for (i = 0; i < msix->msix_alloc - 1; i++)
1703 		if (used[i] == 0 && used[i + 1] == 1) {
1704 			free(used, M_DEVBUF);
1705 			return (EINVAL);
1706 		}
1707 	if (used[0] != 1) {
1708 		free(used, M_DEVBUF);
1709 		return (EINVAL);
1710 	}
1711 
1712 	/* Make sure none of the resources are allocated. */
1713 	for (i = 0; i < msix->msix_table_len; i++) {
1714 		if (msix->msix_table[i].mte_vector == 0)
1715 			continue;
1716 		if (msix->msix_table[i].mte_handlers > 0) {
1717 			free(used, M_DEVBUF);
1718 			return (EBUSY);
1719 		}
1720 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1721 		KASSERT(rle != NULL, ("missing resource"));
1722 		if (rle->res != NULL) {
1723 			free(used, M_DEVBUF);
1724 			return (EBUSY);
1725 		}
1726 	}
1727 
1728 	/* Free the existing resource list entries. */
1729 	for (i = 0; i < msix->msix_table_len; i++) {
1730 		if (msix->msix_table[i].mte_vector == 0)
1731 			continue;
1732 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1733 	}
1734 
1735 	/*
1736 	 * Build the new virtual table keeping track of which vectors are
1737 	 * used.
1738 	 */
1739 	free(msix->msix_table, M_DEVBUF);
1740 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1741 	    M_DEVBUF, M_WAITOK | M_ZERO);
1742 	for (i = 0; i < count; i++)
1743 		msix->msix_table[i].mte_vector = vectors[i];
1744 	msix->msix_table_len = count;
1745 
1746 	/* Free any unused IRQs and resize the vectors array if necessary. */
1747 	j = msix->msix_alloc - 1;
1748 	if (used[j] == 0) {
1749 		struct msix_vector *vec;
1750 
1751 		while (used[j] == 0) {
1752 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1753 			    msix->msix_vectors[j].mv_irq);
1754 			j--;
1755 		}
1756 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1757 		    M_WAITOK);
1758 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1759 		    (j + 1));
1760 		free(msix->msix_vectors, M_DEVBUF);
1761 		msix->msix_vectors = vec;
1762 		msix->msix_alloc = j + 1;
1763 	}
1764 	free(used, M_DEVBUF);
1765 
1766 	/* Map the IRQs onto the rids. */
1767 	for (i = 0; i < count; i++) {
1768 		if (vectors[i] == 0)
1769 			continue;
1770 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1771 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1772 		    irq, 1);
1773 	}
1774 
1775 	if (bootverbose) {
1776 		device_printf(child, "Remapped MSI-X IRQs as: ");
1777 		for (i = 0; i < count; i++) {
1778 			if (i != 0)
1779 				printf(", ");
1780 			if (vectors[i] == 0)
1781 				printf("---");
1782 			else
1783 				printf("%d",
1784 				    msix->msix_vectors[vectors[i]].mv_irq);
1785 		}
1786 		printf("\n");
1787 	}
1788 
1789 	return (0);
1790 }
1791 
1792 static int
1793 pci_release_msix(device_t dev, device_t child)
1794 {
1795 	struct pci_devinfo *dinfo = device_get_ivars(child);
1796 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1797 	struct resource_list_entry *rle;
1798 	int i;
1799 
1800 	/* Do we have any messages to release? */
1801 	if (msix->msix_alloc == 0)
1802 		return (ENODEV);
1803 
1804 	/* Make sure none of the resources are allocated. */
1805 	for (i = 0; i < msix->msix_table_len; i++) {
1806 		if (msix->msix_table[i].mte_vector == 0)
1807 			continue;
1808 		if (msix->msix_table[i].mte_handlers > 0)
1809 			return (EBUSY);
1810 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1811 		KASSERT(rle != NULL, ("missing resource"));
1812 		if (rle->res != NULL)
1813 			return (EBUSY);
1814 	}
1815 
1816 	/* Update control register to disable MSI-X. */
1817 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1818 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1819 	    msix->msix_ctrl, 2);
1820 
1821 	/* Free the resource list entries. */
1822 	for (i = 0; i < msix->msix_table_len; i++) {
1823 		if (msix->msix_table[i].mte_vector == 0)
1824 			continue;
1825 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1826 	}
1827 	free(msix->msix_table, M_DEVBUF);
1828 	msix->msix_table_len = 0;
1829 
1830 	/* Release the IRQs. */
1831 	for (i = 0; i < msix->msix_alloc; i++)
1832 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1833 		    msix->msix_vectors[i].mv_irq);
1834 	free(msix->msix_vectors, M_DEVBUF);
1835 	msix->msix_alloc = 0;
1836 	return (0);
1837 }
1838 
1839 /*
1840  * Return the max supported MSI-X messages this device supports.
1841  * Basically, assuming the MD code can alloc messages, this function
1842  * should return the maximum value that pci_alloc_msix() can return.
1843  * Thus, it is subject to the tunables, etc.
1844  */
1845 int
1846 pci_msix_count_method(device_t dev, device_t child)
1847 {
1848 	struct pci_devinfo *dinfo = device_get_ivars(child);
1849 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1850 
1851 	if (pci_do_msix && msix->msix_location != 0)
1852 		return (msix->msix_msgnum);
1853 	return (0);
1854 }
1855 
1856 int
1857 pci_msix_pba_bar_method(device_t dev, device_t child)
1858 {
1859 	struct pci_devinfo *dinfo = device_get_ivars(child);
1860 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1861 
1862 	if (pci_do_msix && msix->msix_location != 0)
1863 		return (msix->msix_pba_bar);
1864 	return (-1);
1865 }
1866 
1867 int
1868 pci_msix_table_bar_method(device_t dev, device_t child)
1869 {
1870 	struct pci_devinfo *dinfo = device_get_ivars(child);
1871 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1872 
1873 	if (pci_do_msix && msix->msix_location != 0)
1874 		return (msix->msix_table_bar);
1875 	return (-1);
1876 }
1877 
1878 /*
1879  * HyperTransport MSI mapping control
1880  */
1881 void
1882 pci_ht_map_msi(device_t dev, uint64_t addr)
1883 {
1884 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1885 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1886 
1887 	if (!ht->ht_msimap)
1888 		return;
1889 
1890 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1891 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1892 		/* Enable MSI -> HT mapping. */
1893 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1894 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1895 		    ht->ht_msictrl, 2);
1896 	}
1897 
1898 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1899 		/* Disable MSI -> HT mapping. */
1900 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1901 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1902 		    ht->ht_msictrl, 2);
1903 	}
1904 }
1905 
1906 int
1907 pci_get_max_read_req(device_t dev)
1908 {
1909 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1910 	int cap;
1911 	uint16_t val;
1912 
1913 	cap = dinfo->cfg.pcie.pcie_location;
1914 	if (cap == 0)
1915 		return (0);
1916 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1917 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1918 	val >>= 12;
1919 	return (1 << (val + 7));
1920 }
1921 
1922 int
1923 pci_set_max_read_req(device_t dev, int size)
1924 {
1925 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1926 	int cap;
1927 	uint16_t val;
1928 
1929 	cap = dinfo->cfg.pcie.pcie_location;
1930 	if (cap == 0)
1931 		return (0);
1932 	if (size < 128)
1933 		size = 128;
1934 	if (size > 4096)
1935 		size = 4096;
1936 	size = (1 << (fls(size) - 1));
1937 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1938 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1939 	val |= (fls(size) - 8) << 12;
1940 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1941 	return (size);
1942 }
1943 
1944 uint32_t
1945 pcie_read_config(device_t dev, int reg, int width)
1946 {
1947 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1948 	int cap;
1949 
1950 	cap = dinfo->cfg.pcie.pcie_location;
1951 	if (cap == 0) {
1952 		if (width == 2)
1953 			return (0xffff);
1954 		return (0xffffffff);
1955 	}
1956 
1957 	return (pci_read_config(dev, cap + reg, width));
1958 }
1959 
1960 void
1961 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
1962 {
1963 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1964 	int cap;
1965 
1966 	cap = dinfo->cfg.pcie.pcie_location;
1967 	if (cap == 0)
1968 		return;
1969 	pci_write_config(dev, cap + reg, value, width);
1970 }
1971 
1972 /*
1973  * Adjusts a PCI-e capability register by clearing the bits in mask
1974  * and setting the bits in (value & mask).  Bits not set in mask are
1975  * not adjusted.
1976  *
1977  * Returns the old value on success or all ones on failure.
1978  */
1979 uint32_t
1980 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
1981     int width)
1982 {
1983 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1984 	uint32_t old, new;
1985 	int cap;
1986 
1987 	cap = dinfo->cfg.pcie.pcie_location;
1988 	if (cap == 0) {
1989 		if (width == 2)
1990 			return (0xffff);
1991 		return (0xffffffff);
1992 	}
1993 
1994 	old = pci_read_config(dev, cap + reg, width);
1995 	new = old & ~mask;
1996 	new |= (value & mask);
1997 	pci_write_config(dev, cap + reg, new, width);
1998 	return (old);
1999 }
2000 
2001 /*
2002  * Support for MSI message signalled interrupts.
2003  */
2004 void
2005 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2006     uint16_t data)
2007 {
2008 	struct pci_devinfo *dinfo = device_get_ivars(child);
2009 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2010 
2011 	/* Write data and address values. */
2012 	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2013 	    address & 0xffffffff, 4);
2014 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2015 		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2016 		    address >> 32, 4);
2017 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2018 		    data, 2);
2019 	} else
2020 		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2021 		    2);
2022 
2023 	/* Enable MSI in the control register. */
2024 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2025 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2026 	    msi->msi_ctrl, 2);
2027 
2028 	/* Enable MSI -> HT mapping. */
2029 	pci_ht_map_msi(child, address);
2030 }
2031 
2032 void
2033 pci_disable_msi_method(device_t dev, device_t child)
2034 {
2035 	struct pci_devinfo *dinfo = device_get_ivars(child);
2036 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2037 
2038 	/* Disable MSI -> HT mapping. */
2039 	pci_ht_map_msi(child, 0);
2040 
2041 	/* Disable MSI in the control register. */
2042 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2043 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2044 	    msi->msi_ctrl, 2);
2045 }
2046 
2047 /*
2048  * Restore MSI registers during resume.  If MSI is enabled then
2049  * restore the data and address registers in addition to the control
2050  * register.
2051  */
2052 static void
2053 pci_resume_msi(device_t dev)
2054 {
2055 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2056 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2057 	uint64_t address;
2058 	uint16_t data;
2059 
2060 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2061 		address = msi->msi_addr;
2062 		data = msi->msi_data;
2063 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2064 		    address & 0xffffffff, 4);
2065 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2066 			pci_write_config(dev, msi->msi_location +
2067 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2068 			pci_write_config(dev, msi->msi_location +
2069 			    PCIR_MSI_DATA_64BIT, data, 2);
2070 		} else
2071 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2072 			    data, 2);
2073 	}
2074 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2075 	    2);
2076 }
2077 
2078 static int
2079 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2080 {
2081 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2082 	pcicfgregs *cfg = &dinfo->cfg;
2083 	struct resource_list_entry *rle;
2084 	struct msix_table_entry *mte;
2085 	struct msix_vector *mv;
2086 	uint64_t addr;
2087 	uint32_t data;
2088 	int error, i, j;
2089 
2090 	/*
2091 	 * Handle MSI first.  We try to find this IRQ among our list
2092 	 * of MSI IRQs.  If we find it, we request updated address and
2093 	 * data registers and apply the results.
2094 	 */
2095 	if (cfg->msi.msi_alloc > 0) {
2096 
2097 		/* If we don't have any active handlers, nothing to do. */
2098 		if (cfg->msi.msi_handlers == 0)
2099 			return (0);
2100 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2101 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2102 			    i + 1);
2103 			if (rle->start == irq) {
2104 				error = PCIB_MAP_MSI(device_get_parent(bus),
2105 				    dev, irq, &addr, &data);
2106 				if (error)
2107 					return (error);
2108 				pci_disable_msi(dev);
2109 				dinfo->cfg.msi.msi_addr = addr;
2110 				dinfo->cfg.msi.msi_data = data;
2111 				pci_enable_msi(dev, addr, data);
2112 				return (0);
2113 			}
2114 		}
2115 		return (ENOENT);
2116 	}
2117 
2118 	/*
2119 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2120 	 * we request the updated mapping info.  If that works, we go
2121 	 * through all the slots that use this IRQ and update them.
2122 	 */
2123 	if (cfg->msix.msix_alloc > 0) {
2124 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2125 			mv = &cfg->msix.msix_vectors[i];
2126 			if (mv->mv_irq == irq) {
2127 				error = PCIB_MAP_MSI(device_get_parent(bus),
2128 				    dev, irq, &addr, &data);
2129 				if (error)
2130 					return (error);
2131 				mv->mv_address = addr;
2132 				mv->mv_data = data;
2133 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2134 					mte = &cfg->msix.msix_table[j];
2135 					if (mte->mte_vector != i + 1)
2136 						continue;
2137 					if (mte->mte_handlers == 0)
2138 						continue;
2139 					pci_mask_msix(dev, j);
2140 					pci_enable_msix(dev, j, addr, data);
2141 					pci_unmask_msix(dev, j);
2142 				}
2143 			}
2144 		}
2145 		return (ENOENT);
2146 	}
2147 
2148 	return (ENOENT);
2149 }
2150 
2151 /*
2152  * Returns true if the specified device is blacklisted because MSI
2153  * doesn't work.
2154  */
2155 int
2156 pci_msi_device_blacklisted(device_t dev)
2157 {
2158 
2159 	if (!pci_honor_msi_blacklist)
2160 		return (0);
2161 
2162 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2163 }
2164 
2165 /*
2166  * Determine if MSI is blacklisted globally on this system.  Currently,
2167  * we just check for blacklisted chipsets as represented by the
2168  * host-PCI bridge at device 0:0:0.  In the future, it may become
2169  * necessary to check other system attributes, such as the kenv values
2170  * that give the motherboard manufacturer and model number.
2171  */
2172 static int
2173 pci_msi_blacklisted(void)
2174 {
2175 	device_t dev;
2176 
2177 	if (!pci_honor_msi_blacklist)
2178 		return (0);
2179 
2180 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2181 	if (!(pcie_chipset || pcix_chipset)) {
2182 		if (vm_guest != VM_GUEST_NO) {
2183 			/*
2184 			 * Whitelist older chipsets in virtual
2185 			 * machines known to support MSI.
2186 			 */
2187 			dev = pci_find_bsf(0, 0, 0);
2188 			if (dev != NULL)
2189 				return (!pci_has_quirk(pci_get_devid(dev),
2190 					PCI_QUIRK_ENABLE_MSI_VM));
2191 		}
2192 		return (1);
2193 	}
2194 
2195 	dev = pci_find_bsf(0, 0, 0);
2196 	if (dev != NULL)
2197 		return (pci_msi_device_blacklisted(dev));
2198 	return (0);
2199 }
2200 
2201 /*
2202  * Returns true if the specified device is blacklisted because MSI-X
2203  * doesn't work.  Note that this assumes that if MSI doesn't work,
2204  * MSI-X doesn't either.
2205  */
2206 int
2207 pci_msix_device_blacklisted(device_t dev)
2208 {
2209 
2210 	if (!pci_honor_msi_blacklist)
2211 		return (0);
2212 
2213 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2214 		return (1);
2215 
2216 	return (pci_msi_device_blacklisted(dev));
2217 }
2218 
2219 /*
2220  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2221  * is blacklisted, assume that MSI-X is as well.  Check for additional
2222  * chipsets where MSI works but MSI-X does not.
2223  */
2224 static int
2225 pci_msix_blacklisted(void)
2226 {
2227 	device_t dev;
2228 
2229 	if (!pci_honor_msi_blacklist)
2230 		return (0);
2231 
2232 	dev = pci_find_bsf(0, 0, 0);
2233 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2234 	    PCI_QUIRK_DISABLE_MSIX))
2235 		return (1);
2236 
2237 	return (pci_msi_blacklisted());
2238 }
2239 
2240 /*
2241  * Attempt to allocate *count MSI messages.  The actual number allocated is
2242  * returned in *count.  After this function returns, each message will be
2243  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2244  */
2245 int
2246 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2247 {
2248 	struct pci_devinfo *dinfo = device_get_ivars(child);
2249 	pcicfgregs *cfg = &dinfo->cfg;
2250 	struct resource_list_entry *rle;
2251 	int actual, error, i, irqs[32];
2252 	uint16_t ctrl;
2253 
2254 	/* Don't let count == 0 get us into trouble. */
2255 	if (*count == 0)
2256 		return (EINVAL);
2257 
2258 	/* If rid 0 is allocated, then fail. */
2259 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2260 	if (rle != NULL && rle->res != NULL)
2261 		return (ENXIO);
2262 
2263 	/* Already have allocated messages? */
2264 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2265 		return (ENXIO);
2266 
2267 	/* If MSI is blacklisted for this system, fail. */
2268 	if (pci_msi_blacklisted())
2269 		return (ENXIO);
2270 
2271 	/* MSI capability present? */
2272 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2273 		return (ENODEV);
2274 
2275 	if (bootverbose)
2276 		device_printf(child,
2277 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2278 		    *count, cfg->msi.msi_msgnum);
2279 
2280 	/* Don't ask for more than the device supports. */
2281 	actual = min(*count, cfg->msi.msi_msgnum);
2282 
2283 	/* Don't ask for more than 32 messages. */
2284 	actual = min(actual, 32);
2285 
2286 	/* MSI requires power of 2 number of messages. */
2287 	if (!powerof2(actual))
2288 		return (EINVAL);
2289 
2290 	for (;;) {
2291 		/* Try to allocate N messages. */
2292 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2293 		    actual, irqs);
2294 		if (error == 0)
2295 			break;
2296 		if (actual == 1)
2297 			return (error);
2298 
2299 		/* Try N / 2. */
2300 		actual >>= 1;
2301 	}
2302 
2303 	/*
2304 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2305 	 * resources in the irqs[] array, so add new resources
2306 	 * starting at rid 1.
2307 	 */
2308 	for (i = 0; i < actual; i++)
2309 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2310 		    irqs[i], irqs[i], 1);
2311 
2312 	if (bootverbose) {
2313 		if (actual == 1)
2314 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2315 		else {
2316 			int run;
2317 
2318 			/*
2319 			 * Be fancy and try to print contiguous runs
2320 			 * of IRQ values as ranges.  'run' is true if
2321 			 * we are in a range.
2322 			 */
2323 			device_printf(child, "using IRQs %d", irqs[0]);
2324 			run = 0;
2325 			for (i = 1; i < actual; i++) {
2326 
2327 				/* Still in a run? */
2328 				if (irqs[i] == irqs[i - 1] + 1) {
2329 					run = 1;
2330 					continue;
2331 				}
2332 
2333 				/* Finish previous range. */
2334 				if (run) {
2335 					printf("-%d", irqs[i - 1]);
2336 					run = 0;
2337 				}
2338 
2339 				/* Start new range. */
2340 				printf(",%d", irqs[i]);
2341 			}
2342 
2343 			/* Unfinished range? */
2344 			if (run)
2345 				printf("-%d", irqs[actual - 1]);
2346 			printf(" for MSI\n");
2347 		}
2348 	}
2349 
2350 	/* Update control register with actual count. */
2351 	ctrl = cfg->msi.msi_ctrl;
2352 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2353 	ctrl |= (ffs(actual) - 1) << 4;
2354 	cfg->msi.msi_ctrl = ctrl;
2355 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2356 
2357 	/* Update counts of alloc'd messages. */
2358 	cfg->msi.msi_alloc = actual;
2359 	cfg->msi.msi_handlers = 0;
2360 	*count = actual;
2361 	return (0);
2362 }
2363 
2364 /* Release the MSI messages associated with this device. */
2365 int
2366 pci_release_msi_method(device_t dev, device_t child)
2367 {
2368 	struct pci_devinfo *dinfo = device_get_ivars(child);
2369 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2370 	struct resource_list_entry *rle;
2371 	int error, i, irqs[32];
2372 
2373 	/* Try MSI-X first. */
2374 	error = pci_release_msix(dev, child);
2375 	if (error != ENODEV)
2376 		return (error);
2377 
2378 	/* Do we have any messages to release? */
2379 	if (msi->msi_alloc == 0)
2380 		return (ENODEV);
2381 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2382 
2383 	/* Make sure none of the resources are allocated. */
2384 	if (msi->msi_handlers > 0)
2385 		return (EBUSY);
2386 	for (i = 0; i < msi->msi_alloc; i++) {
2387 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2388 		KASSERT(rle != NULL, ("missing MSI resource"));
2389 		if (rle->res != NULL)
2390 			return (EBUSY);
2391 		irqs[i] = rle->start;
2392 	}
2393 
2394 	/* Update control register with 0 count. */
2395 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2396 	    ("%s: MSI still enabled", __func__));
2397 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2398 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2399 	    msi->msi_ctrl, 2);
2400 
2401 	/* Release the messages. */
2402 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2403 	for (i = 0; i < msi->msi_alloc; i++)
2404 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2405 
2406 	/* Update alloc count. */
2407 	msi->msi_alloc = 0;
2408 	msi->msi_addr = 0;
2409 	msi->msi_data = 0;
2410 	return (0);
2411 }
2412 
2413 /*
2414  * Return the max supported MSI messages this device supports.
2415  * Basically, assuming the MD code can alloc messages, this function
2416  * should return the maximum value that pci_alloc_msi() can return.
2417  * Thus, it is subject to the tunables, etc.
2418  */
2419 int
2420 pci_msi_count_method(device_t dev, device_t child)
2421 {
2422 	struct pci_devinfo *dinfo = device_get_ivars(child);
2423 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2424 
2425 	if (pci_do_msi && msi->msi_location != 0)
2426 		return (msi->msi_msgnum);
2427 	return (0);
2428 }
2429 
2430 /* free pcicfgregs structure and all depending data structures */
2431 
2432 int
2433 pci_freecfg(struct pci_devinfo *dinfo)
2434 {
2435 	struct devlist *devlist_head;
2436 	struct pci_map *pm, *next;
2437 	int i;
2438 
2439 	devlist_head = &pci_devq;
2440 
2441 	if (dinfo->cfg.vpd.vpd_reg) {
2442 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2443 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2444 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2445 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2446 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2447 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2448 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2449 	}
2450 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2451 		free(pm, M_DEVBUF);
2452 	}
2453 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2454 	free(dinfo, M_DEVBUF);
2455 
2456 	/* increment the generation count */
2457 	pci_generation++;
2458 
2459 	/* we're losing one device */
2460 	pci_numdevs--;
2461 	return (0);
2462 }
2463 
2464 /*
 * PCI power management
2466  */
2467 int
2468 pci_set_powerstate_method(device_t dev, device_t child, int state)
2469 {
2470 	struct pci_devinfo *dinfo = device_get_ivars(child);
2471 	pcicfgregs *cfg = &dinfo->cfg;
2472 	uint16_t status;
2473 	int oldstate, highest, delay;
2474 
2475 	if (cfg->pp.pp_cap == 0)
2476 		return (EOPNOTSUPP);
2477 
2478 	/*
2479 	 * Optimize a no state change request away.  While it would be OK to
2480 	 * write to the hardware in theory, some devices have shown odd
2481 	 * behavior when going from D3 -> D3.
2482 	 */
2483 	oldstate = pci_get_powerstate(child);
2484 	if (oldstate == state)
2485 		return (0);
2486 
2487 	/*
2488 	 * The PCI power management specification states that after a state
2489 	 * transition between PCI power states, system software must
2490 	 * guarantee a minimal delay before the function accesses the device.
2491 	 * Compute the worst case delay that we need to guarantee before we
2492 	 * access the device.  Many devices will be responsive much more
2493 	 * quickly than this delay, but there are some that don't respond
2494 	 * instantly to state changes.  Transitions to/from D3 state require
2495 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2496 	 * is done below with DELAY rather than a sleeper function because
2497 	 * this function can be called from contexts where we cannot sleep.
2498 	 */
2499 	highest = (oldstate > state) ? oldstate : state;
2500 	if (highest == PCI_POWERSTATE_D3)
2501 	    delay = 10000;
2502 	else if (highest == PCI_POWERSTATE_D2)
2503 	    delay = 200;
2504 	else
2505 	    delay = 0;
2506 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2507 	    & ~PCIM_PSTAT_DMASK;
2508 	switch (state) {
2509 	case PCI_POWERSTATE_D0:
2510 		status |= PCIM_PSTAT_D0;
2511 		break;
2512 	case PCI_POWERSTATE_D1:
2513 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2514 			return (EOPNOTSUPP);
2515 		status |= PCIM_PSTAT_D1;
2516 		break;
2517 	case PCI_POWERSTATE_D2:
2518 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2519 			return (EOPNOTSUPP);
2520 		status |= PCIM_PSTAT_D2;
2521 		break;
2522 	case PCI_POWERSTATE_D3:
2523 		status |= PCIM_PSTAT_D3;
2524 		break;
2525 	default:
2526 		return (EINVAL);
2527 	}
2528 
2529 	if (bootverbose)
2530 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2531 		    state);
2532 
2533 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2534 	if (delay)
2535 		DELAY(delay);
2536 	return (0);
2537 }
2538 
2539 int
2540 pci_get_powerstate_method(device_t dev, device_t child)
2541 {
2542 	struct pci_devinfo *dinfo = device_get_ivars(child);
2543 	pcicfgregs *cfg = &dinfo->cfg;
2544 	uint16_t status;
2545 	int result;
2546 
2547 	if (cfg->pp.pp_cap != 0) {
2548 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2549 		switch (status & PCIM_PSTAT_DMASK) {
2550 		case PCIM_PSTAT_D0:
2551 			result = PCI_POWERSTATE_D0;
2552 			break;
2553 		case PCIM_PSTAT_D1:
2554 			result = PCI_POWERSTATE_D1;
2555 			break;
2556 		case PCIM_PSTAT_D2:
2557 			result = PCI_POWERSTATE_D2;
2558 			break;
2559 		case PCIM_PSTAT_D3:
2560 			result = PCI_POWERSTATE_D3;
2561 			break;
2562 		default:
2563 			result = PCI_POWERSTATE_UNKNOWN;
2564 			break;
2565 		}
2566 	} else {
2567 		/* No support, device is always at D0 */
2568 		result = PCI_POWERSTATE_D0;
2569 	}
2570 	return (result);
2571 }
2572 
2573 /*
2574  * Some convenience functions for PCI device drivers.
2575  */
2576 
2577 static __inline void
2578 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2579 {
2580 	uint16_t	command;
2581 
2582 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2583 	command |= bit;
2584 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2585 }
2586 
2587 static __inline void
2588 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2589 {
2590 	uint16_t	command;
2591 
2592 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2593 	command &= ~bit;
2594 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2595 }
2596 
2597 int
2598 pci_enable_busmaster_method(device_t dev, device_t child)
2599 {
2600 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2601 	return (0);
2602 }
2603 
2604 int
2605 pci_disable_busmaster_method(device_t dev, device_t child)
2606 {
2607 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2608 	return (0);
2609 }
2610 
2611 int
2612 pci_enable_io_method(device_t dev, device_t child, int space)
2613 {
2614 	uint16_t bit;
2615 
2616 	switch(space) {
2617 	case SYS_RES_IOPORT:
2618 		bit = PCIM_CMD_PORTEN;
2619 		break;
2620 	case SYS_RES_MEMORY:
2621 		bit = PCIM_CMD_MEMEN;
2622 		break;
2623 	default:
2624 		return (EINVAL);
2625 	}
2626 	pci_set_command_bit(dev, child, bit);
2627 	return (0);
2628 }
2629 
2630 int
2631 pci_disable_io_method(device_t dev, device_t child, int space)
2632 {
2633 	uint16_t bit;
2634 
2635 	switch(space) {
2636 	case SYS_RES_IOPORT:
2637 		bit = PCIM_CMD_PORTEN;
2638 		break;
2639 	case SYS_RES_MEMORY:
2640 		bit = PCIM_CMD_MEMEN;
2641 		break;
2642 	default:
2643 		return (EINVAL);
2644 	}
2645 	pci_clear_command_bit(dev, child, bit);
2646 	return (0);
2647 }
2648 
2649 /*
2650  * New style pci driver.  Parent device is either a pci-host-bridge or a
2651  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2652  */
2653 
2654 void
2655 pci_print_verbose(struct pci_devinfo *dinfo)
2656 {
2657 
2658 	if (bootverbose) {
2659 		pcicfgregs *cfg = &dinfo->cfg;
2660 
2661 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2662 		    cfg->vendor, cfg->device, cfg->revid);
2663 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2664 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2665 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2666 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2667 		    cfg->mfdev);
2668 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2669 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2670 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2671 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2672 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2673 		if (cfg->intpin > 0)
2674 			printf("\tintpin=%c, irq=%d\n",
2675 			    cfg->intpin +'a' -1, cfg->intline);
2676 		if (cfg->pp.pp_cap) {
2677 			uint16_t status;
2678 
2679 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2680 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2681 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2682 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2683 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2684 			    status & PCIM_PSTAT_DMASK);
2685 		}
2686 		if (cfg->msi.msi_location) {
2687 			int ctrl;
2688 
2689 			ctrl = cfg->msi.msi_ctrl;
2690 			printf("\tMSI supports %d message%s%s%s\n",
2691 			    cfg->msi.msi_msgnum,
2692 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2693 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2694 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2695 		}
2696 		if (cfg->msix.msix_location) {
2697 			printf("\tMSI-X supports %d message%s ",
2698 			    cfg->msix.msix_msgnum,
2699 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2700 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2701 				printf("in map 0x%x\n",
2702 				    cfg->msix.msix_table_bar);
2703 			else
2704 				printf("in maps 0x%x and 0x%x\n",
2705 				    cfg->msix.msix_table_bar,
2706 				    cfg->msix.msix_pba_bar);
2707 		}
2708 	}
2709 }
2710 
2711 static int
2712 pci_porten(device_t dev)
2713 {
2714 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2715 }
2716 
2717 static int
2718 pci_memen(device_t dev)
2719 {
2720 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2721 }
2722 
2723 void
2724 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
2725     int *bar64)
2726 {
2727 	struct pci_devinfo *dinfo;
2728 	pci_addr_t map, testval;
2729 	int ln2range;
2730 	uint16_t cmd;
2731 
2732 	/*
2733 	 * The device ROM BAR is special.  It is always a 32-bit
2734 	 * memory BAR.  Bit 0 is special and should not be set when
2735 	 * sizing the BAR.
2736 	 */
2737 	dinfo = device_get_ivars(dev);
2738 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2739 		map = pci_read_config(dev, reg, 4);
2740 		pci_write_config(dev, reg, 0xfffffffe, 4);
2741 		testval = pci_read_config(dev, reg, 4);
2742 		pci_write_config(dev, reg, map, 4);
2743 		*mapp = map;
2744 		*testvalp = testval;
2745 		if (bar64 != NULL)
2746 			*bar64 = 0;
2747 		return;
2748 	}
2749 
2750 	map = pci_read_config(dev, reg, 4);
2751 	ln2range = pci_maprange(map);
2752 	if (ln2range == 64)
2753 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2754 
2755 	/*
2756 	 * Disable decoding via the command register before
2757 	 * determining the BAR's length since we will be placing it in
2758 	 * a weird state.
2759 	 */
2760 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2761 	pci_write_config(dev, PCIR_COMMAND,
2762 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2763 
2764 	/*
2765 	 * Determine the BAR's length by writing all 1's.  The bottom
2766 	 * log_2(size) bits of the BAR will stick as 0 when we read
2767 	 * the value back.
2768 	 */
2769 	pci_write_config(dev, reg, 0xffffffff, 4);
2770 	testval = pci_read_config(dev, reg, 4);
2771 	if (ln2range == 64) {
2772 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2773 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2774 	}
2775 
2776 	/*
2777 	 * Restore the original value of the BAR.  We may have reprogrammed
2778 	 * the BAR of the low-level console device and when booting verbose,
2779 	 * we need the console device addressable.
2780 	 */
2781 	pci_write_config(dev, reg, map, 4);
2782 	if (ln2range == 64)
2783 		pci_write_config(dev, reg + 4, map >> 32, 4);
2784 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2785 
2786 	*mapp = map;
2787 	*testvalp = testval;
2788 	if (bar64 != NULL)
2789 		*bar64 = (ln2range == 64);
2790 }
2791 
2792 static void
2793 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2794 {
2795 	struct pci_devinfo *dinfo;
2796 	int ln2range;
2797 
2798 	/* The device ROM BAR is always a 32-bit memory BAR. */
2799 	dinfo = device_get_ivars(dev);
2800 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2801 		ln2range = 32;
2802 	else
2803 		ln2range = pci_maprange(pm->pm_value);
2804 	pci_write_config(dev, pm->pm_reg, base, 4);
2805 	if (ln2range == 64)
2806 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2807 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2808 	if (ln2range == 64)
2809 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2810 		    pm->pm_reg + 4, 4) << 32;
2811 }
2812 
2813 struct pci_map *
2814 pci_find_bar(device_t dev, int reg)
2815 {
2816 	struct pci_devinfo *dinfo;
2817 	struct pci_map *pm;
2818 
2819 	dinfo = device_get_ivars(dev);
2820 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2821 		if (pm->pm_reg == reg)
2822 			return (pm);
2823 	}
2824 	return (NULL);
2825 }
2826 
2827 int
2828 pci_bar_enabled(device_t dev, struct pci_map *pm)
2829 {
2830 	struct pci_devinfo *dinfo;
2831 	uint16_t cmd;
2832 
2833 	dinfo = device_get_ivars(dev);
2834 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2835 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2836 		return (0);
2837 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2838 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2839 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2840 	else
2841 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2842 }
2843 
2844 struct pci_map *
2845 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2846 {
2847 	struct pci_devinfo *dinfo;
2848 	struct pci_map *pm, *prev;
2849 
2850 	dinfo = device_get_ivars(dev);
2851 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2852 	pm->pm_reg = reg;
2853 	pm->pm_value = value;
2854 	pm->pm_size = size;
2855 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2856 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2857 		    reg));
2858 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2859 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2860 			break;
2861 	}
2862 	if (prev != NULL)
2863 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2864 	else
2865 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2866 	return (pm);
2867 }
2868 
2869 static void
2870 pci_restore_bars(device_t dev)
2871 {
2872 	struct pci_devinfo *dinfo;
2873 	struct pci_map *pm;
2874 	int ln2range;
2875 
2876 	dinfo = device_get_ivars(dev);
2877 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2878 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2879 			ln2range = 32;
2880 		else
2881 			ln2range = pci_maprange(pm->pm_value);
2882 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2883 		if (ln2range == 64)
2884 			pci_write_config(dev, pm->pm_reg + 4,
2885 			    pm->pm_value >> 32, 4);
2886 	}
2887 }
2888 
2889 /*
2890  * Add a resource based on a pci map register. Return 1 if the map
2891  * register is a 32bit map register or 2 if it is a 64bit register.
2892  */
2893 static int
2894 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2895     int force, int prefetch)
2896 {
2897 	struct pci_map *pm;
2898 	pci_addr_t base, map, testval;
2899 	pci_addr_t start, end, count;
2900 	int barlen, basezero, flags, maprange, mapsize, type;
2901 	uint16_t cmd;
2902 	struct resource *res;
2903 
2904 	/*
2905 	 * The BAR may already exist if the device is a CardBus card
2906 	 * whose CIS is stored in this BAR.
2907 	 */
2908 	pm = pci_find_bar(dev, reg);
2909 	if (pm != NULL) {
2910 		maprange = pci_maprange(pm->pm_value);
2911 		barlen = maprange == 64 ? 2 : 1;
2912 		return (barlen);
2913 	}
2914 
2915 	pci_read_bar(dev, reg, &map, &testval, NULL);
2916 	if (PCI_BAR_MEM(map)) {
2917 		type = SYS_RES_MEMORY;
2918 		if (map & PCIM_BAR_MEM_PREFETCH)
2919 			prefetch = 1;
2920 	} else
2921 		type = SYS_RES_IOPORT;
2922 	mapsize = pci_mapsize(testval);
2923 	base = pci_mapbase(map);
2924 #ifdef __PCI_BAR_ZERO_VALID
2925 	basezero = 0;
2926 #else
2927 	basezero = base == 0;
2928 #endif
2929 	maprange = pci_maprange(map);
2930 	barlen = maprange == 64 ? 2 : 1;
2931 
2932 	/*
2933 	 * For I/O registers, if bottom bit is set, and the next bit up
2934 	 * isn't clear, we know we have a BAR that doesn't conform to the
2935 	 * spec, so ignore it.  Also, sanity check the size of the data
2936 	 * areas to the type of memory involved.  Memory must be at least
2937 	 * 16 bytes in size, while I/O ranges must be at least 4.
2938 	 */
2939 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2940 		return (barlen);
2941 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2942 	    (type == SYS_RES_IOPORT && mapsize < 2))
2943 		return (barlen);
2944 
2945 	/* Save a record of this BAR. */
2946 	pm = pci_add_bar(dev, reg, map, mapsize);
2947 	if (bootverbose) {
2948 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2949 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2950 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2951 			printf(", port disabled\n");
2952 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2953 			printf(", memory disabled\n");
2954 		else
2955 			printf(", enabled\n");
2956 	}
2957 
2958 	/*
2959 	 * If base is 0, then we have problems if this architecture does
2960 	 * not allow that.  It is best to ignore such entries for the
2961 	 * moment.  These will be allocated later if the driver specifically
2962 	 * requests them.  However, some removable busses look better when
2963 	 * all resources are allocated, so allow '0' to be overriden.
2964 	 *
2965 	 * Similarly treat maps whose values is the same as the test value
2966 	 * read back.  These maps have had all f's written to them by the
2967 	 * BIOS in an attempt to disable the resources.
2968 	 */
2969 	if (!force && (basezero || map == testval))
2970 		return (barlen);
2971 	if ((u_long)base != base) {
2972 		device_printf(bus,
2973 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2974 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2975 		    pci_get_function(dev), reg);
2976 		return (barlen);
2977 	}
2978 
2979 	/*
2980 	 * This code theoretically does the right thing, but has
2981 	 * undesirable side effects in some cases where peripherals
2982 	 * respond oddly to having these bits enabled.  Let the user
2983 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2984 	 * default).
2985 	 */
2986 	if (pci_enable_io_modes) {
2987 		/* Turn on resources that have been left off by a lazy BIOS */
2988 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2989 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2990 			cmd |= PCIM_CMD_PORTEN;
2991 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2992 		}
2993 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2994 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2995 			cmd |= PCIM_CMD_MEMEN;
2996 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2997 		}
2998 	} else {
2999 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3000 			return (barlen);
3001 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3002 			return (barlen);
3003 	}
3004 
3005 	count = (pci_addr_t)1 << mapsize;
3006 	flags = RF_ALIGNMENT_LOG2(mapsize);
3007 	if (prefetch)
3008 		flags |= RF_PREFETCHABLE;
3009 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3010 		start = 0;	/* Let the parent decide. */
3011 		end = ~0ul;
3012 	} else {
3013 		start = base;
3014 		end = base + count - 1;
3015 	}
3016 	resource_list_add(rl, type, reg, start, end, count);
3017 
3018 	/*
3019 	 * Try to allocate the resource for this BAR from our parent
3020 	 * so that this resource range is already reserved.  The
3021 	 * driver for this device will later inherit this resource in
3022 	 * pci_alloc_resource().
3023 	 */
3024 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3025 	    flags);
3026 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
3027 		/*
3028 		 * If the allocation fails, try to allocate a resource for
3029 		 * this BAR using any available range.  The firmware felt
3030 		 * it was important enough to assign a resource, so don't
3031 		 * disable decoding if we can help it.
3032 		 */
3033 		resource_list_delete(rl, type, reg);
3034 		resource_list_add(rl, type, reg, 0, ~0ul, count);
3035 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
3036 		    count, flags);
3037 	}
3038 	if (res == NULL) {
3039 		/*
3040 		 * If the allocation fails, delete the resource list entry
3041 		 * and disable decoding for this device.
3042 		 *
3043 		 * If the driver requests this resource in the future,
3044 		 * pci_reserve_map() will try to allocate a fresh
3045 		 * resource range.
3046 		 */
3047 		resource_list_delete(rl, type, reg);
3048 		pci_disable_io(dev, type);
3049 		if (bootverbose)
3050 			device_printf(bus,
3051 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3052 			    pci_get_domain(dev), pci_get_bus(dev),
3053 			    pci_get_slot(dev), pci_get_function(dev), reg);
3054 	} else {
3055 		start = rman_get_start(res);
3056 		pci_write_bar(dev, pm, start);
3057 	}
3058 	return (barlen);
3059 }
3060 
3061 /*
3062  * For ATA devices we need to decide early what addressing mode to use.
3063  * Legacy demands that the primary and secondary ATA ports sits on the
3064  * same addresses that old ISA hardware did. This dictates that we use
3065  * those addresses and ignore the BAR's if we cannot set PCI native
3066  * addressing mode.
3067  */
3068 static void
3069 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3070     uint32_t prefetchmask)
3071 {
3072 	int rid, type, progif;
3073 #if 0
3074 	/* if this device supports PCI native addressing use it */
3075 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3076 	if ((progif & 0x8a) == 0x8a) {
3077 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3078 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3079 			printf("Trying ATA native PCI addressing mode\n");
3080 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3081 		}
3082 	}
3083 #endif
3084 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3085 	type = SYS_RES_IOPORT;
3086 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3087 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3088 		    prefetchmask & (1 << 0));
3089 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3090 		    prefetchmask & (1 << 1));
3091 	} else {
3092 		rid = PCIR_BAR(0);
3093 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3094 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3095 		    0x1f7, 8, 0);
3096 		rid = PCIR_BAR(1);
3097 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3098 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3099 		    0x3f6, 1, 0);
3100 	}
3101 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3102 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3103 		    prefetchmask & (1 << 2));
3104 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3105 		    prefetchmask & (1 << 3));
3106 	} else {
3107 		rid = PCIR_BAR(2);
3108 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3109 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3110 		    0x177, 8, 0);
3111 		rid = PCIR_BAR(3);
3112 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3113 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3114 		    0x376, 1, 0);
3115 	}
3116 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3117 	    prefetchmask & (1 << 4));
3118 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3119 	    prefetchmask & (1 << 5));
3120 }
3121 
3122 static void
3123 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3124 {
3125 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3126 	pcicfgregs *cfg = &dinfo->cfg;
3127 	char tunable_name[64];
3128 	int irq;
3129 
3130 	/* Has to have an intpin to have an interrupt. */
3131 	if (cfg->intpin == 0)
3132 		return;
3133 
3134 	/* Let the user override the IRQ with a tunable. */
3135 	irq = PCI_INVALID_IRQ;
3136 	snprintf(tunable_name, sizeof(tunable_name),
3137 	    "hw.pci%d.%d.%d.INT%c.irq",
3138 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3139 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3140 		irq = PCI_INVALID_IRQ;
3141 
3142 	/*
3143 	 * If we didn't get an IRQ via the tunable, then we either use the
3144 	 * IRQ value in the intline register or we ask the bus to route an
3145 	 * interrupt for us.  If force_route is true, then we only use the
3146 	 * value in the intline register if the bus was unable to assign an
3147 	 * IRQ.
3148 	 */
3149 	if (!PCI_INTERRUPT_VALID(irq)) {
3150 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3151 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3152 		if (!PCI_INTERRUPT_VALID(irq))
3153 			irq = cfg->intline;
3154 	}
3155 
3156 	/* If after all that we don't have an IRQ, just bail. */
3157 	if (!PCI_INTERRUPT_VALID(irq))
3158 		return;
3159 
3160 	/* Update the config register if it changed. */
3161 	if (irq != cfg->intline) {
3162 		cfg->intline = irq;
3163 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3164 	}
3165 
3166 	/* Add this IRQ as rid 0 interrupt resource. */
3167 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3168 }
3169 
3170 /* Perform early OHCI takeover from SMM. */
3171 static void
3172 ohci_early_takeover(device_t self)
3173 {
3174 	struct resource *res;
3175 	uint32_t ctl;
3176 	int rid;
3177 	int i;
3178 
3179 	rid = PCIR_BAR(0);
3180 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3181 	if (res == NULL)
3182 		return;
3183 
3184 	ctl = bus_read_4(res, OHCI_CONTROL);
3185 	if (ctl & OHCI_IR) {
3186 		if (bootverbose)
3187 			printf("ohci early: "
3188 			    "SMM active, request owner change\n");
3189 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3190 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3191 			DELAY(1000);
3192 			ctl = bus_read_4(res, OHCI_CONTROL);
3193 		}
3194 		if (ctl & OHCI_IR) {
3195 			if (bootverbose)
3196 				printf("ohci early: "
3197 				    "SMM does not respond, resetting\n");
3198 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3199 		}
3200 		/* Disable interrupts */
3201 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3202 	}
3203 
3204 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3205 }
3206 
3207 /* Perform early UHCI takeover from SMM. */
3208 static void
3209 uhci_early_takeover(device_t self)
3210 {
3211 	struct resource *res;
3212 	int rid;
3213 
3214 	/*
3215 	 * Set the PIRQD enable bit and switch off all the others. We don't
3216 	 * want legacy support to interfere with us XXX Does this also mean
3217 	 * that the BIOS won't touch the keyboard anymore if it is connected
3218 	 * to the ports of the root hub?
3219 	 */
3220 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3221 
3222 	/* Disable interrupts */
3223 	rid = PCI_UHCI_BASE_REG;
3224 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3225 	if (res != NULL) {
3226 		bus_write_2(res, UHCI_INTR, 0);
3227 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3228 	}
3229 }
3230 
3231 /* Perform early EHCI takeover from SMM. */
3232 static void
3233 ehci_early_takeover(device_t self)
3234 {
3235 	struct resource *res;
3236 	uint32_t cparams;
3237 	uint32_t eec;
3238 	uint8_t eecp;
3239 	uint8_t bios_sem;
3240 	uint8_t offs;
3241 	int rid;
3242 	int i;
3243 
3244 	rid = PCIR_BAR(0);
3245 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3246 	if (res == NULL)
3247 		return;
3248 
3249 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3250 
3251 	/* Synchronise with the BIOS if it owns the controller. */
3252 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3253 	    eecp = EHCI_EECP_NEXT(eec)) {
3254 		eec = pci_read_config(self, eecp, 4);
3255 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3256 			continue;
3257 		}
3258 		bios_sem = pci_read_config(self, eecp +
3259 		    EHCI_LEGSUP_BIOS_SEM, 1);
3260 		if (bios_sem == 0) {
3261 			continue;
3262 		}
3263 		if (bootverbose)
3264 			printf("ehci early: "
3265 			    "SMM active, request owner change\n");
3266 
3267 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3268 
3269 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3270 			DELAY(1000);
3271 			bios_sem = pci_read_config(self, eecp +
3272 			    EHCI_LEGSUP_BIOS_SEM, 1);
3273 		}
3274 
3275 		if (bios_sem != 0) {
3276 			if (bootverbose)
3277 				printf("ehci early: "
3278 				    "SMM does not respond\n");
3279 		}
3280 		/* Disable interrupts */
3281 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3282 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3283 	}
3284 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3285 }
3286 
3287 /* Perform early XHCI takeover from SMM. */
3288 static void
3289 xhci_early_takeover(device_t self)
3290 {
3291 	struct resource *res;
3292 	uint32_t cparams;
3293 	uint32_t eec;
3294 	uint8_t eecp;
3295 	uint8_t bios_sem;
3296 	uint8_t offs;
3297 	int rid;
3298 	int i;
3299 
3300 	rid = PCIR_BAR(0);
3301 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3302 	if (res == NULL)
3303 		return;
3304 
3305 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3306 
3307 	eec = -1;
3308 
3309 	/* Synchronise with the BIOS if it owns the controller. */
3310 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3311 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3312 		eec = bus_read_4(res, eecp);
3313 
3314 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3315 			continue;
3316 
3317 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3318 		if (bios_sem == 0)
3319 			continue;
3320 
3321 		if (bootverbose)
3322 			printf("xhci early: "
3323 			    "SMM active, request owner change\n");
3324 
3325 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3326 
3327 		/* wait a maximum of 5 second */
3328 
3329 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3330 			DELAY(1000);
3331 			bios_sem = bus_read_1(res, eecp +
3332 			    XHCI_XECP_BIOS_SEM);
3333 		}
3334 
3335 		if (bios_sem != 0) {
3336 			if (bootverbose)
3337 				printf("xhci early: "
3338 				    "SMM does not respond\n");
3339 		}
3340 
3341 		/* Disable interrupts */
3342 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3343 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3344 		bus_read_4(res, offs + XHCI_USBSTS);
3345 	}
3346 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3347 }
3348 
3349 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3350 static void
3351 pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
3352     struct resource_list *rl)
3353 {
3354 	struct resource *res;
3355 	char *cp;
3356 	u_long start, end, count;
3357 	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
3358 
3359 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3360 	case PCIM_HDRTYPE_BRIDGE:
3361 		sec_reg = PCIR_SECBUS_1;
3362 		sub_reg = PCIR_SUBBUS_1;
3363 		break;
3364 	case PCIM_HDRTYPE_CARDBUS:
3365 		sec_reg = PCIR_SECBUS_2;
3366 		sub_reg = PCIR_SUBBUS_2;
3367 		break;
3368 	default:
3369 		return;
3370 	}
3371 
3372 	/*
3373 	 * If the existing bus range is valid, attempt to reserve it
3374 	 * from our parent.  If this fails for any reason, clear the
3375 	 * secbus and subbus registers.
3376 	 *
3377 	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
3378 	 * This would at least preserve the existing sec_bus if it is
3379 	 * valid.
3380 	 */
3381 	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
3382 	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
3383 
3384 	/* Quirk handling. */
3385 	switch (pci_get_devid(dev)) {
3386 	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
3387 		sup_bus = pci_read_config(dev, 0x41, 1);
3388 		if (sup_bus != 0xff) {
3389 			sec_bus = sup_bus + 1;
3390 			sub_bus = sup_bus + 1;
3391 			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
3392 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3393 		}
3394 		break;
3395 
3396 	case 0x00dd10de:
3397 		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
3398 		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
3399 			break;
3400 		if (strncmp(cp, "Compal", 6) != 0) {
3401 			freeenv(cp);
3402 			break;
3403 		}
3404 		freeenv(cp);
3405 		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
3406 			break;
3407 		if (strncmp(cp, "08A0", 4) != 0) {
3408 			freeenv(cp);
3409 			break;
3410 		}
3411 		freeenv(cp);
3412 		if (sub_bus < 0xa) {
3413 			sub_bus = 0xa;
3414 			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
3415 		}
3416 		break;
3417 	}
3418 
3419 	if (bootverbose)
3420 		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
3421 	if (sec_bus > 0 && sub_bus >= sec_bus) {
3422 		start = sec_bus;
3423 		end = sub_bus;
3424 		count = end - start + 1;
3425 
3426 		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);
3427 
3428 		/*
3429 		 * If requested, clear secondary bus registers in
3430 		 * bridge devices to force a complete renumbering
3431 		 * rather than reserving the existing range.  However,
3432 		 * preserve the existing size.
3433 		 */
3434 		if (pci_clear_buses)
3435 			goto clear;
3436 
3437 		rid = 0;
3438 		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
3439 		    start, end, count, 0);
3440 		if (res != NULL)
3441 			return;
3442 
3443 		if (bootverbose)
3444 			device_printf(bus,
3445 			    "pci%d:%d:%d:%d secbus failed to allocate\n",
3446 			    pci_get_domain(dev), pci_get_bus(dev),
3447 			    pci_get_slot(dev), pci_get_function(dev));
3448 	}
3449 
3450 clear:
3451 	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
3452 	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
3453 }
3454 
3455 static struct resource *
3456 pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
3457     u_long end, u_long count, u_int flags)
3458 {
3459 	struct pci_devinfo *dinfo;
3460 	pcicfgregs *cfg;
3461 	struct resource_list *rl;
3462 	struct resource *res;
3463 	int sec_reg, sub_reg;
3464 
3465 	dinfo = device_get_ivars(child);
3466 	cfg = &dinfo->cfg;
3467 	rl = &dinfo->resources;
3468 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
3469 	case PCIM_HDRTYPE_BRIDGE:
3470 		sec_reg = PCIR_SECBUS_1;
3471 		sub_reg = PCIR_SUBBUS_1;
3472 		break;
3473 	case PCIM_HDRTYPE_CARDBUS:
3474 		sec_reg = PCIR_SECBUS_2;
3475 		sub_reg = PCIR_SUBBUS_2;
3476 		break;
3477 	default:
3478 		return (NULL);
3479 	}
3480 
3481 	if (*rid != 0)
3482 		return (NULL);
3483 
3484 	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
3485 		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
3486 	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
3487 		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
3488 		    start, end, count, flags & ~RF_ACTIVE);
3489 		if (res == NULL) {
3490 			resource_list_delete(rl, PCI_RES_BUS, *rid);
3491 			device_printf(child, "allocating %lu bus%s failed\n",
3492 			    count, count == 1 ? "" : "es");
3493 			return (NULL);
3494 		}
3495 		if (bootverbose)
3496 			device_printf(child,
3497 			    "Lazy allocation of %lu bus%s at %lu\n", count,
3498 			    count == 1 ? "" : "es", rman_get_start(res));
3499 		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
3500 		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
3501 	}
3502 	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
3503 	    end, count, flags));
3504 }
3505 #endif
3506 
3507 void
3508 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3509 {
3510 	struct pci_devinfo *dinfo;
3511 	pcicfgregs *cfg;
3512 	struct resource_list *rl;
3513 	const struct pci_quirk *q;
3514 	uint32_t devid;
3515 	int i;
3516 
3517 	dinfo = device_get_ivars(dev);
3518 	cfg = &dinfo->cfg;
3519 	rl = &dinfo->resources;
3520 	devid = (cfg->device << 16) | cfg->vendor;
3521 
3522 	/* ATA devices needs special map treatment */
3523 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3524 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3525 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3526 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3527 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3528 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3529 	else
3530 		for (i = 0; i < cfg->nummaps;) {
3531 			/*
3532 			 * Skip quirked resources.
3533 			 */
3534 			for (q = &pci_quirks[0]; q->devid != 0; q++)
3535 				if (q->devid == devid &&
3536 				    q->type == PCI_QUIRK_UNMAP_REG &&
3537 				    q->arg1 == PCIR_BAR(i))
3538 					break;
3539 			if (q->devid != 0) {
3540 				i++;
3541 				continue;
3542 			}
3543 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3544 			    prefetchmask & (1 << i));
3545 		}
3546 
3547 	/*
3548 	 * Add additional, quirked resources.
3549 	 */
3550 	for (q = &pci_quirks[0]; q->devid != 0; q++)
3551 		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3552 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3553 
3554 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3555 #ifdef __PCI_REROUTE_INTERRUPT
3556 		/*
3557 		 * Try to re-route interrupts. Sometimes the BIOS or
3558 		 * firmware may leave bogus values in these registers.
3559 		 * If the re-route fails, then just stick with what we
3560 		 * have.
3561 		 */
3562 		pci_assign_interrupt(bus, dev, 1);
3563 #else
3564 		pci_assign_interrupt(bus, dev, 0);
3565 #endif
3566 	}
3567 
3568 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3569 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3570 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3571 			xhci_early_takeover(dev);
3572 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3573 			ehci_early_takeover(dev);
3574 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3575 			ohci_early_takeover(dev);
3576 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3577 			uhci_early_takeover(dev);
3578 	}
3579 
3580 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
3581 	/*
3582 	 * Reserve resources for secondary bus ranges behind bridge
3583 	 * devices.
3584 	 */
3585 	pci_reserve_secbus(bus, dev, cfg, rl);
3586 #endif
3587 }
3588 
3589 static struct pci_devinfo *
3590 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3591     int slot, int func, size_t dinfo_size)
3592 {
3593 	struct pci_devinfo *dinfo;
3594 
3595 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3596 	if (dinfo != NULL)
3597 		pci_add_child(dev, dinfo);
3598 
3599 	return (dinfo);
3600 }
3601 
3602 void
3603 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3604 {
3605 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3606 	device_t pcib = device_get_parent(dev);
3607 	struct pci_devinfo *dinfo;
3608 	int maxslots;
3609 	int s, f, pcifunchigh;
3610 	uint8_t hdrtype;
3611 	int first_func;
3612 
3613 	/*
3614 	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3615 	 * enable ARI.  We must enable ARI before detecting the rest of the
3616 	 * functions on this bus as ARI changes the set of slots and functions
3617 	 * that are legal on this bus.
3618 	 */
3619 	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
3620 	    dinfo_size);
3621 	if (dinfo != NULL && pci_enable_ari)
3622 		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3623 
3624 	/*
3625 	 * Start looking for new devices on slot 0 at function 1 because we
3626 	 * just identified the device at slot 0, function 0.
3627 	 */
3628 	first_func = 1;
3629 
3630 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3631 	    ("dinfo_size too small"));
3632 	maxslots = PCIB_MAXSLOTS(pcib);
3633 	for (s = 0; s <= maxslots; s++, first_func = 0) {
3634 		pcifunchigh = 0;
3635 		f = 0;
3636 		DELAY(1);
3637 		hdrtype = REG(PCIR_HDRTYPE, 1);
3638 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3639 			continue;
3640 		if (hdrtype & PCIM_MFDEV)
3641 			pcifunchigh = PCIB_MAXFUNCS(pcib);
3642 		for (f = first_func; f <= pcifunchigh; f++)
3643 			pci_identify_function(pcib, dev, domain, busno, s, f,
3644 			    dinfo_size);
3645 	}
3646 #undef REG
3647 }
3648 
3649 #ifdef PCI_IOV
3650 device_t
3651 pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
3652     uint16_t vid, uint16_t did)
3653 {
3654 	struct pci_devinfo *pf_dinfo, *vf_dinfo;
3655 	device_t pcib;
3656 	int busno, slot, func;
3657 
3658 	pf_dinfo = device_get_ivars(pf);
3659 
3660 	/*
3661 	 * Do a sanity check that we have been passed the correct size.  If this
3662 	 * test fails then likely the pci subclass hasn't implemented the
3663 	 * pci_create_iov_child method like it's supposed it.
3664 	 */
3665 	if (size != pf_dinfo->cfg.devinfo_size) {
3666 		device_printf(pf,
3667 		    "PCI subclass does not properly implement PCI_IOV\n");
3668 		return (NULL);
3669 	}
3670 
3671 	pcib = device_get_parent(bus);
3672 
3673 	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
3674 
3675 	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
3676 	    vid, did, size);
3677 
3678 	vf_dinfo->cfg.flags |= PCICFG_VF;
3679 	pci_add_child(bus, vf_dinfo);
3680 
3681 	return (vf_dinfo->cfg.dev);
3682 }
3683 
3684 device_t
3685 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
3686     uint16_t vid, uint16_t did)
3687 {
3688 
3689 	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
3690 	    did));
3691 }
3692 #endif
3693 
3694 void
3695 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3696 {
3697 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3698 	device_set_ivars(dinfo->cfg.dev, dinfo);
3699 	resource_list_init(&dinfo->resources);
3700 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3701 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3702 	pci_print_verbose(dinfo);
3703 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3704 	pci_child_added(dinfo->cfg.dev);
3705 }
3706 
3707 void
3708 pci_child_added_method(device_t dev, device_t child)
3709 {
3710 
3711 }
3712 
3713 static int
3714 pci_probe(device_t dev)
3715 {
3716 
3717 	device_set_desc(dev, "PCI bus");
3718 
3719 	/* Allow other subclasses to override this driver. */
3720 	return (BUS_PROBE_GENERIC);
3721 }
3722 
3723 int
3724 pci_attach_common(device_t dev)
3725 {
3726 	struct pci_softc *sc;
3727 	int busno, domain;
3728 #ifdef PCI_DMA_BOUNDARY
3729 	int error, tag_valid;
3730 #endif
3731 #ifdef PCI_RES_BUS
3732 	int rid;
3733 #endif
3734 
3735 	sc = device_get_softc(dev);
3736 	domain = pcib_get_domain(dev);
3737 	busno = pcib_get_bus(dev);
3738 #ifdef PCI_RES_BUS
3739 	rid = 0;
3740 	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
3741 	    1, 0);
3742 	if (sc->sc_bus == NULL) {
3743 		device_printf(dev, "failed to allocate bus number\n");
3744 		return (ENXIO);
3745 	}
3746 #endif
3747 	if (bootverbose)
3748 		device_printf(dev, "domain=%d, physical bus=%d\n",
3749 		    domain, busno);
3750 #ifdef PCI_DMA_BOUNDARY
3751 	tag_valid = 0;
3752 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
3753 	    devclass_find("pci")) {
3754 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
3755 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3756 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
3757 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
3758 		if (error)
3759 			device_printf(dev, "Failed to create DMA tag: %d\n",
3760 			    error);
3761 		else
3762 			tag_valid = 1;
3763 	}
3764 	if (!tag_valid)
3765 #endif
3766 		sc->sc_dma_tag = bus_get_dma_tag(dev);
3767 	return (0);
3768 }
3769 
3770 static int
3771 pci_attach(device_t dev)
3772 {
3773 	int busno, domain, error;
3774 
3775 	error = pci_attach_common(dev);
3776 	if (error)
3777 		return (error);
3778 
3779 	/*
3780 	 * Since there can be multiple independantly numbered PCI
3781 	 * busses on systems with multiple PCI domains, we can't use
3782 	 * the unit number to decide which bus we are probing. We ask
3783 	 * the parent pcib what our domain and bus numbers are.
3784 	 */
3785 	domain = pcib_get_domain(dev);
3786 	busno = pcib_get_bus(dev);
3787 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3788 	return (bus_generic_attach(dev));
3789 }
3790 
3791 #ifdef PCI_RES_BUS
3792 static int
3793 pci_detach(device_t dev)
3794 {
3795 	struct pci_softc *sc;
3796 	int error;
3797 
3798 	error = bus_generic_detach(dev);
3799 	if (error)
3800 		return (error);
3801 	sc = device_get_softc(dev);
3802 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3803 }
3804 #endif
3805 
3806 static void
3807 pci_set_power_child(device_t dev, device_t child, int state)
3808 {
3809 	device_t pcib;
3810 	int dstate;
3811 
3812 	/*
3813 	 * Set the device to the given state.  If the firmware suggests
3814 	 * a different power state, use it instead.  If power management
3815 	 * is not present, the firmware is responsible for managing
3816 	 * device power.  Skip children who aren't attached since they
3817 	 * are handled separately.
3818 	 */
3819 	pcib = device_get_parent(dev);
3820 	dstate = state;
3821 	if (device_is_attached(child) &&
3822 	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
3823 		pci_set_powerstate(child, dstate);
3824 }
3825 
3826 int
3827 pci_suspend_child(device_t dev, device_t child)
3828 {
3829 	struct pci_devinfo *dinfo;
3830 	int error;
3831 
3832 	dinfo = device_get_ivars(child);
3833 
3834 	/*
3835 	 * Save the PCI configuration space for the child and set the
3836 	 * device in the appropriate power state for this sleep state.
3837 	 */
3838 	pci_cfg_save(child, dinfo, 0);
3839 
3840 	/* Suspend devices before potentially powering them down. */
3841 	error = bus_generic_suspend_child(dev, child);
3842 
3843 	if (error)
3844 		return (error);
3845 
3846 	if (pci_do_power_suspend)
3847 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
3848 
3849 	return (0);
3850 }
3851 
3852 int
3853 pci_resume_child(device_t dev, device_t child)
3854 {
3855 	struct pci_devinfo *dinfo;
3856 
3857 	if (pci_do_power_resume)
3858 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
3859 
3860 	dinfo = device_get_ivars(child);
3861 	pci_cfg_restore(child, dinfo);
3862 	if (!device_is_attached(child))
3863 		pci_cfg_save(child, dinfo, 1);
3864 
3865 	bus_generic_resume_child(dev, child);
3866 
3867 	return (0);
3868 }
3869 
3870 int
3871 pci_resume(device_t dev)
3872 {
3873 	device_t child, *devlist;
3874 	int error, i, numdevs;
3875 
3876 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3877 		return (error);
3878 
3879 	/*
3880 	 * Resume critical devices first, then everything else later.
3881 	 */
3882 	for (i = 0; i < numdevs; i++) {
3883 		child = devlist[i];
3884 		switch (pci_get_class(child)) {
3885 		case PCIC_DISPLAY:
3886 		case PCIC_MEMORY:
3887 		case PCIC_BRIDGE:
3888 		case PCIC_BASEPERIPH:
3889 			BUS_RESUME_CHILD(dev, child);
3890 			break;
3891 		}
3892 	}
3893 	for (i = 0; i < numdevs; i++) {
3894 		child = devlist[i];
3895 		switch (pci_get_class(child)) {
3896 		case PCIC_DISPLAY:
3897 		case PCIC_MEMORY:
3898 		case PCIC_BRIDGE:
3899 		case PCIC_BASEPERIPH:
3900 			break;
3901 		default:
3902 			BUS_RESUME_CHILD(dev, child);
3903 		}
3904 	}
3905 	free(devlist, M_TEMP);
3906 	return (0);
3907 }
3908 
3909 static void
3910 pci_load_vendor_data(void)
3911 {
3912 	caddr_t data;
3913 	void *ptr;
3914 	size_t sz;
3915 
3916 	data = preload_search_by_type("pci_vendor_data");
3917 	if (data != NULL) {
3918 		ptr = preload_fetch_addr(data);
3919 		sz = preload_fetch_size(data);
3920 		if (ptr != NULL && sz != 0) {
3921 			pci_vendordata = ptr;
3922 			pci_vendordata_size = sz;
3923 			/* terminate the database */
3924 			pci_vendordata[pci_vendordata_size] = '\n';
3925 		}
3926 	}
3927 }
3928 
3929 void
3930 pci_driver_added(device_t dev, driver_t *driver)
3931 {
3932 	int numdevs;
3933 	device_t *devlist;
3934 	device_t child;
3935 	struct pci_devinfo *dinfo;
3936 	int i;
3937 
3938 	if (bootverbose)
3939 		device_printf(dev, "driver added\n");
3940 	DEVICE_IDENTIFY(driver, dev);
3941 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3942 		return;
3943 	for (i = 0; i < numdevs; i++) {
3944 		child = devlist[i];
3945 		if (device_get_state(child) != DS_NOTPRESENT)
3946 			continue;
3947 		dinfo = device_get_ivars(child);
3948 		pci_print_verbose(dinfo);
3949 		if (bootverbose)
3950 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3951 		pci_cfg_restore(child, dinfo);
3952 		if (device_probe_and_attach(child) != 0)
3953 			pci_child_detached(dev, child);
3954 	}
3955 	free(devlist, M_TEMP);
3956 }
3957 
3958 int
3959 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3960     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3961 {
3962 	struct pci_devinfo *dinfo;
3963 	struct msix_table_entry *mte;
3964 	struct msix_vector *mv;
3965 	uint64_t addr;
3966 	uint32_t data;
3967 	void *cookie;
3968 	int error, rid;
3969 
3970 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3971 	    arg, &cookie);
3972 	if (error)
3973 		return (error);
3974 
3975 	/* If this is not a direct child, just bail out. */
3976 	if (device_get_parent(child) != dev) {
3977 		*cookiep = cookie;
3978 		return(0);
3979 	}
3980 
3981 	rid = rman_get_rid(irq);
3982 	if (rid == 0) {
3983 		/* Make sure that INTx is enabled */
3984 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3985 	} else {
3986 		/*
3987 		 * Check to see if the interrupt is MSI or MSI-X.
3988 		 * Ask our parent to map the MSI and give
3989 		 * us the address and data register values.
3990 		 * If we fail for some reason, teardown the
3991 		 * interrupt handler.
3992 		 */
3993 		dinfo = device_get_ivars(child);
3994 		if (dinfo->cfg.msi.msi_alloc > 0) {
3995 			if (dinfo->cfg.msi.msi_addr == 0) {
3996 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3997 			    ("MSI has handlers, but vectors not mapped"));
3998 				error = PCIB_MAP_MSI(device_get_parent(dev),
3999 				    child, rman_get_start(irq), &addr, &data);
4000 				if (error)
4001 					goto bad;
4002 				dinfo->cfg.msi.msi_addr = addr;
4003 				dinfo->cfg.msi.msi_data = data;
4004 			}
4005 			if (dinfo->cfg.msi.msi_handlers == 0)
4006 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
4007 				    dinfo->cfg.msi.msi_data);
4008 			dinfo->cfg.msi.msi_handlers++;
4009 		} else {
4010 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4011 			    ("No MSI or MSI-X interrupts allocated"));
4012 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4013 			    ("MSI-X index too high"));
4014 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4015 			KASSERT(mte->mte_vector != 0, ("no message vector"));
4016 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
4017 			KASSERT(mv->mv_irq == rman_get_start(irq),
4018 			    ("IRQ mismatch"));
4019 			if (mv->mv_address == 0) {
4020 				KASSERT(mte->mte_handlers == 0,
4021 		    ("MSI-X table entry has handlers, but vector not mapped"));
4022 				error = PCIB_MAP_MSI(device_get_parent(dev),
4023 				    child, rman_get_start(irq), &addr, &data);
4024 				if (error)
4025 					goto bad;
4026 				mv->mv_address = addr;
4027 				mv->mv_data = data;
4028 			}
4029 			if (mte->mte_handlers == 0) {
4030 				pci_enable_msix(child, rid - 1, mv->mv_address,
4031 				    mv->mv_data);
4032 				pci_unmask_msix(child, rid - 1);
4033 			}
4034 			mte->mte_handlers++;
4035 		}
4036 
4037 		/*
4038 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
4039 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
4040 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
4041 		 */
4042 		if (!pci_has_quirk(pci_get_devid(child),
4043 		    PCI_QUIRK_MSI_INTX_BUG))
4044 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4045 		else
4046 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
4047 	bad:
4048 		if (error) {
4049 			(void)bus_generic_teardown_intr(dev, child, irq,
4050 			    cookie);
4051 			return (error);
4052 		}
4053 	}
4054 	*cookiep = cookie;
4055 	return (0);
4056 }
4057 
4058 int
4059 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4060     void *cookie)
4061 {
4062 	struct msix_table_entry *mte;
4063 	struct resource_list_entry *rle;
4064 	struct pci_devinfo *dinfo;
4065 	int error, rid;
4066 
4067 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4068 		return (EINVAL);
4069 
4070 	/* If this isn't a direct child, just bail out */
4071 	if (device_get_parent(child) != dev)
4072 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4073 
4074 	rid = rman_get_rid(irq);
4075 	if (rid == 0) {
4076 		/* Mask INTx */
4077 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4078 	} else {
4079 		/*
4080 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4081 		 * decrement the appropriate handlers count and mask the
4082 		 * MSI-X message, or disable MSI messages if the count
4083 		 * drops to 0.
4084 		 */
4085 		dinfo = device_get_ivars(child);
4086 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4087 		if (rle->res != irq)
4088 			return (EINVAL);
4089 		if (dinfo->cfg.msi.msi_alloc > 0) {
4090 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4091 			    ("MSI-X index too high"));
4092 			if (dinfo->cfg.msi.msi_handlers == 0)
4093 				return (EINVAL);
4094 			dinfo->cfg.msi.msi_handlers--;
4095 			if (dinfo->cfg.msi.msi_handlers == 0)
4096 				pci_disable_msi(child);
4097 		} else {
4098 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4099 			    ("No MSI or MSI-X interrupts allocated"));
4100 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4101 			    ("MSI-X index too high"));
4102 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4103 			if (mte->mte_handlers == 0)
4104 				return (EINVAL);
4105 			mte->mte_handlers--;
4106 			if (mte->mte_handlers == 0)
4107 				pci_mask_msix(child, rid - 1);
4108 		}
4109 	}
4110 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4111 	if (rid > 0)
4112 		KASSERT(error == 0,
4113 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4114 	return (error);
4115 }
4116 
4117 int
4118 pci_print_child(device_t dev, device_t child)
4119 {
4120 	struct pci_devinfo *dinfo;
4121 	struct resource_list *rl;
4122 	int retval = 0;
4123 
4124 	dinfo = device_get_ivars(child);
4125 	rl = &dinfo->resources;
4126 
4127 	retval += bus_print_child_header(dev, child);
4128 
4129 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4130 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4131 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4132 	if (device_get_flags(dev))
4133 		retval += printf(" flags %#x", device_get_flags(dev));
4134 
4135 	retval += printf(" at device %d.%d", pci_get_slot(child),
4136 	    pci_get_function(child));
4137 
4138 	retval += bus_print_child_domain(dev, child);
4139 	retval += bus_print_child_footer(dev, child);
4140 
4141 	return (retval);
4142 }
4143 
/*
 * Class/subclass descriptions used by pci_probe_nomatch() to describe
 * a device that no driver claimed.
 *
 * An entry with subclass -1 names the whole class; the other entries
 * name one subclass within it.  The table is scanned linearly until
 * the entry whose desc is NULL.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	/* Terminator: the scan stops at the first entry with a NULL desc. */
	{0, 0, 0,		NULL}
};
4238 
4239 void
4240 pci_probe_nomatch(device_t dev, device_t child)
4241 {
4242 	int i, report;
4243 	const char *cp, *scp;
4244 	char *device;
4245 
4246 	/*
4247 	 * Look for a listing for this device in a loaded device database.
4248 	 */
4249 	report = 1;
4250 	if ((device = pci_describe_device(child)) != NULL) {
4251 		device_printf(dev, "<%s>", device);
4252 		free(device, M_DEVBUF);
4253 	} else {
4254 		/*
4255 		 * Scan the class/subclass descriptions for a general
4256 		 * description.
4257 		 */
4258 		cp = "unknown";
4259 		scp = NULL;
4260 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4261 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4262 				if (pci_nomatch_tab[i].subclass == -1) {
4263 					cp = pci_nomatch_tab[i].desc;
4264 					report = pci_nomatch_tab[i].report;
4265 				} else if (pci_nomatch_tab[i].subclass ==
4266 				    pci_get_subclass(child)) {
4267 					scp = pci_nomatch_tab[i].desc;
4268 					report = pci_nomatch_tab[i].report;
4269 				}
4270 			}
4271 		}
4272 		if (report || bootverbose) {
4273 			device_printf(dev, "<%s%s%s>",
4274 			    cp ? cp : "",
4275 			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4276 			    scp ? scp : "");
4277 		}
4278 	}
4279 	if (report || bootverbose) {
4280 		printf(" at device %d.%d (no driver attached)\n",
4281 		    pci_get_slot(child), pci_get_function(child));
4282 	}
4283 	pci_cfg_save(child, device_get_ivars(child), 1);
4284 }
4285 
4286 void
4287 pci_child_detached(device_t dev, device_t child)
4288 {
4289 	struct pci_devinfo *dinfo;
4290 	struct resource_list *rl;
4291 
4292 	dinfo = device_get_ivars(child);
4293 	rl = &dinfo->resources;
4294 
4295 	/*
4296 	 * Have to deallocate IRQs before releasing any MSI messages and
4297 	 * have to release MSI messages before deallocating any memory
4298 	 * BARs.
4299 	 */
4300 	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4301 		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4302 	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4303 		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4304 		(void)pci_release_msi(child);
4305 	}
4306 	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4307 		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4308 	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4309 		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4310 #ifdef PCI_RES_BUS
4311 	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4312 		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4313 #endif
4314 
4315 	pci_cfg_save(child, dinfo, 1);
4316 }
4317 
4318 /*
4319  * Parse the PCI device database, if loaded, and return a pointer to a
4320  * description of the device.
4321  *
4322  * The database is flat text formatted as follows:
4323  *
4324  * Any line not in a valid format is ignored.
4325  * Lines are terminated with newline '\n' characters.
4326  *
4327  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4328  * the vendor name.
4329  *
4330  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4331  * - devices cannot be listed without a corresponding VENDOR line.
4332  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4333  * another TAB, then the device name.
4334  */
4335 
4336 /*
4337  * Assuming (ptr) points to the beginning of a line in the database,
4338  * return the vendor or device and description of the next entry.
4339  * The value of (vendor) or (device) inappropriate for the entry type
4340  * is set to -1.  Returns nonzero at the end of the database.
4341  *
4342  * Note that this is slightly unrobust in the face of corrupt data;
4343  * we attempt to safeguard against this by spamming the end of the
4344  * database with a newline when we initialise.
4345  */
4346 static int
4347 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4348 {
4349 	char	*cp = *ptr;
4350 	int	left;
4351 
4352 	*device = -1;
4353 	*vendor = -1;
4354 	**desc = '\0';
4355 	for (;;) {
4356 		left = pci_vendordata_size - (cp - pci_vendordata);
4357 		if (left <= 0) {
4358 			*ptr = cp;
4359 			return(1);
4360 		}
4361 
4362 		/* vendor entry? */
4363 		if (*cp != '\t' &&
4364 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4365 			break;
4366 		/* device entry? */
4367 		if (*cp == '\t' &&
4368 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4369 			break;
4370 
4371 		/* skip to next line */
4372 		while (*cp != '\n' && left > 0) {
4373 			cp++;
4374 			left--;
4375 		}
4376 		if (*cp == '\n') {
4377 			cp++;
4378 			left--;
4379 		}
4380 	}
4381 	/* skip to next line */
4382 	while (*cp != '\n' && left > 0) {
4383 		cp++;
4384 		left--;
4385 	}
4386 	if (*cp == '\n' && left > 0)
4387 		cp++;
4388 	*ptr = cp;
4389 	return(0);
4390 }
4391 
4392 static char *
4393 pci_describe_device(device_t dev)
4394 {
4395 	int	vendor, device;
4396 	char	*desc, *vp, *dp, *line;
4397 
4398 	desc = vp = dp = NULL;
4399 
4400 	/*
4401 	 * If we have no vendor data, we can't do anything.
4402 	 */
4403 	if (pci_vendordata == NULL)
4404 		goto out;
4405 
4406 	/*
4407 	 * Scan the vendor data looking for this device
4408 	 */
4409 	line = pci_vendordata;
4410 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4411 		goto out;
4412 	for (;;) {
4413 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4414 			goto out;
4415 		if (vendor == pci_get_vendor(dev))
4416 			break;
4417 	}
4418 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4419 		goto out;
4420 	for (;;) {
4421 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4422 			*dp = 0;
4423 			break;
4424 		}
4425 		if (vendor != -1) {
4426 			*dp = 0;
4427 			break;
4428 		}
4429 		if (device == pci_get_device(dev))
4430 			break;
4431 	}
4432 	if (dp[0] == '\0')
4433 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4434 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4435 	    NULL)
4436 		sprintf(desc, "%s, %s", vp, dp);
4437 out:
4438 	if (vp != NULL)
4439 		free(vp, M_DEVBUF);
4440 	if (dp != NULL)
4441 		free(dp, M_DEVBUF);
4442 	return(desc);
4443 }
4444 
4445 int
4446 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4447 {
4448 	struct pci_devinfo *dinfo;
4449 	pcicfgregs *cfg;
4450 
4451 	dinfo = device_get_ivars(child);
4452 	cfg = &dinfo->cfg;
4453 
4454 	switch (which) {
4455 	case PCI_IVAR_ETHADDR:
4456 		/*
4457 		 * The generic accessor doesn't deal with failure, so
4458 		 * we set the return value, then return an error.
4459 		 */
4460 		*((uint8_t **) result) = NULL;
4461 		return (EINVAL);
4462 	case PCI_IVAR_SUBVENDOR:
4463 		*result = cfg->subvendor;
4464 		break;
4465 	case PCI_IVAR_SUBDEVICE:
4466 		*result = cfg->subdevice;
4467 		break;
4468 	case PCI_IVAR_VENDOR:
4469 		*result = cfg->vendor;
4470 		break;
4471 	case PCI_IVAR_DEVICE:
4472 		*result = cfg->device;
4473 		break;
4474 	case PCI_IVAR_DEVID:
4475 		*result = (cfg->device << 16) | cfg->vendor;
4476 		break;
4477 	case PCI_IVAR_CLASS:
4478 		*result = cfg->baseclass;
4479 		break;
4480 	case PCI_IVAR_SUBCLASS:
4481 		*result = cfg->subclass;
4482 		break;
4483 	case PCI_IVAR_PROGIF:
4484 		*result = cfg->progif;
4485 		break;
4486 	case PCI_IVAR_REVID:
4487 		*result = cfg->revid;
4488 		break;
4489 	case PCI_IVAR_INTPIN:
4490 		*result = cfg->intpin;
4491 		break;
4492 	case PCI_IVAR_IRQ:
4493 		*result = cfg->intline;
4494 		break;
4495 	case PCI_IVAR_DOMAIN:
4496 		*result = cfg->domain;
4497 		break;
4498 	case PCI_IVAR_BUS:
4499 		*result = cfg->bus;
4500 		break;
4501 	case PCI_IVAR_SLOT:
4502 		*result = cfg->slot;
4503 		break;
4504 	case PCI_IVAR_FUNCTION:
4505 		*result = cfg->func;
4506 		break;
4507 	case PCI_IVAR_CMDREG:
4508 		*result = cfg->cmdreg;
4509 		break;
4510 	case PCI_IVAR_CACHELNSZ:
4511 		*result = cfg->cachelnsz;
4512 		break;
4513 	case PCI_IVAR_MINGNT:
4514 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4515 			*result = -1;
4516 			return (EINVAL);
4517 		}
4518 		*result = cfg->mingnt;
4519 		break;
4520 	case PCI_IVAR_MAXLAT:
4521 		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4522 			*result = -1;
4523 			return (EINVAL);
4524 		}
4525 		*result = cfg->maxlat;
4526 		break;
4527 	case PCI_IVAR_LATTIMER:
4528 		*result = cfg->lattimer;
4529 		break;
4530 	default:
4531 		return (ENOENT);
4532 	}
4533 	return (0);
4534 }
4535 
4536 int
4537 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4538 {
4539 	struct pci_devinfo *dinfo;
4540 
4541 	dinfo = device_get_ivars(child);
4542 
4543 	switch (which) {
4544 	case PCI_IVAR_INTPIN:
4545 		dinfo->cfg.intpin = value;
4546 		return (0);
4547 	case PCI_IVAR_ETHADDR:
4548 	case PCI_IVAR_SUBVENDOR:
4549 	case PCI_IVAR_SUBDEVICE:
4550 	case PCI_IVAR_VENDOR:
4551 	case PCI_IVAR_DEVICE:
4552 	case PCI_IVAR_DEVID:
4553 	case PCI_IVAR_CLASS:
4554 	case PCI_IVAR_SUBCLASS:
4555 	case PCI_IVAR_PROGIF:
4556 	case PCI_IVAR_REVID:
4557 	case PCI_IVAR_IRQ:
4558 	case PCI_IVAR_DOMAIN:
4559 	case PCI_IVAR_BUS:
4560 	case PCI_IVAR_SLOT:
4561 	case PCI_IVAR_FUNCTION:
4562 		return (EINVAL);	/* disallow for now */
4563 
4564 	default:
4565 		return (ENOENT);
4566 	}
4567 }
4568 
4569 #include "opt_ddb.h"
4570 #ifdef DDB
4571 #include <ddb/ddb.h>
4572 #include <sys/cons.h>
4573 
4574 /*
4575  * List resources based on pci map registers, used for within ddb
4576  */
4577 
4578 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4579 {
4580 	struct pci_devinfo *dinfo;
4581 	struct devlist *devlist_head;
4582 	struct pci_conf *p;
4583 	const char *name;
4584 	int i, error, none_count;
4585 
4586 	none_count = 0;
4587 	/* get the head of the device queue */
4588 	devlist_head = &pci_devq;
4589 
4590 	/*
4591 	 * Go through the list of devices and print out devices
4592 	 */
4593 	for (error = 0, i = 0,
4594 	     dinfo = STAILQ_FIRST(devlist_head);
4595 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4596 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4597 
4598 		/* Populate pd_name and pd_unit */
4599 		name = NULL;
4600 		if (dinfo->cfg.dev)
4601 			name = device_get_name(dinfo->cfg.dev);
4602 
4603 		p = &dinfo->conf;
4604 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4605 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4606 			(name && *name) ? name : "none",
4607 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4608 			none_count++,
4609 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4610 			p->pc_sel.pc_func, (p->pc_class << 16) |
4611 			(p->pc_subclass << 8) | p->pc_progif,
4612 			(p->pc_subdevice << 16) | p->pc_subvendor,
4613 			(p->pc_device << 16) | p->pc_vendor,
4614 			p->pc_revid, p->pc_hdr);
4615 	}
4616 }
4617 #endif /* DDB */
4618 
4619 static struct resource *
4620 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4621     u_long start, u_long end, u_long count, u_int num, u_int flags)
4622 {
4623 	struct pci_devinfo *dinfo = device_get_ivars(child);
4624 	struct resource_list *rl = &dinfo->resources;
4625 	struct resource *res;
4626 	struct pci_map *pm;
4627 	pci_addr_t map, testval;
4628 	int mapsize;
4629 
4630 	res = NULL;
4631 	pm = pci_find_bar(child, *rid);
4632 	if (pm != NULL) {
4633 		/* This is a BAR that we failed to allocate earlier. */
4634 		mapsize = pm->pm_size;
4635 		map = pm->pm_value;
4636 	} else {
4637 		/*
4638 		 * Weed out the bogons, and figure out how large the
4639 		 * BAR/map is.  BARs that read back 0 here are bogus
4640 		 * and unimplemented.  Note: atapci in legacy mode are
4641 		 * special and handled elsewhere in the code.  If you
4642 		 * have a atapci device in legacy mode and it fails
4643 		 * here, that other code is broken.
4644 		 */
4645 		pci_read_bar(child, *rid, &map, &testval, NULL);
4646 
4647 		/*
4648 		 * Determine the size of the BAR and ignore BARs with a size
4649 		 * of 0.  Device ROM BARs use a different mask value.
4650 		 */
4651 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4652 			mapsize = pci_romsize(testval);
4653 		else
4654 			mapsize = pci_mapsize(testval);
4655 		if (mapsize == 0)
4656 			goto out;
4657 		pm = pci_add_bar(child, *rid, map, mapsize);
4658 	}
4659 
4660 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4661 		if (type != SYS_RES_MEMORY) {
4662 			if (bootverbose)
4663 				device_printf(dev,
4664 				    "child %s requested type %d for rid %#x,"
4665 				    " but the BAR says it is an memio\n",
4666 				    device_get_nameunit(child), type, *rid);
4667 			goto out;
4668 		}
4669 	} else {
4670 		if (type != SYS_RES_IOPORT) {
4671 			if (bootverbose)
4672 				device_printf(dev,
4673 				    "child %s requested type %d for rid %#x,"
4674 				    " but the BAR says it is an ioport\n",
4675 				    device_get_nameunit(child), type, *rid);
4676 			goto out;
4677 		}
4678 	}
4679 
4680 	/*
4681 	 * For real BARs, we need to override the size that
4682 	 * the driver requests, because that's what the BAR
4683 	 * actually uses and we would otherwise have a
4684 	 * situation where we might allocate the excess to
4685 	 * another driver, which won't work.
4686 	 */
4687 	count = ((pci_addr_t)1 << mapsize) * num;
4688 	if (RF_ALIGNMENT(flags) < mapsize)
4689 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4690 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4691 		flags |= RF_PREFETCHABLE;
4692 
4693 	/*
4694 	 * Allocate enough resource, and then write back the
4695 	 * appropriate BAR for that resource.
4696 	 */
4697 	resource_list_add(rl, type, *rid, start, end, count);
4698 	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
4699 	    count, flags & ~RF_ACTIVE);
4700 	if (res == NULL) {
4701 		resource_list_delete(rl, type, *rid);
4702 		device_printf(child,
4703 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4704 		    count, *rid, type, start, end);
4705 		goto out;
4706 	}
4707 	if (bootverbose)
4708 		device_printf(child,
4709 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4710 		    count, *rid, type, rman_get_start(res));
4711 	map = rman_get_start(res);
4712 	pci_write_bar(child, pm, map);
4713 out:
4714 	return (res);
4715 }
4716 
4717 struct resource *
4718 pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
4719     u_long start, u_long end, u_long count, u_long num, u_int flags)
4720 {
4721 	struct pci_devinfo *dinfo;
4722 	struct resource_list *rl;
4723 	struct resource_list_entry *rle;
4724 	struct resource *res;
4725 	pcicfgregs *cfg;
4726 
4727 	/*
4728 	 * Perform lazy resource allocation
4729 	 */
4730 	dinfo = device_get_ivars(child);
4731 	rl = &dinfo->resources;
4732 	cfg = &dinfo->cfg;
4733 	switch (type) {
4734 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
4735 	case PCI_RES_BUS:
4736 		return (pci_alloc_secbus(dev, child, rid, start, end, count,
4737 		    flags));
4738 #endif
4739 	case SYS_RES_IRQ:
4740 		/*
4741 		 * Can't alloc legacy interrupt once MSI messages have
4742 		 * been allocated.
4743 		 */
4744 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4745 		    cfg->msix.msix_alloc > 0))
4746 			return (NULL);
4747 
4748 		/*
4749 		 * If the child device doesn't have an interrupt
4750 		 * routed and is deserving of an interrupt, try to
4751 		 * assign it one.
4752 		 */
4753 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4754 		    (cfg->intpin != 0))
4755 			pci_assign_interrupt(dev, child, 0);
4756 		break;
4757 	case SYS_RES_IOPORT:
4758 	case SYS_RES_MEMORY:
4759 #ifdef NEW_PCIB
4760 		/*
4761 		 * PCI-PCI bridge I/O window resources are not BARs.
4762 		 * For those allocations just pass the request up the
4763 		 * tree.
4764 		 */
4765 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4766 			switch (*rid) {
4767 			case PCIR_IOBASEL_1:
4768 			case PCIR_MEMBASE_1:
4769 			case PCIR_PMBASEL_1:
4770 				/*
4771 				 * XXX: Should we bother creating a resource
4772 				 * list entry?
4773 				 */
4774 				return (bus_generic_alloc_resource(dev, child,
4775 				    type, rid, start, end, count, flags));
4776 			}
4777 		}
4778 #endif
4779 		/* Reserve resources for this BAR if needed. */
4780 		rle = resource_list_find(rl, type, *rid);
4781 		if (rle == NULL) {
4782 			res = pci_reserve_map(dev, child, type, rid, start, end,
4783 			    count, num, flags);
4784 			if (res == NULL)
4785 				return (NULL);
4786 		}
4787 	}
4788 	return (resource_list_alloc(rl, dev, child, type, rid,
4789 	    start, end, count, flags));
4790 }
4791 
4792 struct resource *
4793 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4794     u_long start, u_long end, u_long count, u_int flags)
4795 {
4796 #ifdef PCI_IOV
4797 	struct pci_devinfo *dinfo;
4798 #endif
4799 
4800 	if (device_get_parent(child) != dev)
4801 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4802 		    type, rid, start, end, count, flags));
4803 
4804 #ifdef PCI_IOV
4805 	dinfo = device_get_ivars(child);
4806 	if (dinfo->cfg.flags & PCICFG_VF) {
4807 		switch (type) {
4808 		/* VFs can't have I/O BARs. */
4809 		case SYS_RES_IOPORT:
4810 			return (NULL);
4811 		case SYS_RES_MEMORY:
4812 			return (pci_vf_alloc_mem_resource(dev, child, rid,
4813 			    start, end, count, flags));
4814 		}
4815 
4816 		/* Fall through for other types of resource allocations. */
4817 	}
4818 #endif
4819 
4820 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
4821 	    count, 1, flags));
4822 }
4823 
4824 int
4825 pci_release_resource(device_t dev, device_t child, int type, int rid,
4826     struct resource *r)
4827 {
4828 	struct pci_devinfo *dinfo;
4829 	struct resource_list *rl;
4830 	pcicfgregs *cfg;
4831 
4832 	if (device_get_parent(child) != dev)
4833 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4834 		    type, rid, r));
4835 
4836 	dinfo = device_get_ivars(child);
4837 	cfg = &dinfo->cfg;
4838 
4839 #ifdef PCI_IOV
4840 	if (dinfo->cfg.flags & PCICFG_VF) {
4841 		switch (type) {
4842 		/* VFs can't have I/O BARs. */
4843 		case SYS_RES_IOPORT:
4844 			return (EDOOFUS);
4845 		case SYS_RES_MEMORY:
4846 			return (pci_vf_release_mem_resource(dev, child, rid,
4847 			    r));
4848 		}
4849 
4850 		/* Fall through for other types of resource allocations. */
4851 	}
4852 #endif
4853 
4854 #ifdef NEW_PCIB
4855 	/*
4856 	 * PCI-PCI bridge I/O window resources are not BARs.  For
4857 	 * those allocations just pass the request up the tree.
4858 	 */
4859 	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4860 	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4861 		switch (rid) {
4862 		case PCIR_IOBASEL_1:
4863 		case PCIR_MEMBASE_1:
4864 		case PCIR_PMBASEL_1:
4865 			return (bus_generic_release_resource(dev, child, type,
4866 			    rid, r));
4867 		}
4868 	}
4869 #endif
4870 
4871 	rl = &dinfo->resources;
4872 	return (resource_list_release(rl, dev, child, type, rid, r));
4873 }
4874 
4875 int
4876 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4877     struct resource *r)
4878 {
4879 	struct pci_devinfo *dinfo;
4880 	int error;
4881 
4882 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4883 	if (error)
4884 		return (error);
4885 
4886 	/* Enable decoding in the command register when activating BARs. */
4887 	if (device_get_parent(child) == dev) {
4888 		/* Device ROMs need their decoding explicitly enabled. */
4889 		dinfo = device_get_ivars(child);
4890 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4891 			pci_write_bar(child, pci_find_bar(child, rid),
4892 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4893 		switch (type) {
4894 		case SYS_RES_IOPORT:
4895 		case SYS_RES_MEMORY:
4896 			error = PCI_ENABLE_IO(dev, child, type);
4897 			break;
4898 		}
4899 	}
4900 	return (error);
4901 }
4902 
4903 int
4904 pci_deactivate_resource(device_t dev, device_t child, int type,
4905     int rid, struct resource *r)
4906 {
4907 	struct pci_devinfo *dinfo;
4908 	int error;
4909 
4910 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4911 	if (error)
4912 		return (error);
4913 
4914 	/* Disable decoding for device ROMs. */
4915 	if (device_get_parent(child) == dev) {
4916 		dinfo = device_get_ivars(child);
4917 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4918 			pci_write_bar(child, pci_find_bar(child, rid),
4919 			    rman_get_start(r));
4920 	}
4921 	return (0);
4922 }
4923 
4924 void
4925 pci_delete_child(device_t dev, device_t child)
4926 {
4927 	struct resource_list_entry *rle;
4928 	struct resource_list *rl;
4929 	struct pci_devinfo *dinfo;
4930 
4931 	dinfo = device_get_ivars(child);
4932 	rl = &dinfo->resources;
4933 
4934 	if (device_is_attached(child))
4935 		device_detach(child);
4936 
4937 	/* Turn off access to resources we're about to free */
4938 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4939 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4940 
4941 	/* Free all allocated resources */
4942 	STAILQ_FOREACH(rle, rl, link) {
4943 		if (rle->res) {
4944 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4945 			    resource_list_busy(rl, rle->type, rle->rid)) {
4946 				pci_printf(&dinfo->cfg,
4947 				    "Resource still owned, oops. "
4948 				    "(type=%d, rid=%d, addr=%lx)\n",
4949 				    rle->type, rle->rid,
4950 				    rman_get_start(rle->res));
4951 				bus_release_resource(child, rle->type, rle->rid,
4952 				    rle->res);
4953 			}
4954 			resource_list_unreserve(rl, dev, child, rle->type,
4955 			    rle->rid);
4956 		}
4957 	}
4958 	resource_list_free(rl);
4959 
4960 	device_delete_child(dev, child);
4961 	pci_freecfg(dinfo);
4962 }
4963 
4964 void
4965 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4966 {
4967 	struct pci_devinfo *dinfo;
4968 	struct resource_list *rl;
4969 	struct resource_list_entry *rle;
4970 
4971 	if (device_get_parent(child) != dev)
4972 		return;
4973 
4974 	dinfo = device_get_ivars(child);
4975 	rl = &dinfo->resources;
4976 	rle = resource_list_find(rl, type, rid);
4977 	if (rle == NULL)
4978 		return;
4979 
4980 	if (rle->res) {
4981 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4982 		    resource_list_busy(rl, type, rid)) {
4983 			device_printf(dev, "delete_resource: "
4984 			    "Resource still owned by child, oops. "
4985 			    "(type=%d, rid=%d, addr=%lx)\n",
4986 			    type, rid, rman_get_start(rle->res));
4987 			return;
4988 		}
4989 		resource_list_unreserve(rl, dev, child, type, rid);
4990 	}
4991 	resource_list_delete(rl, type, rid);
4992 }
4993 
4994 struct resource_list *
4995 pci_get_resource_list (device_t dev, device_t child)
4996 {
4997 	struct pci_devinfo *dinfo = device_get_ivars(child);
4998 
4999 	return (&dinfo->resources);
5000 }
5001 
5002 bus_dma_tag_t
5003 pci_get_dma_tag(device_t bus, device_t dev)
5004 {
5005 	struct pci_softc *sc = device_get_softc(bus);
5006 
5007 	return (sc->sc_dma_tag);
5008 }
5009 
5010 uint32_t
5011 pci_read_config_method(device_t dev, device_t child, int reg, int width)
5012 {
5013 	struct pci_devinfo *dinfo = device_get_ivars(child);
5014 	pcicfgregs *cfg = &dinfo->cfg;
5015 
5016 #ifdef PCI_IOV
5017 	/*
5018 	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
5019 	 * emulate them here.
5020 	 */
5021 	if (cfg->flags & PCICFG_VF) {
5022 		if (reg == PCIR_VENDOR) {
5023 			switch (width) {
5024 			case 4:
5025 				return (cfg->device << 16 | cfg->vendor);
5026 			case 2:
5027 				return (cfg->vendor);
5028 			case 1:
5029 				return (cfg->vendor & 0xff);
5030 			default:
5031 				return (0xffffffff);
5032 			}
5033 		} else if (reg == PCIR_DEVICE) {
5034 			switch (width) {
5035 			/* Note that an unaligned 4-byte read is an error. */
5036 			case 2:
5037 				return (cfg->device);
5038 			case 1:
5039 				return (cfg->device & 0xff);
5040 			default:
5041 				return (0xffffffff);
5042 			}
5043 		}
5044 	}
5045 #endif
5046 
5047 	return (PCIB_READ_CONFIG(device_get_parent(dev),
5048 	    cfg->bus, cfg->slot, cfg->func, reg, width));
5049 }
5050 
5051 void
5052 pci_write_config_method(device_t dev, device_t child, int reg,
5053     uint32_t val, int width)
5054 {
5055 	struct pci_devinfo *dinfo = device_get_ivars(child);
5056 	pcicfgregs *cfg = &dinfo->cfg;
5057 
5058 	PCIB_WRITE_CONFIG(device_get_parent(dev),
5059 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5060 }
5061 
5062 int
5063 pci_child_location_str_method(device_t dev, device_t child, char *buf,
5064     size_t buflen)
5065 {
5066 
5067 	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
5068 	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5069 	return (0);
5070 }
5071 
5072 int
5073 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5074     size_t buflen)
5075 {
5076 	struct pci_devinfo *dinfo;
5077 	pcicfgregs *cfg;
5078 
5079 	dinfo = device_get_ivars(child);
5080 	cfg = &dinfo->cfg;
5081 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5082 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5083 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5084 	    cfg->progif);
5085 	return (0);
5086 }
5087 
5088 int
5089 pci_assign_interrupt_method(device_t dev, device_t child)
5090 {
5091 	struct pci_devinfo *dinfo = device_get_ivars(child);
5092 	pcicfgregs *cfg = &dinfo->cfg;
5093 
5094 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5095 	    cfg->intpin));
5096 }
5097 
5098 static void
5099 pci_lookup(void *arg, const char *name, device_t *dev)
5100 {
5101 	long val;
5102 	char *end;
5103 	int domain, bus, slot, func;
5104 
5105 	if (*dev != NULL)
5106 		return;
5107 
5108 	/*
5109 	 * Accept pciconf-style selectors of either pciD:B:S:F or
5110 	 * pciB:S:F.  In the latter case, the domain is assumed to
5111 	 * be zero.
5112 	 */
5113 	if (strncmp(name, "pci", 3) != 0)
5114 		return;
5115 	val = strtol(name + 3, &end, 10);
5116 	if (val < 0 || val > INT_MAX || *end != ':')
5117 		return;
5118 	domain = val;
5119 	val = strtol(end + 1, &end, 10);
5120 	if (val < 0 || val > INT_MAX || *end != ':')
5121 		return;
5122 	bus = val;
5123 	val = strtol(end + 1, &end, 10);
5124 	if (val < 0 || val > INT_MAX)
5125 		return;
5126 	slot = val;
5127 	if (*end == ':') {
5128 		val = strtol(end + 1, &end, 10);
5129 		if (val < 0 || val > INT_MAX || *end != '\0')
5130 			return;
5131 		func = val;
5132 	} else if (*end == '\0') {
5133 		func = slot;
5134 		slot = bus;
5135 		bus = domain;
5136 		domain = 0;
5137 	} else
5138 		return;
5139 
5140 	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5141 	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5142 		return;
5143 
5144 	*dev = pci_find_dbsf(domain, bus, slot, func);
5145 }
5146 
5147 static int
5148 pci_modevent(module_t mod, int what, void *arg)
5149 {
5150 	static struct cdev *pci_cdev;
5151 	static eventhandler_tag tag;
5152 
5153 	switch (what) {
5154 	case MOD_LOAD:
5155 		STAILQ_INIT(&pci_devq);
5156 		pci_generation = 0;
5157 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5158 		    "pci");
5159 		pci_load_vendor_data();
5160 		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5161 		    1000);
5162 		break;
5163 
5164 	case MOD_UNLOAD:
5165 		if (tag != NULL)
5166 			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5167 		destroy_dev(pci_cdev);
5168 		break;
5169 	}
5170 
5171 	return (0);
5172 }
5173 
5174 static void
5175 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
5176 {
5177 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
5178 	struct pcicfg_pcie *cfg;
5179 	int version, pos;
5180 
5181 	cfg = &dinfo->cfg.pcie;
5182 	pos = cfg->pcie_location;
5183 
5184 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5185 
5186 	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
5187 
5188 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5189 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5190 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5191 		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
5192 
5193 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5194 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5195 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5196 		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
5197 
5198 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5199 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5200 		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
5201 
5202 	if (version > 1) {
5203 		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
5204 		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
5205 		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
5206 	}
5207 #undef WREG
5208 }
5209 
5210 static void
5211 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5212 {
5213 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5214 	    dinfo->cfg.pcix.pcix_command,  2);
5215 }
5216 
5217 void
5218 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5219 {
5220 
5221 	/*
5222 	 * Restore the device to full power mode.  We must do this
5223 	 * before we restore the registers because moving from D3 to
5224 	 * D0 will cause the chip's BARs and some other registers to
5225 	 * be reset to some unknown power on reset values.  Cut down
5226 	 * the noise on boot by doing nothing if we are already in
5227 	 * state D0.
5228 	 */
5229 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5230 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5231 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5232 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5233 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5234 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5235 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5236 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5237 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5238 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5239 	case PCIM_HDRTYPE_NORMAL:
5240 		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5241 		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5242 		break;
5243 	case PCIM_HDRTYPE_BRIDGE:
5244 		pci_write_config(dev, PCIR_SECLAT_1,
5245 		    dinfo->cfg.bridge.br_seclat, 1);
5246 		pci_write_config(dev, PCIR_SUBBUS_1,
5247 		    dinfo->cfg.bridge.br_subbus, 1);
5248 		pci_write_config(dev, PCIR_SECBUS_1,
5249 		    dinfo->cfg.bridge.br_secbus, 1);
5250 		pci_write_config(dev, PCIR_PRIBUS_1,
5251 		    dinfo->cfg.bridge.br_pribus, 1);
5252 		pci_write_config(dev, PCIR_BRIDGECTL_1,
5253 		    dinfo->cfg.bridge.br_control, 2);
5254 		break;
5255 	case PCIM_HDRTYPE_CARDBUS:
5256 		pci_write_config(dev, PCIR_SECLAT_2,
5257 		    dinfo->cfg.bridge.br_seclat, 1);
5258 		pci_write_config(dev, PCIR_SUBBUS_2,
5259 		    dinfo->cfg.bridge.br_subbus, 1);
5260 		pci_write_config(dev, PCIR_SECBUS_2,
5261 		    dinfo->cfg.bridge.br_secbus, 1);
5262 		pci_write_config(dev, PCIR_PRIBUS_2,
5263 		    dinfo->cfg.bridge.br_pribus, 1);
5264 		pci_write_config(dev, PCIR_BRIDGECTL_2,
5265 		    dinfo->cfg.bridge.br_control, 2);
5266 		break;
5267 	}
5268 	pci_restore_bars(dev);
5269 
5270 	/*
5271 	 * Restore extended capabilities for PCI-Express and PCI-X
5272 	 */
5273 	if (dinfo->cfg.pcie.pcie_location != 0)
5274 		pci_cfg_restore_pcie(dev, dinfo);
5275 	if (dinfo->cfg.pcix.pcix_location != 0)
5276 		pci_cfg_restore_pcix(dev, dinfo);
5277 
5278 	/* Restore MSI and MSI-X configurations if they are present. */
5279 	if (dinfo->cfg.msi.msi_location != 0)
5280 		pci_resume_msi(dev);
5281 	if (dinfo->cfg.msix.msix_location != 0)
5282 		pci_resume_msix(dev);
5283 }
5284 
5285 static void
5286 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5287 {
5288 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5289 	struct pcicfg_pcie *cfg;
5290 	int version, pos;
5291 
5292 	cfg = &dinfo->cfg.pcie;
5293 	pos = cfg->pcie_location;
5294 
5295 	cfg->pcie_flags = RREG(PCIER_FLAGS);
5296 
5297 	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5298 
5299 	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5300 
5301 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5302 	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5303 	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5304 		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5305 
5306 	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5307 	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5308 	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5309 		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5310 
5311 	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5312 	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5313 		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5314 
5315 	if (version > 1) {
5316 		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5317 		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5318 		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5319 	}
5320 #undef RREG
5321 }
5322 
5323 static void
5324 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5325 {
5326 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5327 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5328 }
5329 
5330 void
5331 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5332 {
5333 	uint32_t cls;
5334 	int ps;
5335 
5336 	/*
5337 	 * Some drivers apparently write to these registers w/o updating our
5338 	 * cached copy.  No harm happens if we update the copy, so do so here
5339 	 * so we can restore them.  The COMMAND register is modified by the
5340 	 * bus w/o updating the cache.  This should represent the normally
5341 	 * writable portion of the 'defined' part of type 0/1/2 headers.
5342 	 */
5343 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5344 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5345 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5346 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5347 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5348 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5349 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5350 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5351 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5352 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5353 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5354 	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5355 	case PCIM_HDRTYPE_NORMAL:
5356 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5357 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5358 		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5359 		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5360 		break;
5361 	case PCIM_HDRTYPE_BRIDGE:
5362 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5363 		    PCIR_SECLAT_1, 1);
5364 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5365 		    PCIR_SUBBUS_1, 1);
5366 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5367 		    PCIR_SECBUS_1, 1);
5368 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5369 		    PCIR_PRIBUS_1, 1);
5370 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5371 		    PCIR_BRIDGECTL_1, 2);
5372 		break;
5373 	case PCIM_HDRTYPE_CARDBUS:
5374 		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5375 		    PCIR_SECLAT_2, 1);
5376 		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5377 		    PCIR_SUBBUS_2, 1);
5378 		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5379 		    PCIR_SECBUS_2, 1);
5380 		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5381 		    PCIR_PRIBUS_2, 1);
5382 		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5383 		    PCIR_BRIDGECTL_2, 2);
5384 		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5385 		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5386 		break;
5387 	}
5388 
5389 	if (dinfo->cfg.pcie.pcie_location != 0)
5390 		pci_cfg_save_pcie(dev, dinfo);
5391 
5392 	if (dinfo->cfg.pcix.pcix_location != 0)
5393 		pci_cfg_save_pcix(dev, dinfo);
5394 
5395 	/*
5396 	 * don't set the state for display devices, base peripherals and
5397 	 * memory devices since bad things happen when they are powered down.
5398 	 * We should (a) have drivers that can easily detach and (b) use
5399 	 * generic drivers for these devices so that some device actually
5400 	 * attaches.  We need to make sure that when we implement (a) we don't
5401 	 * power the device down on a reattach.
5402 	 */
5403 	cls = pci_get_class(dev);
5404 	if (!setstate)
5405 		return;
5406 	switch (pci_do_power_nodriver)
5407 	{
5408 		case 0:		/* NO powerdown at all */
5409 			return;
5410 		case 1:		/* Conservative about what to power down */
5411 			if (cls == PCIC_STORAGE)
5412 				return;
5413 			/*FALLTHROUGH*/
5414 		case 2:		/* Agressive about what to power down */
5415 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5416 			    cls == PCIC_BASEPERIPH)
5417 				return;
5418 			/*FALLTHROUGH*/
5419 		case 3:		/* Power down everything */
5420 			break;
5421 	}
5422 	/*
5423 	 * PCI spec says we can only go into D3 state from D0 state.
5424 	 * Transition from D[12] into D0 before going to D3 state.
5425 	 */
5426 	ps = pci_get_powerstate(dev);
5427 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5428 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5429 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5430 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5431 }
5432 
5433 /* Wrapper APIs suitable for device driver use. */
5434 void
5435 pci_save_state(device_t dev)
5436 {
5437 	struct pci_devinfo *dinfo;
5438 
5439 	dinfo = device_get_ivars(dev);
5440 	pci_cfg_save(dev, dinfo, 0);
5441 }
5442 
5443 void
5444 pci_restore_state(device_t dev)
5445 {
5446 	struct pci_devinfo *dinfo;
5447 
5448 	dinfo = device_get_ivars(dev);
5449 	pci_cfg_restore(dev, dinfo);
5450 }
5451 
5452 static uint16_t
5453 pci_get_rid_method(device_t dev, device_t child)
5454 {
5455 
5456 	return (PCIB_GET_RID(device_get_parent(dev), child));
5457 }
5458 
5459 /* Find the upstream port of a given PCI device in a root complex. */
5460 device_t
5461 pci_find_pcie_root_port(device_t dev)
5462 {
5463 	struct pci_devinfo *dinfo;
5464 	devclass_t pci_class;
5465 	device_t pcib, bus;
5466 
5467 	pci_class = devclass_find("pci");
5468 	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5469 	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5470 
5471 	/*
5472 	 * Walk the bridge hierarchy until we find a PCI-e root
5473 	 * port or a non-PCI device.
5474 	 */
5475 	for (;;) {
5476 		bus = device_get_parent(dev);
5477 		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5478 		    device_get_nameunit(dev)));
5479 
5480 		pcib = device_get_parent(bus);
5481 		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5482 		    device_get_nameunit(bus)));
5483 
5484 		/*
5485 		 * pcib's parent must be a PCI bus for this to be a
5486 		 * PCI-PCI bridge.
5487 		 */
5488 		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5489 			return (NULL);
5490 
5491 		dinfo = device_get_ivars(pcib);
5492 		if (dinfo->cfg.pcie.pcie_location != 0 &&
5493 		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5494 			return (pcib);
5495 
5496 		dev = pcib;
5497 	}
5498 }
5499