xref: /freebsd/sys/dev/pci/pci.c (revision 1a61beb0549e05b33df31380e427d90f6e46ff7e)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #define	PCIR_IS_BIOS(cfg, reg)						\
74 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 static int		pci_has_quirk(uint32_t devid, int quirk);
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 #ifdef PCI_RES_BUS
96 static int		pci_detach(device_t dev);
97 #endif
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_disable_msi(device_t dev);
114 static void		pci_enable_msi(device_t dev, uint64_t address,
115 			    uint16_t data);
116 static void		pci_enable_msix(device_t dev, u_int index,
117 			    uint64_t address, uint32_t data);
118 static void		pci_mask_msix(device_t dev, u_int index);
119 static void		pci_unmask_msix(device_t dev, u_int index);
120 static int		pci_msi_blacklisted(void);
121 static int		pci_msix_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
/*
 * Method dispatch table for the PCI bus driver: wires the generic
 * device, bus, and PCI kobj interfaces to the pci_* implementations
 * in this file (or to bus_generic_* where no PCI-specific handling
 * is needed).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	/* With bus-number resources, detach must release them explicitly. */
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
188 
189 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
190 
191 static devclass_t pci_devclass;
192 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
193 MODULE_VERSION(pci, 1);
194 
195 static char	*pci_vendordata;
196 static size_t	pci_vendordata_size;
197 
/*
 * One entry in the static table of per-device quirks consulted by
 * pci_has_quirk() and friends.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* codes below. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;	/* Quirk-specific argument (e.g. register offset). */
	int	arg2;	/* Second quirk-specific argument; unused so far. */
};
209 
/*
 * Known-broken (or known-good, for ENABLE_MSI_VM) devices, keyed by
 * the combined device<<16|vendor ID.  The list is terminated by a
 * zero devid entry.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* terminator */
};
267 
268 /* map register information */
269 #define	PCI_MAPMEM	0x01	/* memory map */
270 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
271 #define	PCI_MAPPORT	0x04	/* port map */
272 
273 struct devlist pci_devq;
274 uint32_t pci_generation;
275 uint32_t pci_numdevs = 0;
276 static int pcie_chipset, pcix_chipset;
277 
278 /* sysctl vars */
279 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
280 
281 static int pci_enable_io_modes = 1;
282 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
283 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
284     &pci_enable_io_modes, 1,
285     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
286 enable these bits correctly.  We'd like to do this all the time, but there\n\
287 are some peripherals that this causes problems with.");
288 
289 static int pci_do_realloc_bars = 0;
290 TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
291 SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
292     &pci_do_realloc_bars, 0,
293     "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
294 
295 static int pci_do_power_nodriver = 0;
296 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
297 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
298     &pci_do_power_nodriver, 0,
299   "Place a function into D3 state when no driver attaches to it.  0 means\n\
300 disable.  1 means conservatively place devices into D3 state.  2 means\n\
301 agressively place devices into D3 state.  3 means put absolutely everything\n\
302 in D3 state.");
303 
304 int pci_do_power_resume = 1;
305 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
306 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
307     &pci_do_power_resume, 1,
308   "Transition from D3 -> D0 on resume.");
309 
310 int pci_do_power_suspend = 1;
311 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
312 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
313     &pci_do_power_suspend, 1,
314   "Transition from D0 -> D3 on suspend.");
315 
316 static int pci_do_msi = 1;
317 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
318 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
319     "Enable support for MSI interrupts");
320 
321 static int pci_do_msix = 1;
322 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
323 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
324     "Enable support for MSI-X interrupts");
325 
326 static int pci_honor_msi_blacklist = 1;
327 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
328 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
329     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
330 
331 #if defined(__i386__) || defined(__amd64__)
332 static int pci_usb_takeover = 1;
333 #else
334 static int pci_usb_takeover = 0;
335 #endif
336 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
337 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
338     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
339 Disable this if you depend on BIOS emulation of USB devices, that is\n\
340 you use USB devices (like keyboard or mouse) but do not load USB drivers");
341 
342 static int pci_clear_bars;
343 TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
344 SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
345     "Ignore firmware-assigned resources for BARs.");
346 
347 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
348 static int pci_clear_buses;
349 TUNABLE_INT("hw.pci.clear_buses", &pci_clear_buses);
350 SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
351     "Ignore firmware-assigned bus numbers.");
352 #endif
353 
354 static int
355 pci_has_quirk(uint32_t devid, int quirk)
356 {
357 	const struct pci_quirk *q;
358 
359 	for (q = &pci_quirks[0]; q->devid; q++) {
360 		if (q->devid == devid && q->type == quirk)
361 			return (1);
362 	}
363 	return (0);
364 }
365 
/*
 * Find a device_t by bus/slot/function in domain 0.  Thin wrapper
 * around pci_find_dbsf() for the common single-domain case; returns
 * NULL when no such device has been enumerated.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
374 
375 /* Find a device_t by domain/bus/slot/function */
376 
377 device_t
378 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
379 {
380 	struct pci_devinfo *dinfo;
381 
382 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
383 		if ((dinfo->cfg.domain == domain) &&
384 		    (dinfo->cfg.bus == bus) &&
385 		    (dinfo->cfg.slot == slot) &&
386 		    (dinfo->cfg.func == func)) {
387 			return (dinfo->cfg.dev);
388 		}
389 	}
390 
391 	return (NULL);
392 }
393 
394 /* Find a device_t by vendor/device ID */
395 
396 device_t
397 pci_find_device(uint16_t vendor, uint16_t device)
398 {
399 	struct pci_devinfo *dinfo;
400 
401 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
402 		if ((dinfo->cfg.vendor == vendor) &&
403 		    (dinfo->cfg.device == device)) {
404 			return (dinfo->cfg.dev);
405 		}
406 	}
407 
408 	return (NULL);
409 }
410 
411 device_t
412 pci_find_class(uint8_t class, uint8_t subclass)
413 {
414 	struct pci_devinfo *dinfo;
415 
416 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
417 		if (dinfo->cfg.baseclass == class &&
418 		    dinfo->cfg.subclass == subclass) {
419 			return (dinfo->cfg.dev);
420 		}
421 	}
422 
423 	return (NULL);
424 }
425 
/*
 * printf(9) wrapper that prefixes the message with the device's
 * "pci<domain>:<bus>:<slot>:<func>: " location.  Returns the total
 * number of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
439 
440 /* return base address of memory or port map */
441 
442 static pci_addr_t
443 pci_mapbase(uint64_t mapreg)
444 {
445 
446 	if (PCI_BAR_MEM(mapreg))
447 		return (mapreg & PCIM_BAR_MEM_BASE);
448 	else
449 		return (mapreg & PCIM_BAR_IO_BASE);
450 }
451 
452 /* return map type of memory or port map */
453 
454 static const char *
455 pci_maptype(uint64_t mapreg)
456 {
457 
458 	if (PCI_BAR_IO(mapreg))
459 		return ("I/O Port");
460 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
461 		return ("Prefetchable Memory");
462 	return ("Memory");
463 }
464 
/* return log2 of map size decoded for memory or port map */

static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/*
	 * After writing all-ones to the BAR, the lowest set address bit
	 * gives the decoded size: count trailing zero bits of the base.
	 */
	base = pci_mapbase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
483 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* PCIM_BIOS_ADDR_MASK selects the address bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
492 
/* return log2 of map size decoded for device ROM */

static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/*
	 * Same trick as pci_mapsize(): the lowest set address bit of the
	 * sized ROM BAR gives the decoded window size.
	 */
	base = pci_rombase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
511 
512 /* return log2 of address range supported by map register */
513 
514 static int
515 pci_maprange(uint64_t mapreg)
516 {
517 	int ln2range = 0;
518 
519 	if (PCI_BAR_IO(mapreg))
520 		ln2range = 32;
521 	else
522 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
523 		case PCIM_BAR_MEM_32:
524 			ln2range = 32;
525 			break;
526 		case PCIM_BAR_MEM_1MB:
527 			ln2range = 20;
528 			break;
529 		case PCIM_BAR_MEM_64:
530 			ln2range = 64;
531 			break;
532 		}
533 	return (ln2range);
534 }
535 
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only plain (type 0) headers need this fixup. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
548 
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The subvendor/subdevice registers and the number of BARs live
	 * at header-type-specific offsets; fill in cfg accordingly.
	 * Unknown header types leave cfg untouched.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridge subvendor/subdevice come from a capability instead. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
572 
/*
 * read configuration header into pcicfgregs structure
 *
 * Allocates a pci_devinfo of the requested 'size' (callers may embed
 * it in a larger structure), fills in its cfg and conf members from
 * the device's config space, links it on the global device list, and
 * bumps pci_numdevs/pci_generation.  Returns NULL when no device
 * responds at the given address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones from the vendor/device register means nothing is there. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record the multi-function bit, then strip it from hdrtype. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror cfg into the pciio(4)-visible conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
648 
/*
 * Walk the device's PCI capability list and record the location (and
 * selected contents) of each capability this driver cares about --
 * power management, HyperTransport, MSI, MSI-X, VPD, bridge
 * subvendor, PCI-X, and PCI-express -- into the corresponding cfg
 * substructures.  Also sets the file-global pcie_chipset/pcix_chipset
 * hints when a bridge with the matching capability is seen.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* Table size field is N-1 encoded. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
809 
/*
 * PCI Vital Product Data
 */

/* Upper bound on polling iterations for a single VPD access. */
#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one aligned 32-bit word of VPD at offset 'reg' into *data.
 * Uses the REG/WREG macros still in scope from pci_read_cap().
 * Returns 0 on success or ENXIO if the device never signals
 * completion within PCI_VPD_TIMEOUT polls.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Hardware sets bit 15 of the address register when data is ready. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
834 
#if 0
/*
 * Write one aligned 32-bit word of VPD at offset 'reg'.  Mirror of
 * pci_read_vpd_reg(): the write is started by setting bit 15 of the
 * address register, and completion is signalled by hardware clearing
 * it.  Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
854 
855 #undef PCI_VPD_TIMEOUT
856 
/* Cursor state for sequentially reading a device's VPD byte stream. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched 32-bit word */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes consumed */
};
865 
866 static int
867 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
868 {
869 	uint32_t reg;
870 	uint8_t byte;
871 
872 	if (vrs->bytesinval == 0) {
873 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
874 			return (ENXIO);
875 		vrs->val = le32toh(reg);
876 		vrs->off += 4;
877 		byte = vrs->val & 0xff;
878 		vrs->bytesinval = 3;
879 	} else {
880 		vrs->val = vrs->val >> 8;
881 		byte = vrs->val & 0xff;
882 		vrs->bytesinval--;
883 	}
884 
885 	vrs->cksum += byte;
886 	*data = byte;
887 	return (0);
888 }
889 
890 static void
891 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
892 {
893 	struct vpd_readstate vrs;
894 	int state;
895 	int name;
896 	int remain;
897 	int i;
898 	int alloc, off;		/* alloc/off for RO/W arrays */
899 	int cksumvalid;
900 	int dflen;
901 	uint8_t byte;
902 	uint8_t byte2;
903 
904 	/* init vpd reader */
905 	vrs.bytesinval = 0;
906 	vrs.off = 0;
907 	vrs.pcib = pcib;
908 	vrs.cfg = cfg;
909 	vrs.cksum = 0;
910 
911 	state = 0;
912 	name = remain = i = 0;	/* shut up stupid gcc */
913 	alloc = off = 0;	/* shut up stupid gcc */
914 	dflen = 0;		/* shut up stupid gcc */
915 	cksumvalid = -1;
916 	while (state >= 0) {
917 		if (vpd_nextbyte(&vrs, &byte)) {
918 			state = -2;
919 			break;
920 		}
921 #if 0
922 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
923 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
924 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
925 #endif
926 		switch (state) {
927 		case 0:		/* item name */
928 			if (byte & 0x80) {
929 				if (vpd_nextbyte(&vrs, &byte2)) {
930 					state = -2;
931 					break;
932 				}
933 				remain = byte2;
934 				if (vpd_nextbyte(&vrs, &byte2)) {
935 					state = -2;
936 					break;
937 				}
938 				remain |= byte2 << 8;
939 				if (remain > (0x7f*4 - vrs.off)) {
940 					state = -1;
941 					pci_printf(cfg,
942 					    "invalid VPD data, remain %#x\n",
943 					    remain);
944 				}
945 				name = byte & 0x7f;
946 			} else {
947 				remain = byte & 0x7;
948 				name = (byte >> 3) & 0xf;
949 			}
950 			switch (name) {
951 			case 0x2:	/* String */
952 				cfg->vpd.vpd_ident = malloc(remain + 1,
953 				    M_DEVBUF, M_WAITOK);
954 				i = 0;
955 				state = 1;
956 				break;
957 			case 0xf:	/* End */
958 				state = -1;
959 				break;
960 			case 0x10:	/* VPD-R */
961 				alloc = 8;
962 				off = 0;
963 				cfg->vpd.vpd_ros = malloc(alloc *
964 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
965 				    M_WAITOK | M_ZERO);
966 				state = 2;
967 				break;
968 			case 0x11:	/* VPD-W */
969 				alloc = 8;
970 				off = 0;
971 				cfg->vpd.vpd_w = malloc(alloc *
972 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
973 				    M_WAITOK | M_ZERO);
974 				state = 5;
975 				break;
976 			default:	/* Invalid data, abort */
977 				state = -1;
978 				break;
979 			}
980 			break;
981 
982 		case 1:	/* Identifier String */
983 			cfg->vpd.vpd_ident[i++] = byte;
984 			remain--;
985 			if (remain == 0)  {
986 				cfg->vpd.vpd_ident[i] = '\0';
987 				state = 0;
988 			}
989 			break;
990 
991 		case 2:	/* VPD-R Keyword Header */
992 			if (off == alloc) {
993 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
994 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
995 				    M_DEVBUF, M_WAITOK | M_ZERO);
996 			}
997 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
998 			if (vpd_nextbyte(&vrs, &byte2)) {
999 				state = -2;
1000 				break;
1001 			}
1002 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1003 			if (vpd_nextbyte(&vrs, &byte2)) {
1004 				state = -2;
1005 				break;
1006 			}
1007 			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1008 			if (dflen == 0 &&
1009 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1010 			    2) == 0) {
1011 				/*
1012 				 * if this happens, we can't trust the rest
1013 				 * of the VPD.
1014 				 */
1015 				pci_printf(cfg, "bad keyword length: %d\n",
1016 				    dflen);
1017 				cksumvalid = 0;
1018 				state = -1;
1019 				break;
1020 			} else if (dflen == 0) {
1021 				cfg->vpd.vpd_ros[off].value = malloc(1 *
1022 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1023 				    M_DEVBUF, M_WAITOK);
1024 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1025 			} else
1026 				cfg->vpd.vpd_ros[off].value = malloc(
1027 				    (dflen + 1) *
1028 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1029 				    M_DEVBUF, M_WAITOK);
1030 			remain -= 3;
1031 			i = 0;
1032 			/* keep in sync w/ state 3's transistions */
1033 			if (dflen == 0 && remain == 0)
1034 				state = 0;
1035 			else if (dflen == 0)
1036 				state = 2;
1037 			else
1038 				state = 3;
1039 			break;
1040 
1041 		case 3:	/* VPD-R Keyword Value */
1042 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1043 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1044 			    "RV", 2) == 0 && cksumvalid == -1) {
1045 				if (vrs.cksum == 0)
1046 					cksumvalid = 1;
1047 				else {
1048 					if (bootverbose)
1049 						pci_printf(cfg,
1050 					    "bad VPD cksum, remain %hhu\n",
1051 						    vrs.cksum);
1052 					cksumvalid = 0;
1053 					state = -1;
1054 					break;
1055 				}
1056 			}
1057 			dflen--;
1058 			remain--;
1059 			/* keep in sync w/ state 2's transistions */
1060 			if (dflen == 0)
1061 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1062 			if (dflen == 0 && remain == 0) {
1063 				cfg->vpd.vpd_rocnt = off;
1064 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1065 				    off * sizeof(*cfg->vpd.vpd_ros),
1066 				    M_DEVBUF, M_WAITOK | M_ZERO);
1067 				state = 0;
1068 			} else if (dflen == 0)
1069 				state = 2;
1070 			break;
1071 
1072 		case 4:
1073 			remain--;
1074 			if (remain == 0)
1075 				state = 0;
1076 			break;
1077 
1078 		case 5:	/* VPD-W Keyword Header */
1079 			if (off == alloc) {
1080 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1081 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1082 				    M_DEVBUF, M_WAITOK | M_ZERO);
1083 			}
1084 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1085 			if (vpd_nextbyte(&vrs, &byte2)) {
1086 				state = -2;
1087 				break;
1088 			}
1089 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1090 			if (vpd_nextbyte(&vrs, &byte2)) {
1091 				state = -2;
1092 				break;
1093 			}
1094 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1095 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1096 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1097 			    sizeof(*cfg->vpd.vpd_w[off].value),
1098 			    M_DEVBUF, M_WAITOK);
1099 			remain -= 3;
1100 			i = 0;
1101 			/* keep in sync w/ state 6's transistions */
1102 			if (dflen == 0 && remain == 0)
1103 				state = 0;
1104 			else if (dflen == 0)
1105 				state = 5;
1106 			else
1107 				state = 6;
1108 			break;
1109 
1110 		case 6:	/* VPD-W Keyword Value */
1111 			cfg->vpd.vpd_w[off].value[i++] = byte;
1112 			dflen--;
1113 			remain--;
1114 			/* keep in sync w/ state 5's transistions */
1115 			if (dflen == 0)
1116 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1117 			if (dflen == 0 && remain == 0) {
1118 				cfg->vpd.vpd_wcnt = off;
1119 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1120 				    off * sizeof(*cfg->vpd.vpd_w),
1121 				    M_DEVBUF, M_WAITOK | M_ZERO);
1122 				state = 0;
1123 			} else if (dflen == 0)
1124 				state = 5;
1125 			break;
1126 
1127 		default:
1128 			pci_printf(cfg, "invalid state: %d\n", state);
1129 			state = -1;
1130 			break;
1131 		}
1132 	}
1133 
1134 	if (cksumvalid == 0 || state < -1) {
1135 		/* read-only data bad, clean up */
1136 		if (cfg->vpd.vpd_ros != NULL) {
1137 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1138 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1139 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1140 			cfg->vpd.vpd_ros = NULL;
1141 		}
1142 	}
1143 	if (state < -1) {
1144 		/* I/O error, clean up */
1145 		pci_printf(cfg, "failed to read VPD data.\n");
1146 		if (cfg->vpd.vpd_ident != NULL) {
1147 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1148 			cfg->vpd.vpd_ident = NULL;
1149 		}
1150 		if (cfg->vpd.vpd_w != NULL) {
1151 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1152 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1153 			free(cfg->vpd.vpd_w, M_DEVBUF);
1154 			cfg->vpd.vpd_w = NULL;
1155 		}
1156 	}
1157 	cfg->vpd.vpd_cached = 1;
1158 #undef REG
1159 #undef WREG
1160 }
1161 
1162 int
1163 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1164 {
1165 	struct pci_devinfo *dinfo = device_get_ivars(child);
1166 	pcicfgregs *cfg = &dinfo->cfg;
1167 
1168 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1169 		pci_read_vpd(device_get_parent(dev), cfg);
1170 
1171 	*identptr = cfg->vpd.vpd_ident;
1172 
1173 	if (*identptr == NULL)
1174 		return (ENXIO);
1175 
1176 	return (0);
1177 }
1178 
1179 int
1180 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1181 	const char **vptr)
1182 {
1183 	struct pci_devinfo *dinfo = device_get_ivars(child);
1184 	pcicfgregs *cfg = &dinfo->cfg;
1185 	int i;
1186 
1187 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1188 		pci_read_vpd(device_get_parent(dev), cfg);
1189 
1190 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1191 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1192 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1193 			*vptr = cfg->vpd.vpd_ros[i].value;
1194 			return (0);
1195 		}
1196 
1197 	*vptr = NULL;
1198 	return (ENXIO);
1199 }
1200 
1201 struct pcicfg_vpd *
1202 pci_fetch_vpd_list(device_t dev)
1203 {
1204 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1205 	pcicfgregs *cfg = &dinfo->cfg;
1206 
1207 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1208 		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1209 	return (&cfg->vpd);
1210 }
1211 
1212 /*
1213  * Find the requested HyperTransport capability and return the offset
1214  * in configuration space via the pointer provided.  The function
1215  * returns 0 on success and an error code otherwise.
1216  */
1217 int
1218 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1219 {
1220 	int ptr, error;
1221 	uint16_t val;
1222 
1223 	error = pci_find_cap(child, PCIY_HT, &ptr);
1224 	if (error)
1225 		return (error);
1226 
1227 	/*
1228 	 * Traverse the capabilities list checking each HT capability
1229 	 * to see if it matches the requested HT capability.
1230 	 */
1231 	while (ptr != 0) {
1232 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1233 		if (capability == PCIM_HTCAP_SLAVE ||
1234 		    capability == PCIM_HTCAP_HOST)
1235 			val &= 0xe000;
1236 		else
1237 			val &= PCIM_HTCMD_CAP_MASK;
1238 		if (val == capability) {
1239 			if (capreg != NULL)
1240 				*capreg = ptr;
1241 			return (0);
1242 		}
1243 
1244 		/* Skip to the next HT capability. */
1245 		while (ptr != 0) {
1246 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1247 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1248 			    PCIY_HT)
1249 				break;
1250 		}
1251 	}
1252 	return (ENOENT);
1253 }
1254 
1255 /*
1256  * Find the requested capability and return the offset in
1257  * configuration space via the pointer provided.  The function returns
1258  * 0 on success and an error code otherwise.
1259  */
1260 int
1261 pci_find_cap_method(device_t dev, device_t child, int capability,
1262     int *capreg)
1263 {
1264 	struct pci_devinfo *dinfo = device_get_ivars(child);
1265 	pcicfgregs *cfg = &dinfo->cfg;
1266 	u_int32_t status;
1267 	u_int8_t ptr;
1268 
1269 	/*
1270 	 * Check the CAP_LIST bit of the PCI status register first.
1271 	 */
1272 	status = pci_read_config(child, PCIR_STATUS, 2);
1273 	if (!(status & PCIM_STATUS_CAPPRESENT))
1274 		return (ENXIO);
1275 
1276 	/*
1277 	 * Determine the start pointer of the capabilities list.
1278 	 */
1279 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1280 	case PCIM_HDRTYPE_NORMAL:
1281 	case PCIM_HDRTYPE_BRIDGE:
1282 		ptr = PCIR_CAP_PTR;
1283 		break;
1284 	case PCIM_HDRTYPE_CARDBUS:
1285 		ptr = PCIR_CAP_PTR_2;
1286 		break;
1287 	default:
1288 		/* XXX: panic? */
1289 		return (ENXIO);		/* no extended capabilities support */
1290 	}
1291 	ptr = pci_read_config(child, ptr, 1);
1292 
1293 	/*
1294 	 * Traverse the capabilities list.
1295 	 */
1296 	while (ptr != 0) {
1297 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1298 			if (capreg != NULL)
1299 				*capreg = ptr;
1300 			return (0);
1301 		}
1302 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1303 	}
1304 
1305 	return (ENOENT);
1306 }
1307 
1308 /*
1309  * Find the requested extended capability and return the offset in
1310  * configuration space via the pointer provided.  The function returns
1311  * 0 on success and an error code otherwise.
1312  */
1313 int
1314 pci_find_extcap_method(device_t dev, device_t child, int capability,
1315     int *capreg)
1316 {
1317 	struct pci_devinfo *dinfo = device_get_ivars(child);
1318 	pcicfgregs *cfg = &dinfo->cfg;
1319 	uint32_t ecap;
1320 	uint16_t ptr;
1321 
1322 	/* Only supported for PCI-express devices. */
1323 	if (cfg->pcie.pcie_location == 0)
1324 		return (ENXIO);
1325 
1326 	ptr = PCIR_EXTCAP;
1327 	ecap = pci_read_config(child, ptr, 4);
1328 	if (ecap == 0xffffffff || ecap == 0)
1329 		return (ENOENT);
1330 	for (;;) {
1331 		if (PCI_EXTCAP_ID(ecap) == capability) {
1332 			if (capreg != NULL)
1333 				*capreg = ptr;
1334 			return (0);
1335 		}
1336 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1337 		if (ptr == 0)
1338 			break;
1339 		ecap = pci_read_config(child, ptr, 4);
1340 	}
1341 
1342 	return (ENOENT);
1343 }
1344 
1345 /*
1346  * Support for MSI-X message interrupts.
1347  */
/*
 * Program the MSI-X table entry at 'index' with the given message
 * address/data pair and re-sync any HyperTransport MSI mapping.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each table entry is 16 bytes: addr low, addr high, data, vctrl. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1364 
1365 void
1366 pci_mask_msix(device_t dev, u_int index)
1367 {
1368 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1369 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1370 	uint32_t offset, val;
1371 
1372 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1373 	offset = msix->msix_table_offset + index * 16 + 12;
1374 	val = bus_read_4(msix->msix_table_res, offset);
1375 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1376 		val |= PCIM_MSIX_VCTRL_MASK;
1377 		bus_write_4(msix->msix_table_res, offset, val);
1378 	}
1379 }
1380 
1381 void
1382 pci_unmask_msix(device_t dev, u_int index)
1383 {
1384 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1385 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1386 	uint32_t offset, val;
1387 
1388 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1389 	offset = msix->msix_table_offset + index * 16 + 12;
1390 	val = bus_read_4(msix->msix_table_res, offset);
1391 	if (val & PCIM_MSIX_VCTRL_MASK) {
1392 		val &= ~PCIM_MSIX_VCTRL_MASK;
1393 		bus_write_4(msix->msix_table_res, offset, val);
1394 	}
1395 }
1396 
1397 int
1398 pci_pending_msix(device_t dev, u_int index)
1399 {
1400 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1401 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1402 	uint32_t offset, bit;
1403 
1404 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1405 	offset = msix->msix_pba_offset + (index / 32) * 4;
1406 	bit = 1 << index % 32;
1407 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1408 }
1409 
1410 /*
1411  * Restore MSI-X registers and table during resume.  If MSI-X is
1412  * enabled then walk the virtual table to restore the actual MSI-X
1413  * table.
1414  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved control register (and its enable bit) last. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1442 
1443 /*
1444  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1445  * returned in *count.  After this function returns, each message will be
1446  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1447  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' is still the table entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if no messages were obtained. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is the number of messages successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 marks an unused table slot. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1582 
1583 /*
1584  * By default, pci_alloc_msix() will assign the allocated IRQ
1585  * resources consecutively to the first N messages in the MSI-X table.
1586  * However, device drivers may want to use different layouts if they
1587  * either receive fewer messages than they asked for, or they wish to
1588  * populate the MSI-X table sparsely.  This method allows the driver
1589  * to specify what layout it wants.  It must be called after a
1590  * successful pci_alloc_msix() but before any of the associated
1591  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1592  *
1593  * The 'vectors' array contains 'count' message vectors.  The array
1594  * maps directly to the MSI-X table in that index 0 in the array
1595  * specifies the vector for the first message in the MSI-X table, etc.
1596  * The vector value in each array index can either be 0 to indicate
1597  * that no vector should be assigned to a message slot, or it can be a
1598  * number from 1 to N (where N is the count returned from a
1599  * succcessful call to pci_alloc_msix()) to indicate which message
1600  * vector (IRQ) to be used for the corresponding message.
1601  *
1602  * On successful return, each message with a non-zero vector will have
1603  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1604  * 1.  Additionally, if any of the IRQs allocated via the previous
1605  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1606  * will be freed back to the system automatically.
1607  *
1608  * For example, suppose a driver has a MSI-X table with 6 messages and
1609  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1610  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1611  * C.  After the call to pci_alloc_msix(), the device will be setup to
1612  * have an MSI-X table of ABC--- (where - means no vector assigned).
1613  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1614  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1615  * be freed back to the system.  This device will also have valid
1616  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1617  *
1618  * In any case, the SYS_RES_IRQ rid X will always map to the message
1619  * at MSI-X table index X - 1 and will only be valid if a vector is
1620  * assigned to that table entry.
1621  */
1622 int
1623 pci_remap_msix_method(device_t dev, device_t child, int count,
1624     const u_int *vectors)
1625 {
1626 	struct pci_devinfo *dinfo = device_get_ivars(child);
1627 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1628 	struct resource_list_entry *rle;
1629 	int i, irq, j, *used;
1630 
1631 	/*
1632 	 * Have to have at least one message in the table but the
1633 	 * table can't be bigger than the actual MSI-X table in the
1634 	 * device.
1635 	 */
1636 	if (count == 0 || count > msix->msix_msgnum)
1637 		return (EINVAL);
1638 
1639 	/* Sanity check the vectors. */
1640 	for (i = 0; i < count; i++)
1641 		if (vectors[i] > msix->msix_alloc)
1642 			return (EINVAL);
1643 
1644 	/*
1645 	 * Make sure there aren't any holes in the vectors to be used.
1646 	 * It's a big pain to support it, and it doesn't really make
1647 	 * sense anyway.  Also, at least one vector must be used.
1648 	 */
1649 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1650 	    M_ZERO);
1651 	for (i = 0; i < count; i++)
1652 		if (vectors[i] != 0)
1653 			used[vectors[i] - 1] = 1;
1654 	for (i = 0; i < msix->msix_alloc - 1; i++)
1655 		if (used[i] == 0 && used[i + 1] == 1) {
1656 			free(used, M_DEVBUF);
1657 			return (EINVAL);
1658 		}
1659 	if (used[0] != 1) {
1660 		free(used, M_DEVBUF);
1661 		return (EINVAL);
1662 	}
1663 
1664 	/* Make sure none of the resources are allocated. */
1665 	for (i = 0; i < msix->msix_table_len; i++) {
1666 		if (msix->msix_table[i].mte_vector == 0)
1667 			continue;
1668 		if (msix->msix_table[i].mte_handlers > 0)
1669 			return (EBUSY);
1670 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1671 		KASSERT(rle != NULL, ("missing resource"));
1672 		if (rle->res != NULL)
1673 			return (EBUSY);
1674 	}
1675 
1676 	/* Free the existing resource list entries. */
1677 	for (i = 0; i < msix->msix_table_len; i++) {
1678 		if (msix->msix_table[i].mte_vector == 0)
1679 			continue;
1680 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1681 	}
1682 
1683 	/*
1684 	 * Build the new virtual table keeping track of which vectors are
1685 	 * used.
1686 	 */
1687 	free(msix->msix_table, M_DEVBUF);
1688 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1689 	    M_DEVBUF, M_WAITOK | M_ZERO);
1690 	for (i = 0; i < count; i++)
1691 		msix->msix_table[i].mte_vector = vectors[i];
1692 	msix->msix_table_len = count;
1693 
1694 	/* Free any unused IRQs and resize the vectors array if necessary. */
1695 	j = msix->msix_alloc - 1;
1696 	if (used[j] == 0) {
1697 		struct msix_vector *vec;
1698 
1699 		while (used[j] == 0) {
1700 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1701 			    msix->msix_vectors[j].mv_irq);
1702 			j--;
1703 		}
1704 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1705 		    M_WAITOK);
1706 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1707 		    (j + 1));
1708 		free(msix->msix_vectors, M_DEVBUF);
1709 		msix->msix_vectors = vec;
1710 		msix->msix_alloc = j + 1;
1711 	}
1712 	free(used, M_DEVBUF);
1713 
1714 	/* Map the IRQs onto the rids. */
1715 	for (i = 0; i < count; i++) {
1716 		if (vectors[i] == 0)
1717 			continue;
1718 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1719 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1720 		    irq, 1);
1721 	}
1722 
1723 	if (bootverbose) {
1724 		device_printf(child, "Remapped MSI-X IRQs as: ");
1725 		for (i = 0; i < count; i++) {
1726 			if (i != 0)
1727 				printf(", ");
1728 			if (vectors[i] == 0)
1729 				printf("---");
1730 			else
1731 				printf("%d",
1732 				    msix->msix_vectors[vectors[i]].mv_irq);
1733 		}
1734 		printf("\n");
1735 	}
1736 
1737 	return (0);
1738 }
1739 
/*
 * Disable MSI-X for 'child' and release all messages previously
 * allocated by pci_alloc_msix().  Fails with EBUSY if any message
 * still has an interrupt handler or an allocated SYS_RES_IRQ
 * resource, and with ENODEV if no messages are currently allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1786 
1787 /*
1788  * Return the max supported MSI-X messages this device supports.
1789  * Basically, assuming the MD code can alloc messages, this function
1790  * should return the maximum value that pci_alloc_msix() can return.
1791  * Thus, it is subject to the tunables, etc.
1792  */
1793 int
1794 pci_msix_count_method(device_t dev, device_t child)
1795 {
1796 	struct pci_devinfo *dinfo = device_get_ivars(child);
1797 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1798 
1799 	if (pci_do_msix && msix->msix_location != 0)
1800 		return (msix->msix_msgnum);
1801 	return (0);
1802 }
1803 
1804 /*
1805  * HyperTransport MSI mapping control
1806  */
1807 void
1808 pci_ht_map_msi(device_t dev, uint64_t addr)
1809 {
1810 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1811 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1812 
1813 	if (!ht->ht_msimap)
1814 		return;
1815 
1816 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1817 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1818 		/* Enable MSI -> HT mapping. */
1819 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1820 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1821 		    ht->ht_msictrl, 2);
1822 	}
1823 
1824 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1825 		/* Disable MSI -> HT mapping. */
1826 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1827 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1828 		    ht->ht_msictrl, 2);
1829 	}
1830 }
1831 
1832 int
1833 pci_get_max_read_req(device_t dev)
1834 {
1835 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1836 	int cap;
1837 	uint16_t val;
1838 
1839 	cap = dinfo->cfg.pcie.pcie_location;
1840 	if (cap == 0)
1841 		return (0);
1842 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1843 	val &= PCIEM_CTL_MAX_READ_REQUEST;
1844 	val >>= 12;
1845 	return (1 << (val + 7));
1846 }
1847 
1848 int
1849 pci_set_max_read_req(device_t dev, int size)
1850 {
1851 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1852 	int cap;
1853 	uint16_t val;
1854 
1855 	cap = dinfo->cfg.pcie.pcie_location;
1856 	if (cap == 0)
1857 		return (0);
1858 	if (size < 128)
1859 		size = 128;
1860 	if (size > 4096)
1861 		size = 4096;
1862 	size = (1 << (fls(size) - 1));
1863 	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1864 	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1865 	val |= (fls(size) - 8) << 12;
1866 	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1867 	return (size);
1868 }
1869 
1870 /*
1871  * Support for MSI message signalled interrupts.
1872  */
/*
 * Program the MSI address/data registers of 'dev', set the MSI enable
 * bit in the control register, and re-sync any HT MSI mapping.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable: data register follows the high address. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1899 
1900 void
1901 pci_disable_msi(device_t dev)
1902 {
1903 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1904 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1905 
1906 	/* Disable MSI -> HT mapping. */
1907 	pci_ht_map_msi(dev, 0);
1908 
1909 	/* Disable MSI in the control register. */
1910 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1911 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1912 	    2);
1913 }
1914 
1915 /*
1916  * Restore MSI registers during resume.  If MSI is enabled then
1917  * restore the data and address registers in addition to the control
1918  * register.
1919  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data pair before re-enabling. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored even when MSI is disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1945 
/*
 * Reprogram the message address/data for 'irq' after the MD interrupt
 * layer has retargeted it.  The IRQ is looked up first among the
 * device's allocated MSI messages and then among its MSI-X vectors;
 * on a match, fresh mapping data is requested from the parent bridge
 * and written back to the hardware.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable around the update, then re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
				/*
				 * NOTE(review): unlike the MSI case above, a
				 * successful MSI-X update falls through and
				 * this function still returns ENOENT --
				 * confirm whether any caller treats that as
				 * an error.
				 */
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2018 
2019 /*
2020  * Returns true if the specified device is blacklisted because MSI
2021  * doesn't work.
2022  */
2023 int
2024 pci_msi_device_blacklisted(device_t dev)
2025 {
2026 
2027 	if (!pci_honor_msi_blacklist)
2028 		return (0);
2029 
2030 	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2031 }
2032 
2033 /*
2034  * Determine if MSI is blacklisted globally on this system.  Currently,
2035  * we just check for blacklisted chipsets as represented by the
2036  * host-PCI bridge at device 0:0:0.  In the future, it may become
2037  * necessary to check other system attributes, such as the kenv values
2038  * that give the motherboard manufacturer and model number.
2039  */
2040 static int
2041 pci_msi_blacklisted(void)
2042 {
2043 	device_t dev;
2044 
2045 	if (!pci_honor_msi_blacklist)
2046 		return (0);
2047 
2048 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2049 	if (!(pcie_chipset || pcix_chipset)) {
2050 		if (vm_guest != VM_GUEST_NO) {
2051 			/*
2052 			 * Whitelist older chipsets in virtual
2053 			 * machines known to support MSI.
2054 			 */
2055 			dev = pci_find_bsf(0, 0, 0);
2056 			if (dev != NULL)
2057 				return (!pci_has_quirk(pci_get_devid(dev),
2058 					PCI_QUIRK_ENABLE_MSI_VM));
2059 		}
2060 		return (1);
2061 	}
2062 
2063 	dev = pci_find_bsf(0, 0, 0);
2064 	if (dev != NULL)
2065 		return (pci_msi_device_blacklisted(dev));
2066 	return (0);
2067 }
2068 
2069 /*
2070  * Returns true if the specified device is blacklisted because MSI-X
2071  * doesn't work.  Note that this assumes that if MSI doesn't work,
2072  * MSI-X doesn't either.
2073  */
2074 int
2075 pci_msix_device_blacklisted(device_t dev)
2076 {
2077 
2078 	if (!pci_honor_msi_blacklist)
2079 		return (0);
2080 
2081 	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2082 		return (1);
2083 
2084 	return (pci_msi_device_blacklisted(dev));
2085 }
2086 
2087 /*
2088  * Determine if MSI-X is blacklisted globally on this system.  If MSI
2089  * is blacklisted, assume that MSI-X is as well.  Check for additional
2090  * chipsets where MSI works but MSI-X does not.
2091  */
2092 static int
2093 pci_msix_blacklisted(void)
2094 {
2095 	device_t dev;
2096 
2097 	if (!pci_honor_msi_blacklist)
2098 		return (0);
2099 
2100 	dev = pci_find_bsf(0, 0, 0);
2101 	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2102 	    PCI_QUIRK_DISABLE_MSIX))
2103 		return (1);
2104 
2105 	return (pci_msi_blacklisted());
2106 }
2107 
2108 /*
2109  * Attempt to allocate *count MSI messages.  The actual number allocated is
2110  * returned in *count.  After this function returns, each message will be
2111  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2112  */
2113 int
2114 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2115 {
2116 	struct pci_devinfo *dinfo = device_get_ivars(child);
2117 	pcicfgregs *cfg = &dinfo->cfg;
2118 	struct resource_list_entry *rle;
2119 	int actual, error, i, irqs[32];
2120 	uint16_t ctrl;
2121 
2122 	/* Don't let count == 0 get us into trouble. */
2123 	if (*count == 0)
2124 		return (EINVAL);
2125 
2126 	/* If rid 0 is allocated, then fail. */
2127 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2128 	if (rle != NULL && rle->res != NULL)
2129 		return (ENXIO);
2130 
2131 	/* Already have allocated messages? */
2132 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2133 		return (ENXIO);
2134 
2135 	/* If MSI is blacklisted for this system, fail. */
2136 	if (pci_msi_blacklisted())
2137 		return (ENXIO);
2138 
2139 	/* MSI capability present? */
2140 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2141 		return (ENODEV);
2142 
2143 	if (bootverbose)
2144 		device_printf(child,
2145 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2146 		    *count, cfg->msi.msi_msgnum);
2147 
2148 	/* Don't ask for more than the device supports. */
2149 	actual = min(*count, cfg->msi.msi_msgnum);
2150 
2151 	/* Don't ask for more than 32 messages. */
2152 	actual = min(actual, 32);
2153 
2154 	/* MSI requires power of 2 number of messages. */
2155 	if (!powerof2(actual))
2156 		return (EINVAL);
2157 
2158 	for (;;) {
2159 		/* Try to allocate N messages. */
2160 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2161 		    actual, irqs);
2162 		if (error == 0)
2163 			break;
2164 		if (actual == 1)
2165 			return (error);
2166 
2167 		/* Try N / 2. */
2168 		actual >>= 1;
2169 	}
2170 
2171 	/*
2172 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2173 	 * resources in the irqs[] array, so add new resources
2174 	 * starting at rid 1.
2175 	 */
2176 	for (i = 0; i < actual; i++)
2177 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2178 		    irqs[i], irqs[i], 1);
2179 
2180 	if (bootverbose) {
2181 		if (actual == 1)
2182 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2183 		else {
2184 			int run;
2185 
2186 			/*
2187 			 * Be fancy and try to print contiguous runs
2188 			 * of IRQ values as ranges.  'run' is true if
2189 			 * we are in a range.
2190 			 */
2191 			device_printf(child, "using IRQs %d", irqs[0]);
2192 			run = 0;
2193 			for (i = 1; i < actual; i++) {
2194 
2195 				/* Still in a run? */
2196 				if (irqs[i] == irqs[i - 1] + 1) {
2197 					run = 1;
2198 					continue;
2199 				}
2200 
2201 				/* Finish previous range. */
2202 				if (run) {
2203 					printf("-%d", irqs[i - 1]);
2204 					run = 0;
2205 				}
2206 
2207 				/* Start new range. */
2208 				printf(",%d", irqs[i]);
2209 			}
2210 
2211 			/* Unfinished range? */
2212 			if (run)
2213 				printf("-%d", irqs[actual - 1]);
2214 			printf(" for MSI\n");
2215 		}
2216 	}
2217 
2218 	/* Update control register with actual count. */
2219 	ctrl = cfg->msi.msi_ctrl;
2220 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2221 	ctrl |= (ffs(actual) - 1) << 4;
2222 	cfg->msi.msi_ctrl = ctrl;
2223 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2224 
2225 	/* Update counts of alloc'd messages. */
2226 	cfg->msi.msi_alloc = actual;
2227 	cfg->msi.msi_handlers = 0;
2228 	*count = actual;
2229 	return (0);
2230 }
2231 
/*
 * Release the MSI messages associated with this device.
 *
 * Returns 0 on success, ENODEV if no messages are allocated, or EBUSY
 * if any message still has an active handler or an allocated resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQs so the bridge can release them below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2280 
2281 /*
2282  * Return the max supported MSI messages this device supports.
2283  * Basically, assuming the MD code can alloc messages, this function
2284  * should return the maximum value that pci_alloc_msi() can return.
2285  * Thus, it is subject to the tunables, etc.
2286  */
2287 int
2288 pci_msi_count_method(device_t dev, device_t child)
2289 {
2290 	struct pci_devinfo *dinfo = device_get_ivars(child);
2291 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2292 
2293 	if (pci_do_msi && msi->msi_location != 0)
2294 		return (msi->msi_msgnum);
2295 	return (0);
2296 }
2297 
2298 /* free pcicfgregs structure and all depending data structures */
2299 
2300 int
2301 pci_freecfg(struct pci_devinfo *dinfo)
2302 {
2303 	struct devlist *devlist_head;
2304 	struct pci_map *pm, *next;
2305 	int i;
2306 
2307 	devlist_head = &pci_devq;
2308 
2309 	if (dinfo->cfg.vpd.vpd_reg) {
2310 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2311 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2312 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2313 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2314 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2315 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2316 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2317 	}
2318 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2319 		free(pm, M_DEVBUF);
2320 	}
2321 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2322 	free(dinfo, M_DEVBUF);
2323 
2324 	/* increment the generation count */
2325 	pci_generation++;
2326 
2327 	/* we're losing one device */
2328 	pci_numdevs--;
2329 	return (0);
2330 }
2331 
2332 /*
2333  * PCI power manangement
2334  */
2335 int
2336 pci_set_powerstate_method(device_t dev, device_t child, int state)
2337 {
2338 	struct pci_devinfo *dinfo = device_get_ivars(child);
2339 	pcicfgregs *cfg = &dinfo->cfg;
2340 	uint16_t status;
2341 	int result, oldstate, highest, delay;
2342 
2343 	if (cfg->pp.pp_cap == 0)
2344 		return (EOPNOTSUPP);
2345 
2346 	/*
2347 	 * Optimize a no state change request away.  While it would be OK to
2348 	 * write to the hardware in theory, some devices have shown odd
2349 	 * behavior when going from D3 -> D3.
2350 	 */
2351 	oldstate = pci_get_powerstate(child);
2352 	if (oldstate == state)
2353 		return (0);
2354 
2355 	/*
2356 	 * The PCI power management specification states that after a state
2357 	 * transition between PCI power states, system software must
2358 	 * guarantee a minimal delay before the function accesses the device.
2359 	 * Compute the worst case delay that we need to guarantee before we
2360 	 * access the device.  Many devices will be responsive much more
2361 	 * quickly than this delay, but there are some that don't respond
2362 	 * instantly to state changes.  Transitions to/from D3 state require
2363 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2364 	 * is done below with DELAY rather than a sleeper function because
2365 	 * this function can be called from contexts where we cannot sleep.
2366 	 */
2367 	highest = (oldstate > state) ? oldstate : state;
2368 	if (highest == PCI_POWERSTATE_D3)
2369 	    delay = 10000;
2370 	else if (highest == PCI_POWERSTATE_D2)
2371 	    delay = 200;
2372 	else
2373 	    delay = 0;
2374 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2375 	    & ~PCIM_PSTAT_DMASK;
2376 	result = 0;
2377 	switch (state) {
2378 	case PCI_POWERSTATE_D0:
2379 		status |= PCIM_PSTAT_D0;
2380 		break;
2381 	case PCI_POWERSTATE_D1:
2382 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2383 			return (EOPNOTSUPP);
2384 		status |= PCIM_PSTAT_D1;
2385 		break;
2386 	case PCI_POWERSTATE_D2:
2387 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2388 			return (EOPNOTSUPP);
2389 		status |= PCIM_PSTAT_D2;
2390 		break;
2391 	case PCI_POWERSTATE_D3:
2392 		status |= PCIM_PSTAT_D3;
2393 		break;
2394 	default:
2395 		return (EINVAL);
2396 	}
2397 
2398 	if (bootverbose)
2399 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2400 		    state);
2401 
2402 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2403 	if (delay)
2404 		DELAY(delay);
2405 	return (0);
2406 }
2407 
2408 int
2409 pci_get_powerstate_method(device_t dev, device_t child)
2410 {
2411 	struct pci_devinfo *dinfo = device_get_ivars(child);
2412 	pcicfgregs *cfg = &dinfo->cfg;
2413 	uint16_t status;
2414 	int result;
2415 
2416 	if (cfg->pp.pp_cap != 0) {
2417 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2418 		switch (status & PCIM_PSTAT_DMASK) {
2419 		case PCIM_PSTAT_D0:
2420 			result = PCI_POWERSTATE_D0;
2421 			break;
2422 		case PCIM_PSTAT_D1:
2423 			result = PCI_POWERSTATE_D1;
2424 			break;
2425 		case PCIM_PSTAT_D2:
2426 			result = PCI_POWERSTATE_D2;
2427 			break;
2428 		case PCIM_PSTAT_D3:
2429 			result = PCI_POWERSTATE_D3;
2430 			break;
2431 		default:
2432 			result = PCI_POWERSTATE_UNKNOWN;
2433 			break;
2434 		}
2435 	} else {
2436 		/* No support, device is always at D0 */
2437 		result = PCI_POWERSTATE_D0;
2438 	}
2439 	return (result);
2440 }
2441 
2442 /*
2443  * Some convenience functions for PCI device drivers.
2444  */
2445 
2446 static __inline void
2447 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2448 {
2449 	uint16_t	command;
2450 
2451 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2452 	command |= bit;
2453 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2454 }
2455 
2456 static __inline void
2457 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2458 {
2459 	uint16_t	command;
2460 
2461 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2462 	command &= ~bit;
2463 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2464 }
2465 
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Turn on the bus-master enable bit in the command register. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2472 
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Turn off the bus-master enable bit in the command register. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2479 
2480 int
2481 pci_enable_io_method(device_t dev, device_t child, int space)
2482 {
2483 	uint16_t bit;
2484 
2485 	switch(space) {
2486 	case SYS_RES_IOPORT:
2487 		bit = PCIM_CMD_PORTEN;
2488 		break;
2489 	case SYS_RES_MEMORY:
2490 		bit = PCIM_CMD_MEMEN;
2491 		break;
2492 	default:
2493 		return (EINVAL);
2494 	}
2495 	pci_set_command_bit(dev, child, bit);
2496 	return (0);
2497 }
2498 
2499 int
2500 pci_disable_io_method(device_t dev, device_t child, int space)
2501 {
2502 	uint16_t bit;
2503 
2504 	switch(space) {
2505 	case SYS_RES_IOPORT:
2506 		bit = PCIM_CMD_PORTEN;
2507 		break;
2508 	case SYS_RES_MEMORY:
2509 		bit = PCIM_CMD_MEMEN;
2510 		break;
2511 	default:
2512 		return (EINVAL);
2513 	}
2514 	pci_clear_command_bit(dev, child, bit);
2515 	return (0);
2516 }
2517 
2518 /*
2519  * New style pci driver.  Parent device is either a pci-host-bridge or a
2520  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2521  */
2522 
/*
 * Dump a device's config-header fields and capability summary to the
 * console when booting verbose.  No-op unless bootverbose is set.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* Timer values are scaled to nanoseconds by the format. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based; 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* The table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2579 
2580 static int
2581 pci_porten(device_t dev)
2582 {
2583 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2584 }
2585 
2586 static int
2587 pci_memen(device_t dev)
2588 {
2589 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2590 }
2591 
/*
 * Read a BAR's current value and size it by writing all-ones and
 * reading back.  The current value is returned in *mapp and the
 * sizing read-back in *testvalp; the BAR is restored before return.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2655 
/*
 * Program 'base' into the BAR described by 'pm' and refresh pm_value
 * from the hardware.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/*
	 * Read the BAR back so pm_value reflects what the device
	 * actually latched rather than what we wrote.
	 */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2676 
2677 struct pci_map *
2678 pci_find_bar(device_t dev, int reg)
2679 {
2680 	struct pci_devinfo *dinfo;
2681 	struct pci_map *pm;
2682 
2683 	dinfo = device_get_ivars(dev);
2684 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2685 		if (pm->pm_reg == reg)
2686 			return (pm);
2687 	}
2688 	return (NULL);
2689 }
2690 
2691 int
2692 pci_bar_enabled(device_t dev, struct pci_map *pm)
2693 {
2694 	struct pci_devinfo *dinfo;
2695 	uint16_t cmd;
2696 
2697 	dinfo = device_get_ivars(dev);
2698 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2699 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2700 		return (0);
2701 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2702 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2703 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2704 	else
2705 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2706 }
2707 
/*
 * Record a BAR (register offset, raw value, and log2 size) for this
 * device, keeping the list sorted by register offset.  Returns the
 * new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted by reg. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list was empty; append in that case. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2732 
2733 static void
2734 pci_restore_bars(device_t dev)
2735 {
2736 	struct pci_devinfo *dinfo;
2737 	struct pci_map *pm;
2738 	int ln2range;
2739 
2740 	dinfo = device_get_ivars(dev);
2741 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2742 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2743 			ln2range = 32;
2744 		else
2745 			ln2range = pci_maprange(pm->pm_value);
2746 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2747 		if (ln2range == 64)
2748 			pci_write_config(dev, pm->pm_reg + 4,
2749 			    pm->pm_value >> 32, 4);
2750 	}
2751 }
2752 
2753 /*
2754  * Add a resource based on a pci map register. Return 1 if the map
2755  * register is a 32bit map register or 2 if it is a 64bit register.
2756  */
2757 static int
2758 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2759     int force, int prefetch)
2760 {
2761 	struct pci_map *pm;
2762 	pci_addr_t base, map, testval;
2763 	pci_addr_t start, end, count;
2764 	int barlen, basezero, flags, maprange, mapsize, type;
2765 	uint16_t cmd;
2766 	struct resource *res;
2767 
2768 	/*
2769 	 * The BAR may already exist if the device is a CardBus card
2770 	 * whose CIS is stored in this BAR.
2771 	 */
2772 	pm = pci_find_bar(dev, reg);
2773 	if (pm != NULL) {
2774 		maprange = pci_maprange(pm->pm_value);
2775 		barlen = maprange == 64 ? 2 : 1;
2776 		return (barlen);
2777 	}
2778 
2779 	pci_read_bar(dev, reg, &map, &testval);
2780 	if (PCI_BAR_MEM(map)) {
2781 		type = SYS_RES_MEMORY;
2782 		if (map & PCIM_BAR_MEM_PREFETCH)
2783 			prefetch = 1;
2784 	} else
2785 		type = SYS_RES_IOPORT;
2786 	mapsize = pci_mapsize(testval);
2787 	base = pci_mapbase(map);
2788 #ifdef __PCI_BAR_ZERO_VALID
2789 	basezero = 0;
2790 #else
2791 	basezero = base == 0;
2792 #endif
2793 	maprange = pci_maprange(map);
2794 	barlen = maprange == 64 ? 2 : 1;
2795 
2796 	/*
2797 	 * For I/O registers, if bottom bit is set, and the next bit up
2798 	 * isn't clear, we know we have a BAR that doesn't conform to the
2799 	 * spec, so ignore it.  Also, sanity check the size of the data
2800 	 * areas to the type of memory involved.  Memory must be at least
2801 	 * 16 bytes in size, while I/O ranges must be at least 4.
2802 	 */
2803 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2804 		return (barlen);
2805 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2806 	    (type == SYS_RES_IOPORT && mapsize < 2))
2807 		return (barlen);
2808 
2809 	/* Save a record of this BAR. */
2810 	pm = pci_add_bar(dev, reg, map, mapsize);
2811 	if (bootverbose) {
2812 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2813 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2814 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2815 			printf(", port disabled\n");
2816 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2817 			printf(", memory disabled\n");
2818 		else
2819 			printf(", enabled\n");
2820 	}
2821 
2822 	/*
2823 	 * If base is 0, then we have problems if this architecture does
2824 	 * not allow that.  It is best to ignore such entries for the
2825 	 * moment.  These will be allocated later if the driver specifically
2826 	 * requests them.  However, some removable busses look better when
2827 	 * all resources are allocated, so allow '0' to be overriden.
2828 	 *
2829 	 * Similarly treat maps whose values is the same as the test value
2830 	 * read back.  These maps have had all f's written to them by the
2831 	 * BIOS in an attempt to disable the resources.
2832 	 */
2833 	if (!force && (basezero || map == testval))
2834 		return (barlen);
2835 	if ((u_long)base != base) {
2836 		device_printf(bus,
2837 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2838 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2839 		    pci_get_function(dev), reg);
2840 		return (barlen);
2841 	}
2842 
2843 	/*
2844 	 * This code theoretically does the right thing, but has
2845 	 * undesirable side effects in some cases where peripherals
2846 	 * respond oddly to having these bits enabled.  Let the user
2847 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2848 	 * default).
2849 	 */
2850 	if (pci_enable_io_modes) {
2851 		/* Turn on resources that have been left off by a lazy BIOS */
2852 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2853 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2854 			cmd |= PCIM_CMD_PORTEN;
2855 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2856 		}
2857 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2858 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2859 			cmd |= PCIM_CMD_MEMEN;
2860 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2861 		}
2862 	} else {
2863 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2864 			return (barlen);
2865 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2866 			return (barlen);
2867 	}
2868 
2869 	count = (pci_addr_t)1 << mapsize;
2870 	flags = RF_ALIGNMENT_LOG2(mapsize);
2871 	if (prefetch)
2872 		flags |= RF_PREFETCHABLE;
2873 	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2874 		start = 0;	/* Let the parent decide. */
2875 		end = ~0ul;
2876 	} else {
2877 		start = base;
2878 		end = base + count - 1;
2879 	}
2880 	resource_list_add(rl, type, reg, start, end, count);
2881 
2882 	/*
2883 	 * Try to allocate the resource for this BAR from our parent
2884 	 * so that this resource range is already reserved.  The
2885 	 * driver for this device will later inherit this resource in
2886 	 * pci_alloc_resource().
2887 	 */
2888 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2889 	    flags);
2890 	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2891 		/*
2892 		 * If the allocation fails, try to allocate a resource for
2893 		 * this BAR using any available range.  The firmware felt
2894 		 * it was important enough to assign a resource, so don't
2895 		 * disable decoding if we can help it.
2896 		 */
2897 		resource_list_delete(rl, type, reg);
2898 		resource_list_add(rl, type, reg, 0, ~0ul, count);
2899 		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2900 		    count, flags);
2901 	}
2902 	if (res == NULL) {
2903 		/*
2904 		 * If the allocation fails, delete the resource list entry
2905 		 * and disable decoding for this device.
2906 		 *
2907 		 * If the driver requests this resource in the future,
2908 		 * pci_reserve_map() will try to allocate a fresh
2909 		 * resource range.
2910 		 */
2911 		resource_list_delete(rl, type, reg);
2912 		pci_disable_io(dev, type);
2913 		if (bootverbose)
2914 			device_printf(bus,
2915 			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2916 			    pci_get_domain(dev), pci_get_bus(dev),
2917 			    pci_get_slot(dev), pci_get_function(dev), reg);
2918 	} else {
2919 		start = rman_get_start(res);
2920 		pci_write_bar(dev, pm, start);
2921 	}
2922 	return (barlen);
2923 }
2924 
2925 /*
2926  * For ATA devices we need to decide early what addressing mode to use.
2927  * Legacy demands that the primary and secondary ATA ports sits on the
2928  * same addresses that old ISA hardware did. This dictates that we use
2929  * those addresses and ignore the BAR's if we cannot set PCI native
2930  * addressing mode.
2931  */
2932 static void
2933 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2934     uint32_t prefetchmask)
2935 {
2936 	struct resource *r;
2937 	int rid, type, progif;
2938 #if 0
2939 	/* if this device supports PCI native addressing use it */
2940 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2941 	if ((progif & 0x8a) == 0x8a) {
2942 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2943 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2944 			printf("Trying ATA native PCI addressing mode\n");
2945 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2946 		}
2947 	}
2948 #endif
2949 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2950 	type = SYS_RES_IOPORT;
2951 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2952 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2953 		    prefetchmask & (1 << 0));
2954 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2955 		    prefetchmask & (1 << 1));
2956 	} else {
2957 		rid = PCIR_BAR(0);
2958 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2959 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2960 		    0x1f7, 8, 0);
2961 		rid = PCIR_BAR(1);
2962 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2963 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2964 		    0x3f6, 1, 0);
2965 	}
2966 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2967 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2968 		    prefetchmask & (1 << 2));
2969 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2970 		    prefetchmask & (1 << 3));
2971 	} else {
2972 		rid = PCIR_BAR(2);
2973 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2974 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2975 		    0x177, 8, 0);
2976 		rid = PCIR_BAR(3);
2977 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2978 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2979 		    0x376, 1, 0);
2980 	}
2981 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2982 	    prefetchmask & (1 << 4));
2983 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2984 	    prefetchmask & (1 << 5));
2985 }
2986 
/*
 * Determine the IRQ to use for this function's legacy INTx interrupt,
 * record it in the intline config register if it changed, and add it
 * to the device's resource list as rid 0.  If 'force_route' is set,
 * prefer asking the bus to route an interrupt over trusting the
 * BIOS-provided intline value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable range (1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		/* Fall back to the firmware-provided intline value. */
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3034 
3035 /* Perform early OHCI takeover from SMM. */
3036 static void
3037 ohci_early_takeover(device_t self)
3038 {
3039 	struct resource *res;
3040 	uint32_t ctl;
3041 	int rid;
3042 	int i;
3043 
3044 	rid = PCIR_BAR(0);
3045 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3046 	if (res == NULL)
3047 		return;
3048 
3049 	ctl = bus_read_4(res, OHCI_CONTROL);
3050 	if (ctl & OHCI_IR) {
3051 		if (bootverbose)
3052 			printf("ohci early: "
3053 			    "SMM active, request owner change\n");
3054 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3055 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3056 			DELAY(1000);
3057 			ctl = bus_read_4(res, OHCI_CONTROL);
3058 		}
3059 		if (ctl & OHCI_IR) {
3060 			if (bootverbose)
3061 				printf("ohci early: "
3062 				    "SMM does not respond, resetting\n");
3063 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3064 		}
3065 		/* Disable interrupts */
3066 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3067 	}
3068 
3069 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3070 }
3071 
3072 /* Perform early UHCI takeover from SMM. */
3073 static void
3074 uhci_early_takeover(device_t self)
3075 {
3076 	struct resource *res;
3077 	int rid;
3078 
3079 	/*
3080 	 * Set the PIRQD enable bit and switch off all the others. We don't
3081 	 * want legacy support to interfere with us XXX Does this also mean
3082 	 * that the BIOS won't touch the keyboard anymore if it is connected
3083 	 * to the ports of the root hub?
3084 	 */
3085 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3086 
3087 	/* Disable interrupts */
3088 	rid = PCI_UHCI_BASE_REG;
3089 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3090 	if (res != NULL) {
3091 		bus_write_2(res, UHCI_INTR, 0);
3092 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3093 	}
3094 }
3095 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Walk the extended capability list in config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A non-zero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms for the BIOS to clear its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3151 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones sentinel keeps the first XHCI_XECP_NEXT(eec) non-zero. */
	eec = -1;

	/*
	 * Synchronise with the BIOS if it owns the controller.
	 * Capability offsets are in 32-bit words, hence the << 2 scaling.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* A non-zero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3213 
3214 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a bridge
 * (or CardBus) device from our parent's bus-number resource manager.
 * If the programmed range is invalid or cannot be reserved, zero the
 * secbus/subbus registers so the numbers are reassigned later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge and CardBus headers carry secondary bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the range from register 0x41 when it is valid. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Only apply on a Compal-built planar, product "08A0". */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/*
	 * Invalid range or failed reservation: fall through and zero
	 * both registers so the bus numbers get reassigned later.
	 */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3319 
/*
 * Allocate the secondary bus number range (rid 0, PCI_RES_BUS) for a
 * bridge child.  If the range was not previously reserved, lazily
 * reserve it now and program the child's secbus/subbus registers to
 * match the reservation.  Returns NULL for non-bridge children or on
 * reservation failure.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge and CardBus headers carry secondary bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* The bus number range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without RF_ACTIVE; activation happens below. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to match the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3370 #endif
3371 
/*
 * Populate a device's resource list: map its BARs (with special
 * handling for legacy ATA and per-device quirks), assign its INTx
 * interrupt, perform early USB takeover from SMM, and reserve any
 * secondary bus range behind bridges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Quirk table keys on device-id-in-high-word format. */
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* 'i' advances by the BAR width pci_add_map() returns. */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			/* Non-zero devid here means a quirk matched. */
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from SMM before the OS drives them. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3453 
3454 void
3455 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3456 {
3457 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3458 	device_t pcib = device_get_parent(dev);
3459 	struct pci_devinfo *dinfo;
3460 	int maxslots;
3461 	int s, f, pcifunchigh;
3462 	uint8_t hdrtype;
3463 
3464 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3465 	    ("dinfo_size too small"));
3466 	maxslots = PCIB_MAXSLOTS(pcib);
3467 	for (s = 0; s <= maxslots; s++) {
3468 		pcifunchigh = 0;
3469 		f = 0;
3470 		DELAY(1);
3471 		hdrtype = REG(PCIR_HDRTYPE, 1);
3472 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3473 			continue;
3474 		if (hdrtype & PCIM_MFDEV)
3475 			pcifunchigh = PCI_FUNCMAX;
3476 		for (f = 0; f <= pcifunchigh; f++) {
3477 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3478 			    dinfo_size);
3479 			if (dinfo != NULL) {
3480 				pci_add_child(dev, dinfo);
3481 			}
3482 		}
3483 	}
3484 #undef REG
3485 }
3486 
3487 void
3488 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3489 {
3490 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3491 	device_set_ivars(dinfo->cfg.dev, dinfo);
3492 	resource_list_init(&dinfo->resources);
3493 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3494 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3495 	pci_print_verbose(dinfo);
3496 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3497 }
3498 
/*
 * Generic PCI bus probe.  Returns BUS_PROBE_GENERIC so that more
 * specific PCI bus subclasses can win the probe instead.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3508 
/*
 * Common attach work shared by PCI bus drivers: optionally reserve
 * our own bus number from the parent, and set up the DMA tag the
 * children will inherit.  Returns 0 on success or ENXIO if the bus
 * number cannot be reserved.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number so nothing else can claim it. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Create a boundary-restricted DMA tag only at the top of a
	 * PCI hierarchy (our grandparent is not itself a "pci"
	 * device); nested PCI busses inherit the parent's tag.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/*
	 * NOTE: the assignment below is the body of this 'if' when
	 * PCI_DMA_BOUNDARY is defined; otherwise it runs
	 * unconditionally.
	 */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3555 
3556 static int
3557 pci_attach(device_t dev)
3558 {
3559 	int busno, domain, error;
3560 
3561 	error = pci_attach_common(dev);
3562 	if (error)
3563 		return (error);
3564 
3565 	/*
3566 	 * Since there can be multiple independantly numbered PCI
3567 	 * busses on systems with multiple PCI domains, we can't use
3568 	 * the unit number to decide which bus we are probing. We ask
3569 	 * the parent pcib what our domain and bus numbers are.
3570 	 */
3571 	domain = pcib_get_domain(dev);
3572 	busno = pcib_get_bus(dev);
3573 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3574 	return (bus_generic_attach(dev));
3575 }
3576 
3577 #ifdef PCI_RES_BUS
3578 static int
3579 pci_detach(device_t dev)
3580 {
3581 	struct pci_softc *sc;
3582 	int error;
3583 
3584 	error = bus_generic_detach(dev);
3585 	if (error)
3586 		return (error);
3587 	sc = device_get_softc(dev);
3588 	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3589 }
3590 #endif
3591 
3592 static void
3593 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3594     int state)
3595 {
3596 	device_t child, pcib;
3597 	struct pci_devinfo *dinfo;
3598 	int dstate, i;
3599 
3600 	/*
3601 	 * Set the device to the given state.  If the firmware suggests
3602 	 * a different power state, use it instead.  If power management
3603 	 * is not present, the firmware is responsible for managing
3604 	 * device power.  Skip children who aren't attached since they
3605 	 * are handled separately.
3606 	 */
3607 	pcib = device_get_parent(dev);
3608 	for (i = 0; i < numdevs; i++) {
3609 		child = devlist[i];
3610 		dinfo = device_get_ivars(child);
3611 		dstate = state;
3612 		if (device_is_attached(child) &&
3613 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3614 			pci_set_powerstate(child, dstate);
3615 	}
3616 }
3617 
3618 int
3619 pci_suspend(device_t dev)
3620 {
3621 	device_t child, *devlist;
3622 	struct pci_devinfo *dinfo;
3623 	int error, i, numdevs;
3624 
3625 	/*
3626 	 * Save the PCI configuration space for each child and set the
3627 	 * device in the appropriate power state for this sleep state.
3628 	 */
3629 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3630 		return (error);
3631 	for (i = 0; i < numdevs; i++) {
3632 		child = devlist[i];
3633 		dinfo = device_get_ivars(child);
3634 		pci_cfg_save(child, dinfo, 0);
3635 	}
3636 
3637 	/* Suspend devices before potentially powering them down. */
3638 	error = bus_generic_suspend(dev);
3639 	if (error) {
3640 		free(devlist, M_TEMP);
3641 		return (error);
3642 	}
3643 	if (pci_do_power_suspend)
3644 		pci_set_power_children(dev, devlist, numdevs,
3645 		    PCI_POWERSTATE_D3);
3646 	free(devlist, M_TEMP);
3647 	return (0);
3648 }
3649 
3650 int
3651 pci_resume(device_t dev)
3652 {
3653 	device_t child, *devlist;
3654 	struct pci_devinfo *dinfo;
3655 	int error, i, numdevs;
3656 
3657 	/*
3658 	 * Set each child to D0 and restore its PCI configuration space.
3659 	 */
3660 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3661 		return (error);
3662 	if (pci_do_power_resume)
3663 		pci_set_power_children(dev, devlist, numdevs,
3664 		    PCI_POWERSTATE_D0);
3665 
3666 	/* Now the device is powered up, restore its config space. */
3667 	for (i = 0; i < numdevs; i++) {
3668 		child = devlist[i];
3669 		dinfo = device_get_ivars(child);
3670 
3671 		pci_cfg_restore(child, dinfo);
3672 		if (!device_is_attached(child))
3673 			pci_cfg_save(child, dinfo, 1);
3674 	}
3675 
3676 	/*
3677 	 * Resume critical devices first, then everything else later.
3678 	 */
3679 	for (i = 0; i < numdevs; i++) {
3680 		child = devlist[i];
3681 		switch (pci_get_class(child)) {
3682 		case PCIC_DISPLAY:
3683 		case PCIC_MEMORY:
3684 		case PCIC_BRIDGE:
3685 		case PCIC_BASEPERIPH:
3686 			DEVICE_RESUME(child);
3687 			break;
3688 		}
3689 	}
3690 	for (i = 0; i < numdevs; i++) {
3691 		child = devlist[i];
3692 		switch (pci_get_class(child)) {
3693 		case PCIC_DISPLAY:
3694 		case PCIC_MEMORY:
3695 		case PCIC_BRIDGE:
3696 		case PCIC_BASEPERIPH:
3697 			break;
3698 		default:
3699 			DEVICE_RESUME(child);
3700 		}
3701 	}
3702 	free(devlist, M_TEMP);
3703 	return (0);
3704 }
3705 
3706 static void
3707 pci_load_vendor_data(void)
3708 {
3709 	caddr_t data;
3710 	void *ptr;
3711 	size_t sz;
3712 
3713 	data = preload_search_by_type("pci_vendor_data");
3714 	if (data != NULL) {
3715 		ptr = preload_fetch_addr(data);
3716 		sz = preload_fetch_size(data);
3717 		if (ptr != NULL && sz != 0) {
3718 			pci_vendordata = ptr;
3719 			pci_vendordata_size = sz;
3720 			/* terminate the database */
3721 			pci_vendordata[pci_vendordata_size] = '\n';
3722 		}
3723 	}
3724 }
3725 
3726 void
3727 pci_driver_added(device_t dev, driver_t *driver)
3728 {
3729 	int numdevs;
3730 	device_t *devlist;
3731 	device_t child;
3732 	struct pci_devinfo *dinfo;
3733 	int i;
3734 
3735 	if (bootverbose)
3736 		device_printf(dev, "driver added\n");
3737 	DEVICE_IDENTIFY(driver, dev);
3738 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3739 		return;
3740 	for (i = 0; i < numdevs; i++) {
3741 		child = devlist[i];
3742 		if (device_get_state(child) != DS_NOTPRESENT)
3743 			continue;
3744 		dinfo = device_get_ivars(child);
3745 		pci_print_verbose(dinfo);
3746 		if (bootverbose)
3747 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3748 		pci_cfg_restore(child, dinfo);
3749 		if (device_probe_and_attach(child) != 0)
3750 			pci_child_detached(dev, child);
3751 	}
3752 	free(devlist, M_TEMP);
3753 }
3754 
/*
 * Set up an interrupt handler for a child.  For direct children this
 * also manages the INTx disable bit and, for MSI/MSI-X resources
 * (rid > 0), maps and enables the message vector on first use.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* rid 0 is the legacy INTx resource; MSI/MSI-X use rid >= 1. */
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X table entries are indexed by rid - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first handler. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* On mapping failure undo the generic setup above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3846 
3847 int
3848 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3849     void *cookie)
3850 {
3851 	struct msix_table_entry *mte;
3852 	struct resource_list_entry *rle;
3853 	struct pci_devinfo *dinfo;
3854 	int error, rid;
3855 
3856 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3857 		return (EINVAL);
3858 
3859 	/* If this isn't a direct child, just bail out */
3860 	if (device_get_parent(child) != dev)
3861 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3862 
3863 	rid = rman_get_rid(irq);
3864 	if (rid == 0) {
3865 		/* Mask INTx */
3866 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3867 	} else {
3868 		/*
3869 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3870 		 * decrement the appropriate handlers count and mask the
3871 		 * MSI-X message, or disable MSI messages if the count
3872 		 * drops to 0.
3873 		 */
3874 		dinfo = device_get_ivars(child);
3875 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3876 		if (rle->res != irq)
3877 			return (EINVAL);
3878 		if (dinfo->cfg.msi.msi_alloc > 0) {
3879 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3880 			    ("MSI-X index too high"));
3881 			if (dinfo->cfg.msi.msi_handlers == 0)
3882 				return (EINVAL);
3883 			dinfo->cfg.msi.msi_handlers--;
3884 			if (dinfo->cfg.msi.msi_handlers == 0)
3885 				pci_disable_msi(child);
3886 		} else {
3887 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3888 			    ("No MSI or MSI-X interrupts allocated"));
3889 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3890 			    ("MSI-X index too high"));
3891 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3892 			if (mte->mte_handlers == 0)
3893 				return (EINVAL);
3894 			mte->mte_handlers--;
3895 			if (mte->mte_handlers == 0)
3896 				pci_mask_msix(child, rid - 1);
3897 		}
3898 	}
3899 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3900 	if (rid > 0)
3901 		KASSERT(error == 0,
3902 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3903 	return (error);
3904 }
3905 
3906 int
3907 pci_print_child(device_t dev, device_t child)
3908 {
3909 	struct pci_devinfo *dinfo;
3910 	struct resource_list *rl;
3911 	int retval = 0;
3912 
3913 	dinfo = device_get_ivars(child);
3914 	rl = &dinfo->resources;
3915 
3916 	retval += bus_print_child_header(dev, child);
3917 
3918 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3919 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3920 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3921 	if (device_get_flags(dev))
3922 		retval += printf(" flags %#x", device_get_flags(dev));
3923 
3924 	retval += printf(" at device %d.%d", pci_get_slot(child),
3925 	    pci_get_function(child));
3926 
3927 	retval += bus_print_child_footer(dev, child);
3928 
3929 	return (retval);
3930 }
3931 
3932 static const struct
3933 {
3934 	int		class;
3935 	int		subclass;
3936 	const char	*desc;
3937 } pci_nomatch_tab[] = {
3938 	{PCIC_OLD,		-1,			"old"},
3939 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3940 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3941 	{PCIC_STORAGE,		-1,			"mass storage"},
3942 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3943 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3944 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3945 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3946 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3947 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3948 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3949 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3950 	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
3951 	{PCIC_NETWORK,		-1,			"network"},
3952 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3953 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3954 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3955 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3956 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3957 	{PCIC_DISPLAY,		-1,			"display"},
3958 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3959 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3960 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3961 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3962 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3963 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3964 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3965 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3966 	{PCIC_MEMORY,		-1,			"memory"},
3967 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3968 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3969 	{PCIC_BRIDGE,		-1,			"bridge"},
3970 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3971 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3972 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3973 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3974 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3975 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3976 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3977 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3978 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3979 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3980 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3981 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3982 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3983 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3984 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3985 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3986 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3987 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3988 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3989 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3990 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3991 	{PCIC_INPUTDEV,		-1,			"input device"},
3992 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3993 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3994 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3995 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3996 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3997 	{PCIC_DOCKING,		-1,			"docking station"},
3998 	{PCIC_PROCESSOR,	-1,			"processor"},
3999 	{PCIC_SERIALBUS,	-1,			"serial bus"},
4000 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
4001 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
4002 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
4003 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
4004 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
4005 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
4006 	{PCIC_WIRELESS,		-1,			"wireless controller"},
4007 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
4008 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
4009 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
4010 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
4011 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
4012 	{PCIC_SATCOM,		-1,			"satellite communication"},
4013 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
4014 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
4015 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
4016 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
4017 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
4018 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
4019 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
4020 	{PCIC_DASP,		-1,			"dasp"},
4021 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
4022 	{0, 0,		NULL}
4023 };
4024 
4025 void
4026 pci_probe_nomatch(device_t dev, device_t child)
4027 {
4028 	int i;
4029 	const char *cp, *scp;
4030 	char *device;
4031 
4032 	/*
4033 	 * Look for a listing for this device in a loaded device database.
4034 	 */
4035 	if ((device = pci_describe_device(child)) != NULL) {
4036 		device_printf(dev, "<%s>", device);
4037 		free(device, M_DEVBUF);
4038 	} else {
4039 		/*
4040 		 * Scan the class/subclass descriptions for a general
4041 		 * description.
4042 		 */
4043 		cp = "unknown";
4044 		scp = NULL;
4045 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4046 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4047 				if (pci_nomatch_tab[i].subclass == -1) {
4048 					cp = pci_nomatch_tab[i].desc;
4049 				} else if (pci_nomatch_tab[i].subclass ==
4050 				    pci_get_subclass(child)) {
4051 					scp = pci_nomatch_tab[i].desc;
4052 				}
4053 			}
4054 		}
4055 		device_printf(dev, "<%s%s%s>",
4056 		    cp ? cp : "",
4057 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4058 		    scp ? scp : "");
4059 	}
4060 	printf(" at device %d.%d (no driver attached)\n",
4061 	    pci_get_slot(child), pci_get_function(child));
4062 	pci_cfg_save(child, device_get_ivars(child), 1);
4063 }
4064 
/*
 * Bus method: called after a child's driver has detached.  Reclaims
 * any resources the driver leaked (complaining about each kind), then
 * saves the device's config registers via pci_cfg_save().
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	/* setstate=1: also power the device down where that is safe. */
	pci_cfg_save(child, dinfo, 1);
}
4096 
4097 /*
4098  * Parse the PCI device database, if loaded, and return a pointer to a
4099  * description of the device.
4100  *
4101  * The database is flat text formatted as follows:
4102  *
4103  * Any line not in a valid format is ignored.
4104  * Lines are terminated with newline '\n' characters.
4105  *
4106  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4107  * the vendor name.
4108  *
4109  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4110  * - devices cannot be listed without a corresponding VENDOR line.
4111  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4112  * another TAB, then the device name.
4113  */
4114 
4115 /*
4116  * Assuming (ptr) points to the beginning of a line in the database,
4117  * return the vendor or device and description of the next entry.
4118  * The value of (vendor) or (device) inappropriate for the entry type
4119  * is set to -1.  Returns nonzero at the end of the database.
4120  *
 * Note that this is not fully robust in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialise it.
4124  */
4125 static int
4126 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4127 {
4128 	char	*cp = *ptr;
4129 	int	left;
4130 
4131 	*device = -1;
4132 	*vendor = -1;
4133 	**desc = '\0';
4134 	for (;;) {
4135 		left = pci_vendordata_size - (cp - pci_vendordata);
4136 		if (left <= 0) {
4137 			*ptr = cp;
4138 			return(1);
4139 		}
4140 
4141 		/* vendor entry? */
4142 		if (*cp != '\t' &&
4143 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4144 			break;
4145 		/* device entry? */
4146 		if (*cp == '\t' &&
4147 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4148 			break;
4149 
4150 		/* skip to next line */
4151 		while (*cp != '\n' && left > 0) {
4152 			cp++;
4153 			left--;
4154 		}
4155 		if (*cp == '\n') {
4156 			cp++;
4157 			left--;
4158 		}
4159 	}
4160 	/* skip to next line */
4161 	while (*cp != '\n' && left > 0) {
4162 		cp++;
4163 		left--;
4164 	}
4165 	if (*cp == '\n' && left > 0)
4166 		cp++;
4167 	*ptr = cp;
4168 	return(0);
4169 }
4170 
/*
 * Build a malloc'd "vendor, device" description string for (dev) from
 * the loaded vendor database.  Returns NULL if no database is loaded,
 * no vendor entry matches, or memory cannot be allocated.  The caller
 * must free the returned string with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor description. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* 80-byte scratch buffer for the device description. */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device match for this vendor. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor entry: stop searching. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No textual match: fall back to the numeric device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+3" covers the ", " separator plus the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4223 
4224 int
4225 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4226 {
4227 	struct pci_devinfo *dinfo;
4228 	pcicfgregs *cfg;
4229 
4230 	dinfo = device_get_ivars(child);
4231 	cfg = &dinfo->cfg;
4232 
4233 	switch (which) {
4234 	case PCI_IVAR_ETHADDR:
4235 		/*
4236 		 * The generic accessor doesn't deal with failure, so
4237 		 * we set the return value, then return an error.
4238 		 */
4239 		*((uint8_t **) result) = NULL;
4240 		return (EINVAL);
4241 	case PCI_IVAR_SUBVENDOR:
4242 		*result = cfg->subvendor;
4243 		break;
4244 	case PCI_IVAR_SUBDEVICE:
4245 		*result = cfg->subdevice;
4246 		break;
4247 	case PCI_IVAR_VENDOR:
4248 		*result = cfg->vendor;
4249 		break;
4250 	case PCI_IVAR_DEVICE:
4251 		*result = cfg->device;
4252 		break;
4253 	case PCI_IVAR_DEVID:
4254 		*result = (cfg->device << 16) | cfg->vendor;
4255 		break;
4256 	case PCI_IVAR_CLASS:
4257 		*result = cfg->baseclass;
4258 		break;
4259 	case PCI_IVAR_SUBCLASS:
4260 		*result = cfg->subclass;
4261 		break;
4262 	case PCI_IVAR_PROGIF:
4263 		*result = cfg->progif;
4264 		break;
4265 	case PCI_IVAR_REVID:
4266 		*result = cfg->revid;
4267 		break;
4268 	case PCI_IVAR_INTPIN:
4269 		*result = cfg->intpin;
4270 		break;
4271 	case PCI_IVAR_IRQ:
4272 		*result = cfg->intline;
4273 		break;
4274 	case PCI_IVAR_DOMAIN:
4275 		*result = cfg->domain;
4276 		break;
4277 	case PCI_IVAR_BUS:
4278 		*result = cfg->bus;
4279 		break;
4280 	case PCI_IVAR_SLOT:
4281 		*result = cfg->slot;
4282 		break;
4283 	case PCI_IVAR_FUNCTION:
4284 		*result = cfg->func;
4285 		break;
4286 	case PCI_IVAR_CMDREG:
4287 		*result = cfg->cmdreg;
4288 		break;
4289 	case PCI_IVAR_CACHELNSZ:
4290 		*result = cfg->cachelnsz;
4291 		break;
4292 	case PCI_IVAR_MINGNT:
4293 		*result = cfg->mingnt;
4294 		break;
4295 	case PCI_IVAR_MAXLAT:
4296 		*result = cfg->maxlat;
4297 		break;
4298 	case PCI_IVAR_LATTIMER:
4299 		*result = cfg->lattimer;
4300 		break;
4301 	default:
4302 		return (ENOENT);
4303 	}
4304 	return (0);
4305 }
4306 
4307 int
4308 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4309 {
4310 	struct pci_devinfo *dinfo;
4311 
4312 	dinfo = device_get_ivars(child);
4313 
4314 	switch (which) {
4315 	case PCI_IVAR_INTPIN:
4316 		dinfo->cfg.intpin = value;
4317 		return (0);
4318 	case PCI_IVAR_ETHADDR:
4319 	case PCI_IVAR_SUBVENDOR:
4320 	case PCI_IVAR_SUBDEVICE:
4321 	case PCI_IVAR_VENDOR:
4322 	case PCI_IVAR_DEVICE:
4323 	case PCI_IVAR_DEVID:
4324 	case PCI_IVAR_CLASS:
4325 	case PCI_IVAR_SUBCLASS:
4326 	case PCI_IVAR_PROGIF:
4327 	case PCI_IVAR_REVID:
4328 	case PCI_IVAR_IRQ:
4329 	case PCI_IVAR_DOMAIN:
4330 	case PCI_IVAR_BUS:
4331 	case PCI_IVAR_SLOT:
4332 	case PCI_IVAR_FUNCTION:
4333 		return (EINVAL);	/* disallow for now */
4334 
4335 	default:
4336 		return (ENOENT);
4337 	}
4338 }
4339 
4340 #include "opt_ddb.h"
4341 #ifdef DDB
4342 #include <ddb/ddb.h>
4343 #include <sys/cons.h>
4344 
4345 /*
4346  * List resources based on pci map registers, used for within ddb
4347  */
4348 
/*
 * DDB "show pciregs" command handler: walks the global PCI device
 * list and prints a pciconf(8)-style summary line for each device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4388 #endif /* DDB */
4389 
/*
 * Lazily reserve a resource for a BAR that was not reserved earlier.
 * Sizes the BAR by probing it (or reuses the size recorded from a
 * previously failed attempt), validates that the requested resource
 * type matches the BAR's type, reserves a suitably sized and aligned
 * range, and programs the BAR with the assigned address.  Returns the
 * reserved (inactive) resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;	/* mapsize is log2 of the size */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4487 
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed up the tree.  For direct children this
 * performs lazy allocation: routing an INTx interrupt on first use
 * and reserving BAR resources that were not reserved earlier.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/* Requests for our grandchildren are passed up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* The reservation above (if any) makes this allocation succeed. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4566 
/*
 * Bus method: release a resource held by a child.  Requests from
 * grandchildren, and PCI-PCI bridge window resources (which are not
 * BARs), are passed up the tree; everything else is released through
 * the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	/* Requests for our grandchildren are passed up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4601 
4602 int
4603 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4604     struct resource *r)
4605 {
4606 	struct pci_devinfo *dinfo;
4607 	int error;
4608 
4609 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4610 	if (error)
4611 		return (error);
4612 
4613 	/* Enable decoding in the command register when activating BARs. */
4614 	if (device_get_parent(child) == dev) {
4615 		/* Device ROMs need their decoding explicitly enabled. */
4616 		dinfo = device_get_ivars(child);
4617 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4618 			pci_write_bar(child, pci_find_bar(child, rid),
4619 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4620 		switch (type) {
4621 		case SYS_RES_IOPORT:
4622 		case SYS_RES_MEMORY:
4623 			error = PCI_ENABLE_IO(dev, child, type);
4624 			break;
4625 		}
4626 	}
4627 	return (error);
4628 }
4629 
4630 int
4631 pci_deactivate_resource(device_t dev, device_t child, int type,
4632     int rid, struct resource *r)
4633 {
4634 	struct pci_devinfo *dinfo;
4635 	int error;
4636 
4637 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4638 	if (error)
4639 		return (error);
4640 
4641 	/* Disable decoding for device ROMs. */
4642 	if (device_get_parent(child) == dev) {
4643 		dinfo = device_get_ivars(child);
4644 		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4645 			pci_write_bar(child, pci_find_bar(child, rid),
4646 			    rman_get_start(r));
4647 	}
4648 	return (0);
4649 }
4650 
/*
 * Detach (if attached) and destroy a child device, disabling its
 * memory/I-O decoding and releasing any resources that are still
 * allocated along the way.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * An active or busy entry means the driver never
			 * released it; complain and force the release so
			 * the entry can be unreserved below.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4690 
4691 void
4692 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4693 {
4694 	struct pci_devinfo *dinfo;
4695 	struct resource_list *rl;
4696 	struct resource_list_entry *rle;
4697 
4698 	if (device_get_parent(child) != dev)
4699 		return;
4700 
4701 	dinfo = device_get_ivars(child);
4702 	rl = &dinfo->resources;
4703 	rle = resource_list_find(rl, type, rid);
4704 	if (rle == NULL)
4705 		return;
4706 
4707 	if (rle->res) {
4708 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4709 		    resource_list_busy(rl, type, rid)) {
4710 			device_printf(dev, "delete_resource: "
4711 			    "Resource still owned by child, oops. "
4712 			    "(type=%d, rid=%d, addr=%lx)\n",
4713 			    type, rid, rman_get_start(rle->res));
4714 			return;
4715 		}
4716 		resource_list_unreserve(rl, dev, child, type, rid);
4717 	}
4718 	resource_list_delete(rl, type, rid);
4719 }
4720 
4721 struct resource_list *
4722 pci_get_resource_list (device_t dev, device_t child)
4723 {
4724 	struct pci_devinfo *dinfo = device_get_ivars(child);
4725 
4726 	return (&dinfo->resources);
4727 }
4728 
4729 bus_dma_tag_t
4730 pci_get_dma_tag(device_t bus, device_t dev)
4731 {
4732 	struct pci_softc *sc = device_get_softc(bus);
4733 
4734 	return (sc->sc_dma_tag);
4735 }
4736 
4737 uint32_t
4738 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4739 {
4740 	struct pci_devinfo *dinfo = device_get_ivars(child);
4741 	pcicfgregs *cfg = &dinfo->cfg;
4742 
4743 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4744 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4745 }
4746 
4747 void
4748 pci_write_config_method(device_t dev, device_t child, int reg,
4749     uint32_t val, int width)
4750 {
4751 	struct pci_devinfo *dinfo = device_get_ivars(child);
4752 	pcicfgregs *cfg = &dinfo->cfg;
4753 
4754 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4755 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4756 }
4757 
4758 int
4759 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4760     size_t buflen)
4761 {
4762 
4763 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4764 	    pci_get_function(child));
4765 	return (0);
4766 }
4767 
4768 int
4769 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4770     size_t buflen)
4771 {
4772 	struct pci_devinfo *dinfo;
4773 	pcicfgregs *cfg;
4774 
4775 	dinfo = device_get_ivars(child);
4776 	cfg = &dinfo->cfg;
4777 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4778 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4779 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4780 	    cfg->progif);
4781 	return (0);
4782 }
4783 
4784 int
4785 pci_assign_interrupt_method(device_t dev, device_t child)
4786 {
4787 	struct pci_devinfo *dinfo = device_get_ivars(child);
4788 	pcicfgregs *cfg = &dinfo->cfg;
4789 
4790 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4791 	    cfg->intpin));
4792 }
4793 
4794 static int
4795 pci_modevent(module_t mod, int what, void *arg)
4796 {
4797 	static struct cdev *pci_cdev;
4798 
4799 	switch (what) {
4800 	case MOD_LOAD:
4801 		STAILQ_INIT(&pci_devq);
4802 		pci_generation = 0;
4803 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4804 		    "pci");
4805 		pci_load_vendor_data();
4806 		break;
4807 
4808 	case MOD_UNLOAD:
4809 		destroy_dev(pci_cdev);
4810 		break;
4811 	}
4812 
4813 	return (0);
4814 }
4815 
/*
 * Restore the PCI Express capability control registers saved by
 * pci_cfg_save_pcie().  Which registers exist depends on the
 * capability version and device/port type, so only the ones the
 * device implements are written.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: all v2+ devices, or v1 ports and endpoints. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: all v2+ devices, or v1 ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: root ports and root event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register set only exists in v2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4851 
4852 static void
4853 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4854 {
4855 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4856 	    dinfo->cfg.pcix.pcix_command,  2);
4857 }
4858 
/*
 * Restore a device's saved config registers, e.g. after a suspend or
 * a power-state transition.  The device is returned to D0 first; then
 * BARs, the writable header registers, and saved PCIe/PCI-X and
 * MSI/MSI-X capability state are rewritten.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4908 
/*
 * Snapshot the PCI Express capability control registers so they can
 * later be rewritten by pci_cfg_restore_pcie().  The set of registers
 * read mirrors the set written there, gated by capability version and
 * device/port type.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: all v2+ devices, or v1 ports and endpoints. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: all v2+ devices, or v1 ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and root event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register set only exists in v2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4946 
4947 static void
4948 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4949 {
4950 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4951 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4952 }
4953 
/*
 * Save the writable portion of a device's type 0 configuration header
 * (and any PCIe/PCI-X capability registers we track) into dinfo so that
 * pci_cfg_restore() can replay them later.  If setstate is non-zero and
 * the pci_do_power_nodriver policy permits, also place the device in D3.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/* Capability registers are only saved when the capability exists. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Intentional fallthroughs: each level powers down a superset. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5039 
5040 /* Wrapper APIs suitable for device driver use. */
5041 void
5042 pci_save_state(device_t dev)
5043 {
5044 	struct pci_devinfo *dinfo;
5045 
5046 	dinfo = device_get_ivars(dev);
5047 	pci_cfg_save(dev, dinfo, 0);
5048 }
5049 
5050 void
5051 pci_restore_state(device_t dev)
5052 {
5053 	struct pci_devinfo *dinfo;
5054 
5055 	dinfo = device_get_ivars(dev);
5056 	pci_cfg_restore(dev, dinfo);
5057 }
5058