xref: /freebsd/sys/dev/pci/pci.c (revision a0dd79dbdf917a8fbe2762d668f05a7c9f682b22)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #define	PCIR_IS_BIOS(cfg, reg)						\
74 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 static pci_addr_t	pci_mapbase(uint64_t mapreg);
78 static const char	*pci_maptype(uint64_t mapreg);
79 static int		pci_mapsize(uint64_t testval);
80 static int		pci_maprange(uint64_t mapreg);
81 static pci_addr_t	pci_rombase(uint64_t mapreg);
82 static int		pci_romsize(uint64_t testval);
83 static void		pci_fixancient(pcicfgregs *cfg);
84 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85 
86 static int		pci_porten(device_t dev);
87 static int		pci_memen(device_t dev);
88 static void		pci_assign_interrupt(device_t bus, device_t dev,
89 			    int force_route);
90 static int		pci_add_map(device_t bus, device_t dev, int reg,
91 			    struct resource_list *rl, int force, int prefetch);
92 static int		pci_probe(device_t dev);
93 static int		pci_attach(device_t dev);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_enable_msix(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix(device_t dev, u_int index);
115 static void		pci_unmask_msix(device_t dev, u_int index);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
/*
 * Method table for the PCI bus driver: the generic device and bus
 * interfaces plus the PCI-specific operations (config-space access,
 * power states, VPD and MSI/MSI-X) declared in "pci_if.h".
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
175 
176 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
177 
178 static devclass_t pci_devclass;
179 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
180 MODULE_VERSION(pci, 1);
181 
182 static char	*pci_vendordata;
183 static size_t	pci_vendordata_size;
184 
/* One entry in the device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;		/* quirk-type-specific (e.g. register offset) */
	int	arg2;		/* quirk-type-specific, currently unused */
};
194 
195 static const struct pci_quirk const pci_quirks[] = {
196 	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
197 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
198 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
199 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
200 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
201 
202 	/*
203 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
204 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
205 	 */
206 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
207 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208 
209 	/*
210 	 * MSI doesn't work on earlier Intel chipsets including
211 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
212 	 */
213 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
214 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
215 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
218 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
219 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
220 
221 	/*
222 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
223 	 * bridge.
224 	 */
225 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226 
227 	/*
228 	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
229 	 * VMware.
230 	 */
231 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },
232 
233 	/*
234 	 * Some virtualization environments emulate an older chipset
235 	 * but support MSI just fine.  QEMU uses the Intel 82440.
236 	 */
237 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
238 
239 	{ 0 }
240 };
241 
242 /* map register information */
243 #define	PCI_MAPMEM	0x01	/* memory map */
244 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
245 #define	PCI_MAPPORT	0x04	/* port map */
246 
247 struct devlist pci_devq;
248 uint32_t pci_generation;
249 uint32_t pci_numdevs = 0;
250 static int pcie_chipset, pcix_chipset;
251 
252 /* sysctl vars */
253 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
254 
255 static int pci_enable_io_modes = 1;
256 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
257 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
258     &pci_enable_io_modes, 1,
259     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
260 enable these bits correctly.  We'd like to do this all the time, but there\n\
261 are some peripherals that this causes problems with.");
262 
263 static int pci_do_power_nodriver = 0;
264 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
265 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
266     &pci_do_power_nodriver, 0,
267   "Place a function into D3 state when no driver attaches to it.  0 means\n\
268 disable.  1 means conservatively place devices into D3 state.  2 means\n\
269 agressively place devices into D3 state.  3 means put absolutely everything\n\
270 in D3 state.");
271 
272 int pci_do_power_resume = 1;
273 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
274 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
275     &pci_do_power_resume, 1,
276   "Transition from D3 -> D0 on resume.");
277 
278 int pci_do_power_suspend = 1;
279 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
280 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
281     &pci_do_power_suspend, 1,
282   "Transition from D0 -> D3 on suspend.");
283 
284 static int pci_do_msi = 1;
285 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
286 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
287     "Enable support for MSI interrupts");
288 
289 static int pci_do_msix = 1;
290 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
291 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
292     "Enable support for MSI-X interrupts");
293 
294 static int pci_honor_msi_blacklist = 1;
295 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
296 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
297     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
298 
299 #if defined(__i386__) || defined(__amd64__)
300 static int pci_usb_takeover = 1;
301 #else
302 static int pci_usb_takeover = 0;
303 #endif
304 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
305 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
306     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
307 Disable this if you depend on BIOS emulation of USB devices, that is\n\
308 you use USB devices (like keyboard or mouse) but do not load USB drivers");
309 
310 /* Find a device_t by bus/slot/function in domain 0 */
311 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: look up bus/slot/function in domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
318 
319 /* Find a device_t by domain/bus/slot/function */
320 
321 device_t
322 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
323 {
324 	struct pci_devinfo *dinfo;
325 
326 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
327 		if ((dinfo->cfg.domain == domain) &&
328 		    (dinfo->cfg.bus == bus) &&
329 		    (dinfo->cfg.slot == slot) &&
330 		    (dinfo->cfg.func == func)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 /* Find a device_t by vendor/device ID */
339 
340 device_t
341 pci_find_device(uint16_t vendor, uint16_t device)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.vendor == vendor) &&
347 		    (dinfo->cfg.device == device)) {
348 			return (dinfo->cfg.dev);
349 		}
350 	}
351 
352 	return (NULL);
353 }
354 
355 device_t
356 pci_find_class(uint8_t class, uint8_t subclass)
357 {
358 	struct pci_devinfo *dinfo;
359 
360 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
361 		if (dinfo->cfg.baseclass == class &&
362 		    dinfo->cfg.subclass == subclass) {
363 			return (dinfo->cfg.dev);
364 		}
365 	}
366 
367 	return (NULL);
368 }
369 
/*
 * printf() prefixed with the "pci<domain>:<bus>:<slot>:<func>: "
 * location of the given config registers.  Returns the total number
 * of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
383 
384 /* return base address of memory or port map */
385 
386 static pci_addr_t
387 pci_mapbase(uint64_t mapreg)
388 {
389 
390 	if (PCI_BAR_MEM(mapreg))
391 		return (mapreg & PCIM_BAR_MEM_BASE);
392 	else
393 		return (mapreg & PCIM_BAR_IO_BASE);
394 }
395 
396 /* return map type of memory or port map */
397 
398 static const char *
399 pci_maptype(uint64_t mapreg)
400 {
401 
402 	if (PCI_BAR_IO(mapreg))
403 		return ("I/O Port");
404 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
405 		return ("Prefetchable Memory");
406 	return ("Memory");
407 }
408 
409 /* return log2 of map size decoded for memory or port map */
410 
/* Return log2 of the map size decoded for a memory or port map. */
static int
pci_mapsize(uint64_t testval)
{
	int ln2size;

	/*
	 * The size is the number of trailing zero bits in the base
	 * address probed back from the BAR; zero yields a size of 0.
	 */
	ln2size = 0;
	for (testval = pci_mapbase(testval);
	    testval != 0 && (testval & 1) == 0; testval >>= 1)
		ln2size++;
	return (ln2size);
}
427 
428 /* return base address of device ROM */
429 
/* Return the base address encoded in an expansion ROM map register. */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
436 
437 /* return log2 of map size decided for device ROM */
438 
/* Return log2 of the ROM size decoded from a probed expansion ROM BAR. */
static int
pci_romsize(uint64_t testval)
{
	int ln2size;

	/* Count trailing zero bits of the masked ROM base address. */
	ln2size = 0;
	for (testval = pci_rombase(testval);
	    testval != 0 && (testval & 1) == 0; testval >>= 1)
		ln2size++;
	return (ln2size);
}
455 
456 /* return log2 of address range supported by map register */
457 
458 static int
459 pci_maprange(uint64_t mapreg)
460 {
461 	int ln2range = 0;
462 
463 	if (PCI_BAR_IO(mapreg))
464 		ln2range = 32;
465 	else
466 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
467 		case PCIM_BAR_MEM_32:
468 			ln2range = 32;
469 			break;
470 		case PCIM_BAR_MEM_1MB:
471 			ln2range = 20;
472 			break;
473 		case PCIM_BAR_MEM_64:
474 			ln2range = 64;
475 			break;
476 		}
477 	return (ln2range);
478 }
479 
480 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
481 
482 static void
483 pci_fixancient(pcicfgregs *cfg)
484 {
485 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
486 		return;
487 
488 	/* PCI to PCI bridges use header type 1 */
489 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
490 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
491 }
492 
493 /* extract header type specific config data */
494 
/*
 * Read the config fields whose offsets depend on the header type:
 * subvendor/subdevice IDs and the number of BARs.  Bridges (type 1)
 * only get nummaps set here; their subvendor/subdevice are filled in
 * later from the PCIY_SUBVENDOR capability in pci_read_cap().
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
516 
517 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of the function at domain 'd', bus
 * 'b', slot 's', function 'f'.  If a device responds there (the
 * vendor/device register is not all-ones), allocate a pci_devinfo of
 * 'size' bytes (callers may embed it in a larger structure), fill in
 * both the raw config registers and the pciconf summary, append it to
 * the global pci_devq list and return it.  Returns NULL if the slot
 * is empty.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* Config reads from an empty slot return all-ones. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/*
		 * NOTE(review): M_WAITOK allocations should not return
		 * NULL, so the check below looks redundant — confirm
		 * before removing.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* The multi-function flag lives in the header type register. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the status bit says so. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the raw registers into the pciconf summary. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
592 
/*
 * Walk the standard capability list of the device described by 'cfg'
 * and record the location and key registers of the capabilities this
 * driver cares about: power management, HyperTransport (slave and MSI
 * mapping), MSI, MSI-X, VPD, bridge subvendor IDs, PCI-X and
 * PCI-express.  Called only when the status register advertises a
 * capability list (PCIM_STATUS_CAPPRESENT).
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location varies by header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* The table and PBA each live in a BAR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
749 
750 /*
751  * PCI Vital Product Data
752  */
753 
754 #define	PCI_VPD_TIMEOUT		1000000
755 
756 static int
757 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
758 {
759 	int count = PCI_VPD_TIMEOUT;
760 
761 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
762 
763 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
764 
765 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
766 		if (--count < 0)
767 			return (ENXIO);
768 		DELAY(1);	/* limit looping */
769 	}
770 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
771 
772 	return (0);
773 }
774 
#if 0
/*
 * Write one 4-byte-aligned word of VPD data (currently compiled out;
 * kept for reference).  Mirror of pci_read_vpd_reg(): start the write,
 * then poll until the hardware clears the completion flag.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	/* Fixed assertion-message typo: "must by" -> "must be". */
	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
794 
795 #undef PCI_VPD_TIMEOUT
796 
/* Cursor state for walking a device's VPD data stream. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running checksum; 0 when valid */
};
805 
806 static int
807 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
808 {
809 	uint32_t reg;
810 	uint8_t byte;
811 
812 	if (vrs->bytesinval == 0) {
813 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
814 			return (ENXIO);
815 		vrs->val = le32toh(reg);
816 		vrs->off += 4;
817 		byte = vrs->val & 0xff;
818 		vrs->bytesinval = 3;
819 	} else {
820 		vrs->val = vrs->val >> 8;
821 		byte = vrs->val & 0xff;
822 		vrs->bytesinval--;
823 	}
824 
825 	vrs->cksum += byte;
826 	*data = byte;
827 	return (0);
828 }
829 
830 static void
831 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
832 {
833 	struct vpd_readstate vrs;
834 	int state;
835 	int name;
836 	int remain;
837 	int i;
838 	int alloc, off;		/* alloc/off for RO/W arrays */
839 	int cksumvalid;
840 	int dflen;
841 	uint8_t byte;
842 	uint8_t byte2;
843 
844 	/* init vpd reader */
845 	vrs.bytesinval = 0;
846 	vrs.off = 0;
847 	vrs.pcib = pcib;
848 	vrs.cfg = cfg;
849 	vrs.cksum = 0;
850 
851 	state = 0;
852 	name = remain = i = 0;	/* shut up stupid gcc */
853 	alloc = off = 0;	/* shut up stupid gcc */
854 	dflen = 0;		/* shut up stupid gcc */
855 	cksumvalid = -1;
856 	while (state >= 0) {
857 		if (vpd_nextbyte(&vrs, &byte)) {
858 			state = -2;
859 			break;
860 		}
861 #if 0
862 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
863 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
864 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
865 #endif
866 		switch (state) {
867 		case 0:		/* item name */
868 			if (byte & 0x80) {
869 				if (vpd_nextbyte(&vrs, &byte2)) {
870 					state = -2;
871 					break;
872 				}
873 				remain = byte2;
874 				if (vpd_nextbyte(&vrs, &byte2)) {
875 					state = -2;
876 					break;
877 				}
878 				remain |= byte2 << 8;
879 				if (remain > (0x7f*4 - vrs.off)) {
880 					state = -1;
881 					printf(
882 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
883 					    cfg->domain, cfg->bus, cfg->slot,
884 					    cfg->func, remain);
885 				}
886 				name = byte & 0x7f;
887 			} else {
888 				remain = byte & 0x7;
889 				name = (byte >> 3) & 0xf;
890 			}
891 			switch (name) {
892 			case 0x2:	/* String */
893 				cfg->vpd.vpd_ident = malloc(remain + 1,
894 				    M_DEVBUF, M_WAITOK);
895 				i = 0;
896 				state = 1;
897 				break;
898 			case 0xf:	/* End */
899 				state = -1;
900 				break;
901 			case 0x10:	/* VPD-R */
902 				alloc = 8;
903 				off = 0;
904 				cfg->vpd.vpd_ros = malloc(alloc *
905 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
906 				    M_WAITOK | M_ZERO);
907 				state = 2;
908 				break;
909 			case 0x11:	/* VPD-W */
910 				alloc = 8;
911 				off = 0;
912 				cfg->vpd.vpd_w = malloc(alloc *
913 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
914 				    M_WAITOK | M_ZERO);
915 				state = 5;
916 				break;
917 			default:	/* Invalid data, abort */
918 				state = -1;
919 				break;
920 			}
921 			break;
922 
923 		case 1:	/* Identifier String */
924 			cfg->vpd.vpd_ident[i++] = byte;
925 			remain--;
926 			if (remain == 0)  {
927 				cfg->vpd.vpd_ident[i] = '\0';
928 				state = 0;
929 			}
930 			break;
931 
932 		case 2:	/* VPD-R Keyword Header */
933 			if (off == alloc) {
934 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
935 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
936 				    M_DEVBUF, M_WAITOK | M_ZERO);
937 			}
938 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
939 			if (vpd_nextbyte(&vrs, &byte2)) {
940 				state = -2;
941 				break;
942 			}
943 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
944 			if (vpd_nextbyte(&vrs, &byte2)) {
945 				state = -2;
946 				break;
947 			}
948 			dflen = byte2;
949 			if (dflen == 0 &&
950 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
951 			    2) == 0) {
952 				/*
953 				 * if this happens, we can't trust the rest
954 				 * of the VPD.
955 				 */
956 				printf(
957 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
958 				    cfg->domain, cfg->bus, cfg->slot,
959 				    cfg->func, dflen);
960 				cksumvalid = 0;
961 				state = -1;
962 				break;
963 			} else if (dflen == 0) {
964 				cfg->vpd.vpd_ros[off].value = malloc(1 *
965 				    sizeof(*cfg->vpd.vpd_ros[off].value),
966 				    M_DEVBUF, M_WAITOK);
967 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
968 			} else
969 				cfg->vpd.vpd_ros[off].value = malloc(
970 				    (dflen + 1) *
971 				    sizeof(*cfg->vpd.vpd_ros[off].value),
972 				    M_DEVBUF, M_WAITOK);
973 			remain -= 3;
974 			i = 0;
975 			/* keep in sync w/ state 3's transistions */
976 			if (dflen == 0 && remain == 0)
977 				state = 0;
978 			else if (dflen == 0)
979 				state = 2;
980 			else
981 				state = 3;
982 			break;
983 
984 		case 3:	/* VPD-R Keyword Value */
985 			cfg->vpd.vpd_ros[off].value[i++] = byte;
986 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
987 			    "RV", 2) == 0 && cksumvalid == -1) {
988 				if (vrs.cksum == 0)
989 					cksumvalid = 1;
990 				else {
991 					if (bootverbose)
992 						printf(
993 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
994 						    cfg->domain, cfg->bus,
995 						    cfg->slot, cfg->func,
996 						    vrs.cksum);
997 					cksumvalid = 0;
998 					state = -1;
999 					break;
1000 				}
1001 			}
1002 			dflen--;
1003 			remain--;
			/* keep in sync w/ state 2's transitions */
1005 			if (dflen == 0)
1006 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1007 			if (dflen == 0 && remain == 0) {
1008 				cfg->vpd.vpd_rocnt = off;
1009 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1010 				    off * sizeof(*cfg->vpd.vpd_ros),
1011 				    M_DEVBUF, M_WAITOK | M_ZERO);
1012 				state = 0;
1013 			} else if (dflen == 0)
1014 				state = 2;
1015 			break;
1016 
1017 		case 4:
1018 			remain--;
1019 			if (remain == 0)
1020 				state = 0;
1021 			break;
1022 
1023 		case 5:	/* VPD-W Keyword Header */
1024 			if (off == alloc) {
1025 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1026 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1027 				    M_DEVBUF, M_WAITOK | M_ZERO);
1028 			}
1029 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1030 			if (vpd_nextbyte(&vrs, &byte2)) {
1031 				state = -2;
1032 				break;
1033 			}
1034 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1035 			if (vpd_nextbyte(&vrs, &byte2)) {
1036 				state = -2;
1037 				break;
1038 			}
1039 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1040 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1041 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1042 			    sizeof(*cfg->vpd.vpd_w[off].value),
1043 			    M_DEVBUF, M_WAITOK);
1044 			remain -= 3;
1045 			i = 0;
			/* keep in sync w/ state 6's transitions */
1047 			if (dflen == 0 && remain == 0)
1048 				state = 0;
1049 			else if (dflen == 0)
1050 				state = 5;
1051 			else
1052 				state = 6;
1053 			break;
1054 
1055 		case 6:	/* VPD-W Keyword Value */
1056 			cfg->vpd.vpd_w[off].value[i++] = byte;
1057 			dflen--;
1058 			remain--;
			/* keep in sync w/ state 5's transitions */
1060 			if (dflen == 0)
1061 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1062 			if (dflen == 0 && remain == 0) {
1063 				cfg->vpd.vpd_wcnt = off;
1064 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1065 				    off * sizeof(*cfg->vpd.vpd_w),
1066 				    M_DEVBUF, M_WAITOK | M_ZERO);
1067 				state = 0;
1068 			} else if (dflen == 0)
1069 				state = 5;
1070 			break;
1071 
1072 		default:
1073 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1074 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1075 			    state);
1076 			state = -1;
1077 			break;
1078 		}
1079 	}
1080 
1081 	if (cksumvalid == 0 || state < -1) {
1082 		/* read-only data bad, clean up */
1083 		if (cfg->vpd.vpd_ros != NULL) {
1084 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1085 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1086 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1087 			cfg->vpd.vpd_ros = NULL;
1088 		}
1089 	}
1090 	if (state < -1) {
1091 		/* I/O error, clean up */
1092 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1093 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1094 		if (cfg->vpd.vpd_ident != NULL) {
1095 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1096 			cfg->vpd.vpd_ident = NULL;
1097 		}
1098 		if (cfg->vpd.vpd_w != NULL) {
1099 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1100 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1101 			free(cfg->vpd.vpd_w, M_DEVBUF);
1102 			cfg->vpd.vpd_w = NULL;
1103 		}
1104 	}
1105 	cfg->vpd.vpd_cached = 1;
1106 #undef REG
1107 #undef WREG
1108 }
1109 
1110 int
1111 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1112 {
1113 	struct pci_devinfo *dinfo = device_get_ivars(child);
1114 	pcicfgregs *cfg = &dinfo->cfg;
1115 
1116 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1117 		pci_read_vpd(device_get_parent(dev), cfg);
1118 
1119 	*identptr = cfg->vpd.vpd_ident;
1120 
1121 	if (*identptr == NULL)
1122 		return (ENXIO);
1123 
1124 	return (0);
1125 }
1126 
1127 int
1128 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1129 	const char **vptr)
1130 {
1131 	struct pci_devinfo *dinfo = device_get_ivars(child);
1132 	pcicfgregs *cfg = &dinfo->cfg;
1133 	int i;
1134 
1135 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1136 		pci_read_vpd(device_get_parent(dev), cfg);
1137 
1138 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1139 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1140 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1141 			*vptr = cfg->vpd.vpd_ros[i].value;
1142 			return (0);
1143 		}
1144 
1145 	*vptr = NULL;
1146 	return (ENXIO);
1147 }
1148 
1149 /*
1150  * Find the requested extended capability and return the offset in
1151  * configuration space via the pointer provided. The function returns
1152  * 0 on success and error code otherwise.
1153  */
1154 int
1155 pci_find_extcap_method(device_t dev, device_t child, int capability,
1156     int *capreg)
1157 {
1158 	struct pci_devinfo *dinfo = device_get_ivars(child);
1159 	pcicfgregs *cfg = &dinfo->cfg;
1160 	u_int32_t status;
1161 	u_int8_t ptr;
1162 
1163 	/*
1164 	 * Check the CAP_LIST bit of the PCI status register first.
1165 	 */
1166 	status = pci_read_config(child, PCIR_STATUS, 2);
1167 	if (!(status & PCIM_STATUS_CAPPRESENT))
1168 		return (ENXIO);
1169 
1170 	/*
1171 	 * Determine the start pointer of the capabilities list.
1172 	 */
1173 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1174 	case PCIM_HDRTYPE_NORMAL:
1175 	case PCIM_HDRTYPE_BRIDGE:
1176 		ptr = PCIR_CAP_PTR;
1177 		break;
1178 	case PCIM_HDRTYPE_CARDBUS:
1179 		ptr = PCIR_CAP_PTR_2;
1180 		break;
1181 	default:
1182 		/* XXX: panic? */
1183 		return (ENXIO);		/* no extended capabilities support */
1184 	}
1185 	ptr = pci_read_config(child, ptr, 1);
1186 
1187 	/*
1188 	 * Traverse the capabilities list.
1189 	 */
1190 	while (ptr != 0) {
1191 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1192 			if (capreg != NULL)
1193 				*capreg = ptr;
1194 			return (0);
1195 		}
1196 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1197 	}
1198 
1199 	return (ENOENT);
1200 }
1201 
1202 /*
1203  * Support for MSI-X message interrupts.
1204  */
1205 void
1206 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1207 {
1208 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1209 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1210 	uint32_t offset;
1211 
1212 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1213 	offset = msix->msix_table_offset + index * 16;
1214 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1215 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1216 	bus_write_4(msix->msix_table_res, offset + 8, data);
1217 
1218 	/* Enable MSI -> HT mapping. */
1219 	pci_ht_map_msi(dev, address);
1220 }
1221 
1222 void
1223 pci_mask_msix(device_t dev, u_int index)
1224 {
1225 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1226 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1227 	uint32_t offset, val;
1228 
1229 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1230 	offset = msix->msix_table_offset + index * 16 + 12;
1231 	val = bus_read_4(msix->msix_table_res, offset);
1232 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1233 		val |= PCIM_MSIX_VCTRL_MASK;
1234 		bus_write_4(msix->msix_table_res, offset, val);
1235 	}
1236 }
1237 
1238 void
1239 pci_unmask_msix(device_t dev, u_int index)
1240 {
1241 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1242 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1243 	uint32_t offset, val;
1244 
1245 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1246 	offset = msix->msix_table_offset + index * 16 + 12;
1247 	val = bus_read_4(msix->msix_table_res, offset);
1248 	if (val & PCIM_MSIX_VCTRL_MASK) {
1249 		val &= ~PCIM_MSIX_VCTRL_MASK;
1250 		bus_write_4(msix->msix_table_res, offset, val);
1251 	}
1252 }
1253 
1254 int
1255 pci_pending_msix(device_t dev, u_int index)
1256 {
1257 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1258 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1259 	uint32_t offset, bit;
1260 
1261 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1262 	offset = msix->msix_pba_offset + (index / 32) * 4;
1263 	bit = 1 << index % 32;
1264 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1265 }
1266 
1267 /*
1268  * Restore MSI-X registers and table during resume.  If MSI-X is
1269  * enabled then walk the virtual table to restore the actual MSI-X
1270  * table.
1271  */
1272 static void
1273 pci_resume_msix(device_t dev)
1274 {
1275 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1276 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1277 	struct msix_table_entry *mte;
1278 	struct msix_vector *mv;
1279 	int i;
1280 
1281 	if (msix->msix_alloc > 0) {
1282 		/* First, mask all vectors. */
1283 		for (i = 0; i < msix->msix_msgnum; i++)
1284 			pci_mask_msix(dev, i);
1285 
1286 		/* Second, program any messages with at least one handler. */
1287 		for (i = 0; i < msix->msix_table_len; i++) {
1288 			mte = &msix->msix_table[i];
1289 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1290 				continue;
1291 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1292 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1293 			pci_unmask_msix(dev, i);
1294 		}
1295 	}
1296 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1297 	    msix->msix_ctrl, 2);
1298 }
1299 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 *
 * Fails (ENXIO) if any interrupt resource is already in use, if MSI or
 * MSI-X messages were already allocated, if MSI is blacklisted, or if the
 * BAR(s) backing the MSI-X table/PBA are not mapped and active.  Returns
 * ENODEV if the device lacks an MSI-X capability or MSI-X is disabled
 * administratively.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still refers to it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to min(requested, supported); stop on first failure. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if we got no messages at all. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Table entries store 1-based indices into msix_vectors. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1439 
1440 /*
1441  * By default, pci_alloc_msix() will assign the allocated IRQ
1442  * resources consecutively to the first N messages in the MSI-X table.
1443  * However, device drivers may want to use different layouts if they
1444  * either receive fewer messages than they asked for, or they wish to
1445  * populate the MSI-X table sparsely.  This method allows the driver
1446  * to specify what layout it wants.  It must be called after a
1447  * successful pci_alloc_msix() but before any of the associated
1448  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1449  *
1450  * The 'vectors' array contains 'count' message vectors.  The array
1451  * maps directly to the MSI-X table in that index 0 in the array
1452  * specifies the vector for the first message in the MSI-X table, etc.
1453  * The vector value in each array index can either be 0 to indicate
1454  * that no vector should be assigned to a message slot, or it can be a
1455  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1457  * vector (IRQ) to be used for the corresponding message.
1458  *
1459  * On successful return, each message with a non-zero vector will have
1460  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1461  * 1.  Additionally, if any of the IRQs allocated via the previous
1462  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1463  * will be freed back to the system automatically.
1464  *
1465  * For example, suppose a driver has a MSI-X table with 6 messages and
1466  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1467  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1468  * C.  After the call to pci_alloc_msix(), the device will be setup to
1469  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1471  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1472  * be freed back to the system.  This device will also have valid
1473  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1474  *
1475  * In any case, the SYS_RES_IRQ rid X will always map to the message
1476  * at MSI-X table index X - 1 and will only be valid if a vector is
1477  * assigned to that table entry.
1478  */
1479 int
1480 pci_remap_msix_method(device_t dev, device_t child, int count,
1481     const u_int *vectors)
1482 {
1483 	struct pci_devinfo *dinfo = device_get_ivars(child);
1484 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1485 	struct resource_list_entry *rle;
1486 	int i, irq, j, *used;
1487 
1488 	/*
1489 	 * Have to have at least one message in the table but the
1490 	 * table can't be bigger than the actual MSI-X table in the
1491 	 * device.
1492 	 */
1493 	if (count == 0 || count > msix->msix_msgnum)
1494 		return (EINVAL);
1495 
1496 	/* Sanity check the vectors. */
1497 	for (i = 0; i < count; i++)
1498 		if (vectors[i] > msix->msix_alloc)
1499 			return (EINVAL);
1500 
1501 	/*
1502 	 * Make sure there aren't any holes in the vectors to be used.
1503 	 * It's a big pain to support it, and it doesn't really make
1504 	 * sense anyway.  Also, at least one vector must be used.
1505 	 */
1506 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1507 	    M_ZERO);
1508 	for (i = 0; i < count; i++)
1509 		if (vectors[i] != 0)
1510 			used[vectors[i] - 1] = 1;
1511 	for (i = 0; i < msix->msix_alloc - 1; i++)
1512 		if (used[i] == 0 && used[i + 1] == 1) {
1513 			free(used, M_DEVBUF);
1514 			return (EINVAL);
1515 		}
1516 	if (used[0] != 1) {
1517 		free(used, M_DEVBUF);
1518 		return (EINVAL);
1519 	}
1520 
1521 	/* Make sure none of the resources are allocated. */
1522 	for (i = 0; i < msix->msix_table_len; i++) {
1523 		if (msix->msix_table[i].mte_vector == 0)
1524 			continue;
1525 		if (msix->msix_table[i].mte_handlers > 0)
1526 			return (EBUSY);
1527 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1528 		KASSERT(rle != NULL, ("missing resource"));
1529 		if (rle->res != NULL)
1530 			return (EBUSY);
1531 	}
1532 
1533 	/* Free the existing resource list entries. */
1534 	for (i = 0; i < msix->msix_table_len; i++) {
1535 		if (msix->msix_table[i].mte_vector == 0)
1536 			continue;
1537 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1538 	}
1539 
1540 	/*
1541 	 * Build the new virtual table keeping track of which vectors are
1542 	 * used.
1543 	 */
1544 	free(msix->msix_table, M_DEVBUF);
1545 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1546 	    M_DEVBUF, M_WAITOK | M_ZERO);
1547 	for (i = 0; i < count; i++)
1548 		msix->msix_table[i].mte_vector = vectors[i];
1549 	msix->msix_table_len = count;
1550 
1551 	/* Free any unused IRQs and resize the vectors array if necessary. */
1552 	j = msix->msix_alloc - 1;
1553 	if (used[j] == 0) {
1554 		struct msix_vector *vec;
1555 
1556 		while (used[j] == 0) {
1557 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1558 			    msix->msix_vectors[j].mv_irq);
1559 			j--;
1560 		}
1561 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1562 		    M_WAITOK);
1563 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1564 		    (j + 1));
1565 		free(msix->msix_vectors, M_DEVBUF);
1566 		msix->msix_vectors = vec;
1567 		msix->msix_alloc = j + 1;
1568 	}
1569 	free(used, M_DEVBUF);
1570 
1571 	/* Map the IRQs onto the rids. */
1572 	for (i = 0; i < count; i++) {
1573 		if (vectors[i] == 0)
1574 			continue;
1575 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1576 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1577 		    irq, 1);
1578 	}
1579 
1580 	if (bootverbose) {
1581 		device_printf(child, "Remapped MSI-X IRQs as: ");
1582 		for (i = 0; i < count; i++) {
1583 			if (i != 0)
1584 				printf(", ");
1585 			if (vectors[i] == 0)
1586 				printf("---");
1587 			else
1588 				printf("%d",
1589 				    msix->msix_vectors[vectors[i]].mv_irq);
1590 		}
1591 		printf("\n");
1592 	}
1593 
1594 	return (0);
1595 }
1596 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in the
 * capability, delete the SYS_RES_IRQ resource list entries, and hand the
 * IRQs back to the parent bridge.  Fails with EBUSY if any message still
 * has an interrupt handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1643 
1644 /*
1645  * Return the max supported MSI-X messages this device supports.
1646  * Basically, assuming the MD code can alloc messages, this function
1647  * should return the maximum value that pci_alloc_msix() can return.
1648  * Thus, it is subject to the tunables, etc.
1649  */
1650 int
1651 pci_msix_count_method(device_t dev, device_t child)
1652 {
1653 	struct pci_devinfo *dinfo = device_get_ivars(child);
1654 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1655 
1656 	if (pci_do_msix && msix->msix_location != 0)
1657 		return (msix->msix_msgnum);
1658 	return (0);
1659 }
1660 
1661 /*
1662  * HyperTransport MSI mapping control
1663  */
1664 void
1665 pci_ht_map_msi(device_t dev, uint64_t addr)
1666 {
1667 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1668 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1669 
1670 	if (!ht->ht_msimap)
1671 		return;
1672 
1673 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1674 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1675 		/* Enable MSI -> HT mapping. */
1676 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1677 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1678 		    ht->ht_msictrl, 2);
1679 	}
1680 
1681 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1682 		/* Disable MSI -> HT mapping. */
1683 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1684 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1685 		    ht->ht_msictrl, 2);
1686 	}
1687 }
1688 
1689 int
1690 pci_get_max_read_req(device_t dev)
1691 {
1692 	int cap;
1693 	uint16_t val;
1694 
1695 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1696 		return (0);
1697 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1698 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1699 	val >>= 12;
1700 	return (1 << (val + 7));
1701 }
1702 
1703 int
1704 pci_set_max_read_req(device_t dev, int size)
1705 {
1706 	int cap;
1707 	uint16_t val;
1708 
1709 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1710 		return (0);
1711 	if (size < 128)
1712 		size = 128;
1713 	if (size > 4096)
1714 		size = 4096;
1715 	size = (1 << (fls(size) - 1));
1716 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1717 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1718 	val |= (fls(size) - 8) << 12;
1719 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1720 	return (size);
1721 }
1722 
1723 /*
1724  * Support for MSI message signalled interrupts.
1725  */
1726 void
1727 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1728 {
1729 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1730 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1731 
1732 	/* Write data and address values. */
1733 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1734 	    address & 0xffffffff, 4);
1735 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1736 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1737 		    address >> 32, 4);
1738 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1739 		    data, 2);
1740 	} else
1741 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1742 		    2);
1743 
1744 	/* Enable MSI in the control register. */
1745 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1746 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1747 	    2);
1748 
1749 	/* Enable MSI -> HT mapping. */
1750 	pci_ht_map_msi(dev, address);
1751 }
1752 
1753 void
1754 pci_disable_msi(device_t dev)
1755 {
1756 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1757 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1758 
1759 	/* Disable MSI -> HT mapping. */
1760 	pci_ht_map_msi(dev, 0);
1761 
1762 	/* Disable MSI in the control register. */
1763 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1764 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1765 	    2);
1766 }
1767 
1768 /*
1769  * Restore MSI registers during resume.  If MSI is enabled then
1770  * restore the data and address registers in addition to the control
1771  * register.
1772  */
1773 static void
1774 pci_resume_msi(device_t dev)
1775 {
1776 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1777 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1778 	uint64_t address;
1779 	uint16_t data;
1780 
1781 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1782 		address = msi->msi_addr;
1783 		data = msi->msi_data;
1784 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1785 		    address & 0xffffffff, 4);
1786 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1787 			pci_write_config(dev, msi->msi_location +
1788 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1789 			pci_write_config(dev, msi->msi_location +
1790 			    PCIR_MSI_DATA_64BIT, data, 2);
1791 		} else
1792 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1793 			    data, 2);
1794 	}
1795 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1796 	    2);
1797 }
1798 
1799 static int
1800 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1801 {
1802 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1803 	pcicfgregs *cfg = &dinfo->cfg;
1804 	struct resource_list_entry *rle;
1805 	struct msix_table_entry *mte;
1806 	struct msix_vector *mv;
1807 	uint64_t addr;
1808 	uint32_t data;
1809 	int error, i, j;
1810 
1811 	/*
1812 	 * Handle MSI first.  We try to find this IRQ among our list
1813 	 * of MSI IRQs.  If we find it, we request updated address and
1814 	 * data registers and apply the results.
1815 	 */
1816 	if (cfg->msi.msi_alloc > 0) {
1817 
1818 		/* If we don't have any active handlers, nothing to do. */
1819 		if (cfg->msi.msi_handlers == 0)
1820 			return (0);
1821 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1822 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1823 			    i + 1);
1824 			if (rle->start == irq) {
1825 				error = PCIB_MAP_MSI(device_get_parent(bus),
1826 				    dev, irq, &addr, &data);
1827 				if (error)
1828 					return (error);
1829 				pci_disable_msi(dev);
1830 				dinfo->cfg.msi.msi_addr = addr;
1831 				dinfo->cfg.msi.msi_data = data;
1832 				pci_enable_msi(dev, addr, data);
1833 				return (0);
1834 			}
1835 		}
1836 		return (ENOENT);
1837 	}
1838 
1839 	/*
1840 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1841 	 * we request the updated mapping info.  If that works, we go
1842 	 * through all the slots that use this IRQ and update them.
1843 	 */
1844 	if (cfg->msix.msix_alloc > 0) {
1845 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1846 			mv = &cfg->msix.msix_vectors[i];
1847 			if (mv->mv_irq == irq) {
1848 				error = PCIB_MAP_MSI(device_get_parent(bus),
1849 				    dev, irq, &addr, &data);
1850 				if (error)
1851 					return (error);
1852 				mv->mv_address = addr;
1853 				mv->mv_data = data;
1854 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1855 					mte = &cfg->msix.msix_table[j];
1856 					if (mte->mte_vector != i + 1)
1857 						continue;
1858 					if (mte->mte_handlers == 0)
1859 						continue;
1860 					pci_mask_msix(dev, j);
1861 					pci_enable_msix(dev, j, addr, data);
1862 					pci_unmask_msix(dev, j);
1863 				}
1864 			}
1865 		}
1866 		return (ENOENT);
1867 	}
1868 
1869 	return (ENOENT);
1870 }
1871 
1872 /*
1873  * Returns true if the specified device is blacklisted because MSI
1874  * doesn't work.
1875  */
1876 int
1877 pci_msi_device_blacklisted(device_t dev)
1878 {
1879 	const struct pci_quirk *q;
1880 
1881 	if (!pci_honor_msi_blacklist)
1882 		return (0);
1883 
1884 	for (q = &pci_quirks[0]; q->devid; q++) {
1885 		if (q->devid == pci_get_devid(dev) &&
1886 		    q->type == PCI_QUIRK_DISABLE_MSI)
1887 			return (1);
1888 	}
1889 	return (0);
1890 }
1891 
1892 /*
1893  * Returns true if a specified chipset supports MSI when it is
1894  * emulated hardware in a virtual machine.
1895  */
1896 static int
1897 pci_msi_vm_chipset(device_t dev)
1898 {
1899 	const struct pci_quirk *q;
1900 
1901 	for (q = &pci_quirks[0]; q->devid; q++) {
1902 		if (q->devid == pci_get_devid(dev) &&
1903 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1904 			return (1);
1905 	}
1906 	return (0);
1907 }
1908 
1909 /*
1910  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1911  * we just check for blacklisted chipsets as represented by the
1912  * host-PCI bridge at device 0:0:0.  In the future, it may become
1913  * necessary to check other system attributes, such as the kenv values
1914  * that give the motherboard manufacturer and model number.
1915  */
1916 static int
1917 pci_msi_blacklisted(void)
1918 {
1919 	device_t dev;
1920 
1921 	if (!pci_honor_msi_blacklist)
1922 		return (0);
1923 
1924 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1925 	if (!(pcie_chipset || pcix_chipset)) {
1926 		if (vm_guest != VM_GUEST_NO) {
1927 			dev = pci_find_bsf(0, 0, 0);
1928 			if (dev != NULL)
1929 				return (pci_msi_vm_chipset(dev) == 0);
1930 		}
1931 		return (1);
1932 	}
1933 
1934 	dev = pci_find_bsf(0, 0, 0);
1935 	if (dev != NULL)
1936 		return (pci_msi_device_blacklisted(dev));
1937 	return (0);
1938 }
1939 
1940 /*
1941  * Attempt to allocate *count MSI messages.  The actual number allocated is
1942  * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1944  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/*
	 * If rid 0 is allocated, then fail.  Rid 0 is the legacy INTx
	 * interrupt resource, which cannot be in use at the same time
	 * as MSI.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request on each failure until a single message also fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field encodes log2 of the message count,
	 * hence ffs(actual) - 1 shifted into the MME field.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2063 
2064 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other result (success or
	 * failure) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] below can hold at most 32 entries. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2112 
2113 /*
2114  * Return the max supported MSI messages this device supports.
2115  * Basically, assuming the MD code can alloc messages, this function
2116  * should return the maximum value that pci_alloc_msi() can return.
2117  * Thus, it is subject to the tunables, etc.
2118  */
2119 int
2120 pci_msi_count_method(device_t dev, device_t child)
2121 {
2122 	struct pci_devinfo *dinfo = device_get_ivars(child);
2123 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2124 
2125 	if (pci_do_msi && msi->msi_location != 0)
2126 		return (msi->msi_msgnum);
2127 	return (0);
2128 }
2129 
2130 /* free pcicfgregs structure and all depending data structures */
2131 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free the VPD identifier string and keyword value arrays. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink from the global device list, then free the devinfo. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2163 
/*
 * PCI power management
 */
2167 int
2168 pci_set_powerstate_method(device_t dev, device_t child, int state)
2169 {
2170 	struct pci_devinfo *dinfo = device_get_ivars(child);
2171 	pcicfgregs *cfg = &dinfo->cfg;
2172 	uint16_t status;
2173 	int result, oldstate, highest, delay;
2174 
2175 	if (cfg->pp.pp_cap == 0)
2176 		return (EOPNOTSUPP);
2177 
2178 	/*
2179 	 * Optimize a no state change request away.  While it would be OK to
2180 	 * write to the hardware in theory, some devices have shown odd
2181 	 * behavior when going from D3 -> D3.
2182 	 */
2183 	oldstate = pci_get_powerstate(child);
2184 	if (oldstate == state)
2185 		return (0);
2186 
2187 	/*
2188 	 * The PCI power management specification states that after a state
2189 	 * transition between PCI power states, system software must
2190 	 * guarantee a minimal delay before the function accesses the device.
2191 	 * Compute the worst case delay that we need to guarantee before we
2192 	 * access the device.  Many devices will be responsive much more
2193 	 * quickly than this delay, but there are some that don't respond
2194 	 * instantly to state changes.  Transitions to/from D3 state require
2195 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2196 	 * is done below with DELAY rather than a sleeper function because
2197 	 * this function can be called from contexts where we cannot sleep.
2198 	 */
2199 	highest = (oldstate > state) ? oldstate : state;
2200 	if (highest == PCI_POWERSTATE_D3)
2201 	    delay = 10000;
2202 	else if (highest == PCI_POWERSTATE_D2)
2203 	    delay = 200;
2204 	else
2205 	    delay = 0;
2206 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2207 	    & ~PCIM_PSTAT_DMASK;
2208 	result = 0;
2209 	switch (state) {
2210 	case PCI_POWERSTATE_D0:
2211 		status |= PCIM_PSTAT_D0;
2212 		break;
2213 	case PCI_POWERSTATE_D1:
2214 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2215 			return (EOPNOTSUPP);
2216 		status |= PCIM_PSTAT_D1;
2217 		break;
2218 	case PCI_POWERSTATE_D2:
2219 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2220 			return (EOPNOTSUPP);
2221 		status |= PCIM_PSTAT_D2;
2222 		break;
2223 	case PCI_POWERSTATE_D3:
2224 		status |= PCIM_PSTAT_D3;
2225 		break;
2226 	default:
2227 		return (EINVAL);
2228 	}
2229 
2230 	if (bootverbose)
2231 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2232 		    state);
2233 
2234 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2235 	if (delay)
2236 		DELAY(delay);
2237 	return (0);
2238 }
2239 
2240 int
2241 pci_get_powerstate_method(device_t dev, device_t child)
2242 {
2243 	struct pci_devinfo *dinfo = device_get_ivars(child);
2244 	pcicfgregs *cfg = &dinfo->cfg;
2245 	uint16_t status;
2246 	int result;
2247 
2248 	if (cfg->pp.pp_cap != 0) {
2249 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2250 		switch (status & PCIM_PSTAT_DMASK) {
2251 		case PCIM_PSTAT_D0:
2252 			result = PCI_POWERSTATE_D0;
2253 			break;
2254 		case PCIM_PSTAT_D1:
2255 			result = PCI_POWERSTATE_D1;
2256 			break;
2257 		case PCIM_PSTAT_D2:
2258 			result = PCI_POWERSTATE_D2;
2259 			break;
2260 		case PCIM_PSTAT_D3:
2261 			result = PCI_POWERSTATE_D3;
2262 			break;
2263 		default:
2264 			result = PCI_POWERSTATE_UNKNOWN;
2265 			break;
2266 		}
2267 	} else {
2268 		/* No support, device is always at D0 */
2269 		result = PCI_POWERSTATE_D0;
2270 	}
2271 	return (result);
2272 }
2273 
2274 /*
2275  * Some convenience functions for PCI device drivers.
2276  */
2277 
2278 static __inline void
2279 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2280 {
2281 	uint16_t	command;
2282 
2283 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2284 	command |= bit;
2285 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2286 }
2287 
2288 static __inline void
2289 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2290 {
2291 	uint16_t	command;
2292 
2293 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2294 	command &= ~bit;
2295 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2296 }
2297 
2298 int
2299 pci_enable_busmaster_method(device_t dev, device_t child)
2300 {
2301 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2302 	return (0);
2303 }
2304 
2305 int
2306 pci_disable_busmaster_method(device_t dev, device_t child)
2307 {
2308 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2309 	return (0);
2310 }
2311 
2312 int
2313 pci_enable_io_method(device_t dev, device_t child, int space)
2314 {
2315 	uint16_t bit;
2316 
2317 	switch(space) {
2318 	case SYS_RES_IOPORT:
2319 		bit = PCIM_CMD_PORTEN;
2320 		break;
2321 	case SYS_RES_MEMORY:
2322 		bit = PCIM_CMD_MEMEN;
2323 		break;
2324 	default:
2325 		return (EINVAL);
2326 	}
2327 	pci_set_command_bit(dev, child, bit);
2328 	return (0);
2329 }
2330 
2331 int
2332 pci_disable_io_method(device_t dev, device_t child, int space)
2333 {
2334 	uint16_t bit;
2335 
2336 	switch(space) {
2337 	case SYS_RES_IOPORT:
2338 		bit = PCIM_CMD_PORTEN;
2339 		break;
2340 	case SYS_RES_MEMORY:
2341 		bit = PCIM_CMD_MEMEN;
2342 		break;
2343 	default:
2344 		return (EINVAL);
2345 	}
2346 	pci_clear_command_bit(dev, child, bit);
2347 	return (0);
2348 }
2349 
2350 /*
2351  * New style pci driver.  Parent device is either a pci-host-bridge or a
2352  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2353  */
2354 
/* Dump a device's config-header details to the console (bootverbose only). */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identification and bus location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power-management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2411 
2412 static int
2413 pci_porten(device_t dev)
2414 {
2415 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2416 }
2417 
2418 static int
2419 pci_memen(device_t dev)
2420 {
2421 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2422 }
2423 
/*
 * Read a BAR's current value into *mapp and its sizing mask (the value
 * read back after writing all 1's) into *testvalp, restoring the
 * original contents afterwards.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* All 1's except bit 0 (the ROM enable bit). */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	/* Read the current value; include the upper half for 64-bit BARs. */
	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2487 
/*
 * Program a BAR with 'base' and refresh the cached pm_value from what
 * the device actually latched.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	/* Write the low dword first, then the high dword for 64-bit BARs. */
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/*
	 * Read the value back so pm_value reflects the device's view;
	 * read-only bits may differ from what was written.
	 */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2508 
2509 struct pci_map *
2510 pci_find_bar(device_t dev, int reg)
2511 {
2512 	struct pci_devinfo *dinfo;
2513 	struct pci_map *pm;
2514 
2515 	dinfo = device_get_ivars(dev);
2516 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2517 		if (pm->pm_reg == reg)
2518 			return (pm);
2519 	}
2520 	return (NULL);
2521 }
2522 
2523 int
2524 pci_bar_enabled(device_t dev, struct pci_map *pm)
2525 {
2526 	struct pci_devinfo *dinfo;
2527 	uint16_t cmd;
2528 
2529 	dinfo = device_get_ivars(dev);
2530 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2531 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2532 		return (0);
2533 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2534 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2535 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2536 	else
2537 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2538 }
2539 
/*
 * Record a newly-sized BAR in the device's map list, keeping the list
 * ordered by register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after: the last one before a larger reg. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2564 
2565 static void
2566 pci_restore_bars(device_t dev)
2567 {
2568 	struct pci_devinfo *dinfo;
2569 	struct pci_map *pm;
2570 	int ln2range;
2571 
2572 	dinfo = device_get_ivars(dev);
2573 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2574 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2575 			ln2range = 32;
2576 		else
2577 			ln2range = pci_maprange(pm->pm_value);
2578 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2579 		if (ln2range == 64)
2580 			pci_write_config(dev, pm->pm_reg + 4,
2581 			    pm->pm_value >> 32, 4);
2582 	}
2583 }
2584 
2585 /*
2586  * Add a resource based on a pci map register. Return 1 if the map
2587  * register is a 32bit map register or 2 if it is a 64bit register.
2588  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR size in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The BAR's address does not fit in this machine's u_long. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2733 
2734 /*
2735  * For ATA devices we need to decide early what addressing mode to use.
2736  * Legacy demands that the primary and secondary ATA ports sits on the
2737  * same addresses that old ISA hardware did. This dictates that we use
2738  * those addresses and ignore the BAR's if we cannot set PCI native
2739  * addressing mode.
2740  */
2741 static void
2742 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2743     uint32_t prefetchmask)
2744 {
2745 	struct resource *r;
2746 	int rid, type, progif;
2747 #if 0
2748 	/* if this device supports PCI native addressing use it */
2749 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2750 	if ((progif & 0x8a) == 0x8a) {
2751 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2752 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2753 			printf("Trying ATA native PCI addressing mode\n");
2754 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2755 		}
2756 	}
2757 #endif
2758 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2759 	type = SYS_RES_IOPORT;
2760 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2761 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2762 		    prefetchmask & (1 << 0));
2763 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2764 		    prefetchmask & (1 << 1));
2765 	} else {
2766 		rid = PCIR_BAR(0);
2767 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2768 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2769 		    0x1f7, 8, 0);
2770 		rid = PCIR_BAR(1);
2771 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2772 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2773 		    0x3f6, 1, 0);
2774 	}
2775 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2776 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2777 		    prefetchmask & (1 << 2));
2778 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2779 		    prefetchmask & (1 << 3));
2780 	} else {
2781 		rid = PCIR_BAR(2);
2782 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2783 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2784 		    0x177, 8, 0);
2785 		rid = PCIR_BAR(3);
2786 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2787 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2788 		    0x376, 1, 0);
2789 	}
2790 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2791 	    prefetchmask & (1 << 4));
2792 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2793 	    prefetchmask & (1 << 5));
2794 }
2795 
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable named
	 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq.  Values outside
	 * 1..254 are rejected as invalid.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2843 
2844 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's register space (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* OHCI_IR set: the SMM firmware currently owns the HC. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the SMM driver to release it. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2880 
2881 /* Perform early UHCI takeover from SMM. */
2882 static void
2883 uhci_early_takeover(device_t self)
2884 {
2885 	struct resource *res;
2886 	int rid;
2887 
2888 	/*
2889 	 * Set the PIRQD enable bit and switch off all the others. We don't
2890 	 * want legacy support to interfere with us XXX Does this also mean
2891 	 * that the BIOS won't touch the keyboard anymore if it is connected
2892 	 * to the ports of the root hub?
2893 	 */
2894 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2895 
2896 	/* Disable interrupts */
2897 	rid = PCI_UHCI_BASE_REG;
2898 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2899 	if (res != NULL) {
2900 		bus_write_2(res, UHCI_INTR, 0);
2901 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2902 	}
2903 }
2904 
2905 /* Perform early EHCI takeover from SMM. */
2906 static void
2907 ehci_early_takeover(device_t self)
2908 {
2909 	struct resource *res;
2910 	uint32_t cparams;
2911 	uint32_t eec;
2912 	uint8_t eecp;
2913 	uint8_t bios_sem;
2914 	uint8_t offs;
2915 	int rid;
2916 	int i;
2917 
2918 	rid = PCIR_BAR(0);
2919 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2920 	if (res == NULL)
2921 		return;
2922 
2923 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2924 
2925 	/* Synchronise with the BIOS if it owns the controller. */
2926 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2927 	    eecp = EHCI_EECP_NEXT(eec)) {
2928 		eec = pci_read_config(self, eecp, 4);
2929 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2930 			continue;
2931 		}
2932 		bios_sem = pci_read_config(self, eecp +
2933 		    EHCI_LEGSUP_BIOS_SEM, 1);
2934 		if (bios_sem == 0) {
2935 			continue;
2936 		}
2937 		if (bootverbose)
2938 			printf("ehci early: "
2939 			    "SMM active, request owner change\n");
2940 
2941 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2942 
2943 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2944 			DELAY(1000);
2945 			bios_sem = pci_read_config(self, eecp +
2946 			    EHCI_LEGSUP_BIOS_SEM, 1);
2947 		}
2948 
2949 		if (bios_sem != 0) {
2950 			if (bootverbose)
2951 				printf("ehci early: "
2952 				    "SMM does not respond\n");
2953 		}
2954 		/* Disable interrupts */
2955 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2956 		bus_write_4(res, offs + EHCI_USBINTR, 0);
2957 	}
2958 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2959 }
2960 
2961 /* Perform early XHCI takeover from SMM. */
2962 static void
2963 xhci_early_takeover(device_t self)
2964 {
2965 	struct resource *res;
2966 	uint32_t cparams;
2967 	uint32_t eec;
2968 	uint8_t eecp;
2969 	uint8_t bios_sem;
2970 	uint8_t offs;
2971 	int rid;
2972 	int i;
2973 
2974 	rid = PCIR_BAR(0);
2975 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2976 	if (res == NULL)
2977 		return;
2978 
2979 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
2980 
2981 	eec = -1;
2982 
2983 	/* Synchronise with the BIOS if it owns the controller. */
2984 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
2985 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
2986 		eec = bus_read_4(res, eecp);
2987 
2988 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
2989 			continue;
2990 
2991 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
2992 		if (bios_sem == 0)
2993 			continue;
2994 
2995 		if (bootverbose)
2996 			printf("xhci early: "
2997 			    "SMM active, request owner change\n");
2998 
2999 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3000 
3001 		/* wait a maximum of 5 second */
3002 
3003 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3004 			DELAY(1000);
3005 			bios_sem = bus_read_1(res, eecp +
3006 			    XHCI_XECP_BIOS_SEM);
3007 		}
3008 
3009 		if (bios_sem != 0) {
3010 			if (bootverbose)
3011 				printf("xhci early: "
3012 				    "SMM does not respond\n");
3013 		}
3014 
3015 		/* Disable interrupts */
3016 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3017 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3018 		bus_read_4(res, offs + XHCI_USBSTS);
3019 	}
3020 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3021 }
3022 
/*
 * Populate the resource list for a newly discovered PCI device: scan
 * its BARs (with special handling for legacy-mode ATA controllers),
 * add any quirked map registers, route the INTx interrupt, and perform
 * early USB controller takeover from SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	const struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns how many BAR slots the map consumed
		 * (64-bit BARs take two), hence the manual increment. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take ownership of USB controllers away from SMM/BIOS early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3079 
/*
 * Enumerate every slot/function on the given bus and add a child
 * device for each PCI function that responds.  Functions beyond 0 are
 * only scanned when the header type marks the device multi-function.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	/* Callers may pass a larger, bus-specific devinfo structure. */
	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			/* pci_read_device() returns NULL if the function
			 * does not exist. */
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3112 
3113 void
3114 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3115 {
3116 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3117 	device_set_ivars(dinfo->cfg.dev, dinfo);
3118 	resource_list_init(&dinfo->resources);
3119 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3120 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3121 	pci_print_verbose(dinfo);
3122 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3123 }
3124 
/*
 * Bus probe method for the generic PCI bus driver.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3134 
/*
 * Bus attach method: determine our domain and bus number from the
 * parent bridge, enumerate children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3154 
3155 static void
3156 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3157     int state)
3158 {
3159 	device_t child, pcib;
3160 	struct pci_devinfo *dinfo;
3161 	int dstate, i;
3162 
3163 	/*
3164 	 * Set the device to the given state.  If the firmware suggests
3165 	 * a different power state, use it instead.  If power management
3166 	 * is not present, the firmware is responsible for managing
3167 	 * device power.  Skip children who aren't attached since they
3168 	 * are handled separately.
3169 	 */
3170 	pcib = device_get_parent(dev);
3171 	for (i = 0; i < numdevs; i++) {
3172 		child = devlist[i];
3173 		dinfo = device_get_ivars(child);
3174 		dstate = state;
3175 		if (device_is_attached(child) &&
3176 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3177 			pci_set_powerstate(child, dstate);
3178 	}
3179 }
3180 
3181 int
3182 pci_suspend(device_t dev)
3183 {
3184 	device_t child, *devlist;
3185 	struct pci_devinfo *dinfo;
3186 	int error, i, numdevs;
3187 
3188 	/*
3189 	 * Save the PCI configuration space for each child and set the
3190 	 * device in the appropriate power state for this sleep state.
3191 	 */
3192 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3193 		return (error);
3194 	for (i = 0; i < numdevs; i++) {
3195 		child = devlist[i];
3196 		dinfo = device_get_ivars(child);
3197 		pci_cfg_save(child, dinfo, 0);
3198 	}
3199 
3200 	/* Suspend devices before potentially powering them down. */
3201 	error = bus_generic_suspend(dev);
3202 	if (error) {
3203 		free(devlist, M_TEMP);
3204 		return (error);
3205 	}
3206 	if (pci_do_power_suspend)
3207 		pci_set_power_children(dev, devlist, numdevs,
3208 		    PCI_POWERSTATE_D3);
3209 	free(devlist, M_TEMP);
3210 	return (0);
3211 }
3212 
3213 int
3214 pci_resume(device_t dev)
3215 {
3216 	device_t child, *devlist;
3217 	struct pci_devinfo *dinfo;
3218 	int error, i, numdevs;
3219 
3220 	/*
3221 	 * Set each child to D0 and restore its PCI configuration space.
3222 	 */
3223 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3224 		return (error);
3225 	if (pci_do_power_resume)
3226 		pci_set_power_children(dev, devlist, numdevs,
3227 		    PCI_POWERSTATE_D0);
3228 
3229 	/* Now the device is powered up, restore its config space. */
3230 	for (i = 0; i < numdevs; i++) {
3231 		child = devlist[i];
3232 		dinfo = device_get_ivars(child);
3233 
3234 		pci_cfg_restore(child, dinfo);
3235 		if (!device_is_attached(child))
3236 			pci_cfg_save(child, dinfo, 1);
3237 	}
3238 
3239 	/*
3240 	 * Resume critical devices first, then everything else later.
3241 	 */
3242 	for (i = 0; i < numdevs; i++) {
3243 		child = devlist[i];
3244 		switch (pci_get_class(child)) {
3245 		case PCIC_DISPLAY:
3246 		case PCIC_MEMORY:
3247 		case PCIC_BRIDGE:
3248 		case PCIC_BASEPERIPH:
3249 			DEVICE_RESUME(child);
3250 			break;
3251 		}
3252 	}
3253 	for (i = 0; i < numdevs; i++) {
3254 		child = devlist[i];
3255 		switch (pci_get_class(child)) {
3256 		case PCIC_DISPLAY:
3257 		case PCIC_MEMORY:
3258 		case PCIC_BRIDGE:
3259 		case PCIC_BASEPERIPH:
3260 			break;
3261 		default:
3262 			DEVICE_RESUME(child);
3263 		}
3264 	}
3265 	free(devlist, M_TEMP);
3266 	return (0);
3267 }
3268 
3269 static void
3270 pci_load_vendor_data(void)
3271 {
3272 	caddr_t data;
3273 	void *ptr;
3274 	size_t sz;
3275 
3276 	data = preload_search_by_type("pci_vendor_data");
3277 	if (data != NULL) {
3278 		ptr = preload_fetch_addr(data);
3279 		sz = preload_fetch_size(data);
3280 		if (ptr != NULL && sz != 0) {
3281 			pci_vendordata = ptr;
3282 			pci_vendordata_size = sz;
3283 			/* terminate the database */
3284 			pci_vendordata[pci_vendordata_size] = '\n';
3285 		}
3286 	}
3287 }
3288 
3289 void
3290 pci_driver_added(device_t dev, driver_t *driver)
3291 {
3292 	int numdevs;
3293 	device_t *devlist;
3294 	device_t child;
3295 	struct pci_devinfo *dinfo;
3296 	int i;
3297 
3298 	if (bootverbose)
3299 		device_printf(dev, "driver added\n");
3300 	DEVICE_IDENTIFY(driver, dev);
3301 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3302 		return;
3303 	for (i = 0; i < numdevs; i++) {
3304 		child = devlist[i];
3305 		if (device_get_state(child) != DS_NOTPRESENT)
3306 			continue;
3307 		dinfo = device_get_ivars(child);
3308 		pci_print_verbose(dinfo);
3309 		if (bootverbose)
3310 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3311 		pci_cfg_restore(child, dinfo);
3312 		if (device_probe_and_attach(child) != 0)
3313 			pci_cfg_save(child, dinfo, 1);
3314 	}
3315 	free(devlist, M_TEMP);
3316 }
3317 
/*
 * Bus setup_intr method.  Hook up the handler through the generic bus
 * layer, then, for direct children, program the interrupt source:
 * enable INTx for the legacy interrupt (rid 0), or map and enable the
 * MSI/MSI-X message for message-signalled interrupts (rid > 0).
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in config space on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on the first handler. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the table entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3409 
3410 int
3411 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3412     void *cookie)
3413 {
3414 	struct msix_table_entry *mte;
3415 	struct resource_list_entry *rle;
3416 	struct pci_devinfo *dinfo;
3417 	int error, rid;
3418 
3419 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3420 		return (EINVAL);
3421 
3422 	/* If this isn't a direct child, just bail out */
3423 	if (device_get_parent(child) != dev)
3424 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3425 
3426 	rid = rman_get_rid(irq);
3427 	if (rid == 0) {
3428 		/* Mask INTx */
3429 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3430 	} else {
3431 		/*
3432 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3433 		 * decrement the appropriate handlers count and mask the
3434 		 * MSI-X message, or disable MSI messages if the count
3435 		 * drops to 0.
3436 		 */
3437 		dinfo = device_get_ivars(child);
3438 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3439 		if (rle->res != irq)
3440 			return (EINVAL);
3441 		if (dinfo->cfg.msi.msi_alloc > 0) {
3442 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3443 			    ("MSI-X index too high"));
3444 			if (dinfo->cfg.msi.msi_handlers == 0)
3445 				return (EINVAL);
3446 			dinfo->cfg.msi.msi_handlers--;
3447 			if (dinfo->cfg.msi.msi_handlers == 0)
3448 				pci_disable_msi(child);
3449 		} else {
3450 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3451 			    ("No MSI or MSI-X interrupts allocated"));
3452 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3453 			    ("MSI-X index too high"));
3454 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3455 			if (mte->mte_handlers == 0)
3456 				return (EINVAL);
3457 			mte->mte_handlers--;
3458 			if (mte->mte_handlers == 0)
3459 				pci_mask_msix(child, rid - 1);
3460 		}
3461 	}
3462 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3463 	if (rid > 0)
3464 		KASSERT(error == 0,
3465 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3466 	return (error);
3467 }
3468 
3469 int
3470 pci_print_child(device_t dev, device_t child)
3471 {
3472 	struct pci_devinfo *dinfo;
3473 	struct resource_list *rl;
3474 	int retval = 0;
3475 
3476 	dinfo = device_get_ivars(child);
3477 	rl = &dinfo->resources;
3478 
3479 	retval += bus_print_child_header(dev, child);
3480 
3481 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3482 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3483 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3484 	if (device_get_flags(dev))
3485 		retval += printf(" flags %#x", device_get_flags(dev));
3486 
3487 	retval += printf(" at device %d.%d", pci_get_slot(child),
3488 	    pci_get_function(child));
3489 
3490 	retval += bus_print_child_footer(dev, child);
3491 
3492 	return (retval);
3493 }
3494 
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a human-readable description of devices with no driver.  An
 * entry with subclass -1 is the fallback description for the whole
 * class; the table ends with a NULL desc sentinel.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3586 
3587 void
3588 pci_probe_nomatch(device_t dev, device_t child)
3589 {
3590 	int	i;
3591 	char	*cp, *scp, *device;
3592 
3593 	/*
3594 	 * Look for a listing for this device in a loaded device database.
3595 	 */
3596 	if ((device = pci_describe_device(child)) != NULL) {
3597 		device_printf(dev, "<%s>", device);
3598 		free(device, M_DEVBUF);
3599 	} else {
3600 		/*
3601 		 * Scan the class/subclass descriptions for a general
3602 		 * description.
3603 		 */
3604 		cp = "unknown";
3605 		scp = NULL;
3606 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3607 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3608 				if (pci_nomatch_tab[i].subclass == -1) {
3609 					cp = pci_nomatch_tab[i].desc;
3610 				} else if (pci_nomatch_tab[i].subclass ==
3611 				    pci_get_subclass(child)) {
3612 					scp = pci_nomatch_tab[i].desc;
3613 				}
3614 			}
3615 		}
3616 		device_printf(dev, "<%s%s%s>",
3617 		    cp ? cp : "",
3618 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3619 		    scp ? scp : "");
3620 	}
3621 	printf(" at device %d.%d (no driver attached)\n",
3622 	    pci_get_slot(child), pci_get_function(child));
3623 	pci_cfg_save(child, device_get_ivars(child), 1);
3624 	return;
3625 }
3626 
3627 /*
3628  * Parse the PCI device database, if loaded, and return a pointer to a
3629  * description of the device.
3630  *
3631  * The database is flat text formatted as follows:
3632  *
3633  * Any line not in a valid format is ignored.
3634  * Lines are terminated with newline '\n' characters.
3635  *
3636  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3637  * the vendor name.
3638  *
3639  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3640  * - devices cannot be listed without a corresponding VENDOR line.
3641  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3642  * another TAB, then the device name.
3643  */
3644 
3645 /*
3646  * Assuming (ptr) points to the beginning of a line in the database,
3647  * return the vendor or device and description of the next entry.
3648  * The value of (vendor) or (device) inappropriate for the entry type
3649  * is set to -1.  Returns nonzero at the end of the database.
3650  *
 * Note that this is not fully robust in the face of corrupt data;
3652  * we attempt to safeguard against this by spamming the end of the
3653  * database with a newline when we initialise.
3654  */
/*
 * Parse the next entry from the vendor database.  On a VENDOR line,
 * *vendor and *desc are filled in and *device is -1; on a DEVICE line,
 * *device and *desc are filled in and *vendor is -1.  *ptr is advanced
 * to the start of the following line.  Returns nonzero at the end of
 * the database, zero on a successful parse.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		/* NOTE(review): if left hit 0 above, this reads one byte
		 * past the buffer; pci_load_vendor_data() newline-terminates
		 * the database, which presumably makes this safe — verify. */
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3700 
/*
 * Look up a "vendor, device" description string for dev in the loaded
 * vendor database.  Returns a malloc(M_DEVBUF)'d string that the
 * caller must free, or NULL if there is no database or no matching
 * vendor entry (or on allocation failure).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the %80[^\n] scan width in the parser. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* Stop at end of database or at the next vendor entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device entry matched: fall back to the raw device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3753 
/*
 * Bus read_ivar method: return the requested instance variable for a
 * PCI child from its cached config registers.  Returns ENOENT for
 * unknown ivars and EINVAL for PCI_IVAR_ETHADDR, which PCI devices do
 * not provide.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3836 
/*
 * Bus write_ivar method.  Only the interrupt pin may be changed; the
 * identification ivars are read-only and return EINVAL, and unknown
 * ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3869 
3870 #include "opt_ddb.h"
3871 #ifdef DDB
3872 #include <ddb/ddb.h>
3873 #include <sys/cons.h>
3874 
3875 /*
3876  * List resources based on pci map registers, used for within ddb
3877  */
3878 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line per device (selector, class, card/chip ids, rev,
 * header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	/* NOTE(review): error is initialized to 0 and never modified, so
	 * the (error == 0) loop test is always true here. */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices with no attached driver print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3918 #endif /* DDB */
3919 
/*
 * Lazily allocate and program a BAR on behalf of pci_alloc_resource():
 * size the BAR (or use the size recorded from an earlier failed
 * attempt), allocate a suitably sized and aligned range from the
 * parent, record it in the device's resource list as RLE_RESERVED, and
 * write the assigned address back into the BAR.  Returns NULL if the
 * BAR is unimplemented, of the wrong type, or allocation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address actually granted. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
4025 
/*
 * Bus alloc_resource method for the PCI bus.
 *
 * Requests from grandchildren (devices whose immediate parent is not
 * this bus) are forwarded straight up to our own parent.  For direct
 * children we perform lazy allocation: a legacy interrupt is routed on
 * first demand, and memory/ioport BARs are reserved (and the BAR
 * programmed) the first time a driver asks for them.  The actual
 * sub-allocation always comes from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Not our immediate child: just pass the request upward. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out an allocation backed by the (now-reserved) list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4096 
4097 int
4098 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4099     struct resource *r)
4100 {
4101 	struct pci_devinfo *dinfo;
4102 	int error;
4103 
4104 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4105 	if (error)
4106 		return (error);
4107 
4108 	/* Enable decoding in the command register when activating BARs. */
4109 	if (device_get_parent(child) == dev) {
4110 		/* Device ROMs need their decoding explicitly enabled. */
4111 		dinfo = device_get_ivars(child);
4112 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4113 			pci_write_bar(child, pci_find_bar(child, rid),
4114 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4115 		switch (type) {
4116 		case SYS_RES_IOPORT:
4117 		case SYS_RES_MEMORY:
4118 			error = PCI_ENABLE_IO(dev, child, type);
4119 			break;
4120 		}
4121 	}
4122 	return (error);
4123 }
4124 
4125 int
4126 pci_deactivate_resource(device_t dev, device_t child, int type,
4127     int rid, struct resource *r)
4128 {
4129 	struct pci_devinfo *dinfo;
4130 	int error;
4131 
4132 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4133 	if (error)
4134 		return (error);
4135 
4136 	/* Disable decoding for device ROMs. */
4137 	if (device_get_parent(child) == dev) {
4138 		dinfo = device_get_ivars(child);
4139 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4140 			pci_write_bar(child, pci_find_bar(child, rid),
4141 			    rman_get_start(r));
4142 	}
4143 	return (0);
4144 }
4145 
/*
 * Detach and destroy a child device, releasing every resource that was
 * reserved for it and freeing its config-space bookkeeping.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * An entry that is still active or busy means a
			 * driver did not release it on detach; complain,
			 * then force-release it so the unreserve below
			 * can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4185 
/*
 * Bus delete_resource method: drop a resource list entry for one of our
 * immediate children, unreserving (and, for BARs, disabling) the
 * underlying resource first.  Refuses to delete a resource that the
 * child still holds active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only manage resources of our own immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4228 
4229 struct resource_list *
4230 pci_get_resource_list (device_t dev, device_t child)
4231 {
4232 	struct pci_devinfo *dinfo = device_get_ivars(child);
4233 
4234 	return (&dinfo->resources);
4235 }
4236 
4237 uint32_t
4238 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4239 {
4240 	struct pci_devinfo *dinfo = device_get_ivars(child);
4241 	pcicfgregs *cfg = &dinfo->cfg;
4242 
4243 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4244 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4245 }
4246 
4247 void
4248 pci_write_config_method(device_t dev, device_t child, int reg,
4249     uint32_t val, int width)
4250 {
4251 	struct pci_devinfo *dinfo = device_get_ivars(child);
4252 	pcicfgregs *cfg = &dinfo->cfg;
4253 
4254 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4255 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4256 }
4257 
4258 int
4259 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4260     size_t buflen)
4261 {
4262 
4263 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4264 	    pci_get_function(child));
4265 	return (0);
4266 }
4267 
4268 int
4269 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4270     size_t buflen)
4271 {
4272 	struct pci_devinfo *dinfo;
4273 	pcicfgregs *cfg;
4274 
4275 	dinfo = device_get_ivars(child);
4276 	cfg = &dinfo->cfg;
4277 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4278 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4279 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4280 	    cfg->progif);
4281 	return (0);
4282 }
4283 
4284 int
4285 pci_assign_interrupt_method(device_t dev, device_t child)
4286 {
4287 	struct pci_devinfo *dinfo = device_get_ivars(child);
4288 	pcicfgregs *cfg = &dinfo->cfg;
4289 
4290 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4291 	    cfg->intpin));
4292 }
4293 
4294 static int
4295 pci_modevent(module_t mod, int what, void *arg)
4296 {
4297 	static struct cdev *pci_cdev;
4298 
4299 	switch (what) {
4300 	case MOD_LOAD:
4301 		STAILQ_INIT(&pci_devq);
4302 		pci_generation = 0;
4303 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4304 		    "pci");
4305 		pci_load_vendor_data();
4306 		break;
4307 
4308 	case MOD_UNLOAD:
4309 		destroy_dev(pci_cdev);
4310 		break;
4311 	}
4312 
4313 	return (0);
4314 }
4315 
/*
 * Restore a device's saved configuration registers from the cached copy
 * in dinfo (used e.g. on resume, after config space may have been lost).
 * Only plain type 0 headers are handled here.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* BARs are rewritten only once the device is back in D0. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4357 
/*
 * Snapshot a device's writable configuration registers into the cached
 * copy in dinfo so pci_cfg_restore() can replay them later.  If
 * setstate is non-zero, optionally power the device down to D3
 * according to the pci_do_power_nodriver policy.  Only plain type 0
 * headers are handled here.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Policy knob: which deviceless classes may be powered down. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4437