xref: /freebsd/sys/dev/pci/pci.c (revision 0e1497aefd602cea581d2380d22e67dfdcac6b4e)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 static pci_addr_t	pci_mapbase(uint64_t mapreg);
73 static const char	*pci_maptype(uint64_t mapreg);
74 static int		pci_mapsize(uint64_t testval);
75 static int		pci_maprange(uint64_t mapreg);
76 static pci_addr_t	pci_rombase(uint64_t mapreg);
77 static int		pci_romsize(uint64_t testval);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80 
81 static int		pci_porten(device_t dev);
82 static int		pci_memen(device_t dev);
83 static void		pci_assign_interrupt(device_t bus, device_t dev,
84 			    int force_route);
85 static int		pci_add_map(device_t bus, device_t dev, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 static int		pci_remap_intr_method(device_t bus, device_t dev,
115 			    u_int irq);
116 
/*
 * Method dispatch table for the PCI bus driver.  The kobj framework
 * resolves each DEVMETHOD() entry when the class is created; the table
 * must be terminated by the all-zero sentinel entry.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminating sentinel required by the kobj method table format. */
	{ 0, 0 }
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Register the pci driver to attach to pcib (PCI bridge) devices. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database loaded via pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
178 
179 
/*
 * Table of known-broken (or known-good-despite-appearances) devices,
 * keyed by the combined vendor/device ID.  Interpretation of arg1/arg2
 * depends on the quirk type.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* type-specific argument (e.g. map register offset) */
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/* All-zero entry terminates the table. */
	{ 0 }
};
230 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices (see pci_read_device()). */
struct devlist pci_devq;
uint32_t pci_generation;	/* incremented whenever a device is added */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set in pci_read_extcap() once a PCIe / PCI-X capability is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Early USB takeover defaults on only where BIOS legacy USB is common. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
298 
/*
 * Find a device_t by bus/slot/function in domain 0.
 * Convenience wrapper around pci_find_dbsf(); returns NULL if no
 * enumerated device matches.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
307 
308 /* Find a device_t by domain/bus/slot/function */
309 
310 device_t
311 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
312 {
313 	struct pci_devinfo *dinfo;
314 
315 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
316 		if ((dinfo->cfg.domain == domain) &&
317 		    (dinfo->cfg.bus == bus) &&
318 		    (dinfo->cfg.slot == slot) &&
319 		    (dinfo->cfg.func == func)) {
320 			return (dinfo->cfg.dev);
321 		}
322 	}
323 
324 	return (NULL);
325 }
326 
327 /* Find a device_t by vendor/device ID */
328 
329 device_t
330 pci_find_device(uint16_t vendor, uint16_t device)
331 {
332 	struct pci_devinfo *dinfo;
333 
334 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
335 		if ((dinfo->cfg.vendor == vendor) &&
336 		    (dinfo->cfg.device == device)) {
337 			return (dinfo->cfg.dev);
338 		}
339 	}
340 
341 	return (NULL);
342 }
343 
344 static int
345 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
346 {
347 	va_list ap;
348 	int retval;
349 
350 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
351 	    cfg->func);
352 	va_start(ap, fmt);
353 	retval += vprintf(fmt, ap);
354 	va_end(ap);
355 	return (retval);
356 }
357 
358 /* return base address of memory or port map */
359 
360 static pci_addr_t
361 pci_mapbase(uint64_t mapreg)
362 {
363 
364 	if (PCI_BAR_MEM(mapreg))
365 		return (mapreg & PCIM_BAR_MEM_BASE);
366 	else
367 		return (mapreg & PCIM_BAR_IO_BASE);
368 }
369 
370 /* return map type of memory or port map */
371 
372 static const char *
373 pci_maptype(uint64_t mapreg)
374 {
375 
376 	if (PCI_BAR_IO(mapreg))
377 		return ("I/O Port");
378 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
379 		return ("Prefetchable Memory");
380 	return ("Memory");
381 }
382 
383 /* return log2 of map size decoded for memory or port map */
384 
385 static int
386 pci_mapsize(uint64_t testval)
387 {
388 	int ln2size;
389 
390 	testval = pci_mapbase(testval);
391 	ln2size = 0;
392 	if (testval != 0) {
393 		while ((testval & 1) == 0)
394 		{
395 			ln2size++;
396 			testval >>= 1;
397 		}
398 	}
399 	return (ln2size);
400 }
401 
/*
 * Return base address of device ROM: the BIOS/expansion ROM register
 * with the enable and reserved bits masked off.
 */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
410 
411 /* return log2 of map size decided for device ROM */
412 
413 static int
414 pci_romsize(uint64_t testval)
415 {
416 	int ln2size;
417 
418 	testval = pci_rombase(testval);
419 	ln2size = 0;
420 	if (testval != 0) {
421 		while ((testval & 1) == 0)
422 		{
423 			ln2size++;
424 			testval >>= 1;
425 		}
426 	}
427 	return (ln2size);
428 }
429 
430 /* return log2 of address range supported by map register */
431 
432 static int
433 pci_maprange(uint64_t mapreg)
434 {
435 	int ln2range = 0;
436 
437 	if (PCI_BAR_IO(mapreg))
438 		ln2range = 32;
439 	else
440 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
441 		case PCIM_BAR_MEM_32:
442 			ln2range = 32;
443 			break;
444 		case PCIM_BAR_MEM_1MB:
445 			ln2range = 20;
446 			break;
447 		case PCIM_BAR_MEM_64:
448 			ln2range = 64;
449 			break;
450 		}
451 	return (ln2range);
452 }
453 
454 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
455 
456 static void
457 pci_fixancient(pcicfgregs *cfg)
458 {
459 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
460 		return;
461 
462 	/* PCI to PCI bridges use header type 1 */
463 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
464 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
465 }
466 
/*
 * Extract header-type specific config data: the subvendor/subdevice
 * IDs (where the header layout defines them) and the number of BARs
 * for this header type.  Unknown header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridge headers: only the BAR count; no subvendor regs here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
490 
/*
 * Read the configuration header of function domain/bus/slot/func into
 * a freshly allocated pci_devinfo (of caller-specified 'size', which
 * must be at least sizeof(struct pci_devinfo)).  On success the entry
 * is appended to the global pci_devq list and returned; NULL is
 * returned if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device register means no device present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* M_WAITOK means this cannot fail; NULL check is defensive. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard type-independent header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record the multi-function bit, then strip it from hdrtype. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list only if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the interesting fields into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
565 
/*
 * Walk the device's capability list and cache the location/contents of
 * the capabilities this driver cares about (power management,
 * HyperTransport MSI mapping, MSI, MSI-X, VPD, subvendor, PCI-X,
 * PCI-express) into *cfg.  Devices without capability support are
 * ignored.  Note: the REG/WREG macros defined here are deliberately
 * NOT #undef'd — the VPD helpers below reuse them (see pci_read_vpd()).
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets must fit in config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset pairs. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
701 
702 /*
703  * PCI Vital Product Data
704  */
705 
706 #define	PCI_VPD_TIMEOUT		1000000
707 
708 static int
709 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
710 {
711 	int count = PCI_VPD_TIMEOUT;
712 
713 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
714 
715 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
716 
717 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
718 		if (--count < 0)
719 			return (ENXIO);
720 		DELAY(1);	/* limit looping */
721 	}
722 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
723 
724 	return (0);
725 }
726 
#if 0
/*
 * Write one 32-bit dword of VPD data at VPD address 'reg' (currently
 * compiled out — no in-tree callers).  Writing the address with the
 * flag bit (0x8000) set starts a write; the hardware clears the flag
 * when the write has completed.  Returns 0, or ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	/* Fix grammar in the panic message ("must by" -> "must be"). */
	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
746 
747 #undef PCI_VPD_TIMEOUT
748 
/*
 * Cursor state for reading VPD data one byte at a time via
 * vpd_nextbyte().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* byte offset of next 32-bit read */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
757 
758 static int
759 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
760 {
761 	uint32_t reg;
762 	uint8_t byte;
763 
764 	if (vrs->bytesinval == 0) {
765 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
766 			return (ENXIO);
767 		vrs->val = le32toh(reg);
768 		vrs->off += 4;
769 		byte = vrs->val & 0xff;
770 		vrs->bytesinval = 3;
771 	} else {
772 		vrs->val = vrs->val >> 8;
773 		byte = vrs->val & 0xff;
774 		vrs->bytesinval--;
775 	}
776 
777 	vrs->cksum += byte;
778 	*data = byte;
779 	return (0);
780 }
781 
/*
 * Parse the device's Vital Product Data into cfg->vpd.  Implemented as
 * a byte-driven state machine:
 *
 *   state 0: resource tag (large tags have bit 7 set and a 16-bit
 *            length; small tags pack length/name into one byte)
 *   state 1: Identifier String -> cfg->vpd.vpd_ident
 *   state 2: VPD-R keyword header (2-byte keyword + length)
 *   state 3: VPD-R keyword value -> cfg->vpd.vpd_ros[]
 *   state 4: skip bytes of an unhandled item
 *   state 5: VPD-W keyword header
 *   state 6: VPD-W keyword value -> cfg->vpd.vpd_w[]
 *   state -1: normal termination; state -2: I/O error
 *
 * The read-only section carries an "RV" checksum keyword; if the
 * running checksum doesn't validate, the read-only data is discarded.
 * On I/O error everything parsed so far is freed.  The REG/WREG macros
 * inherited from pci_read_extcap() are finally #undef'd here.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet seen, 1 = good, 0 = bad */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource tag: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length may not run past the 0x7f-dword VPD window. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource tag: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" carries the checksum byte; validate once. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes of an item we don't interpret */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark VPD as parsed (even on failure) so we don't retry forever. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1061 
1062 int
1063 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1064 {
1065 	struct pci_devinfo *dinfo = device_get_ivars(child);
1066 	pcicfgregs *cfg = &dinfo->cfg;
1067 
1068 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1069 		pci_read_vpd(device_get_parent(dev), cfg);
1070 
1071 	*identptr = cfg->vpd.vpd_ident;
1072 
1073 	if (*identptr == NULL)
1074 		return (ENXIO);
1075 
1076 	return (0);
1077 }
1078 
1079 int
1080 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1081 	const char **vptr)
1082 {
1083 	struct pci_devinfo *dinfo = device_get_ivars(child);
1084 	pcicfgregs *cfg = &dinfo->cfg;
1085 	int i;
1086 
1087 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1088 		pci_read_vpd(device_get_parent(dev), cfg);
1089 
1090 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1091 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1092 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1093 			*vptr = cfg->vpd.vpd_ros[i].value;
1094 		}
1095 
1096 	if (i != cfg->vpd.vpd_rocnt)
1097 		return (0);
1098 
1099 	*vptr = NULL;
1100 	return (ENXIO);
1101 }
1102 
1103 /*
1104  * Find the requested extended capability and return the offset in
1105  * configuration space via the pointer provided. The function returns
1106  * 0 on success and error code otherwise.
1107  */
1108 int
1109 pci_find_extcap_method(device_t dev, device_t child, int capability,
1110     int *capreg)
1111 {
1112 	struct pci_devinfo *dinfo = device_get_ivars(child);
1113 	pcicfgregs *cfg = &dinfo->cfg;
1114 	u_int32_t status;
1115 	u_int8_t ptr;
1116 
1117 	/*
1118 	 * Check the CAP_LIST bit of the PCI status register first.
1119 	 */
1120 	status = pci_read_config(child, PCIR_STATUS, 2);
1121 	if (!(status & PCIM_STATUS_CAPPRESENT))
1122 		return (ENXIO);
1123 
1124 	/*
1125 	 * Determine the start pointer of the capabilities list.
1126 	 */
1127 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1128 	case PCIM_HDRTYPE_NORMAL:
1129 	case PCIM_HDRTYPE_BRIDGE:
1130 		ptr = PCIR_CAP_PTR;
1131 		break;
1132 	case PCIM_HDRTYPE_CARDBUS:
1133 		ptr = PCIR_CAP_PTR_2;
1134 		break;
1135 	default:
1136 		/* XXX: panic? */
1137 		return (ENXIO);		/* no extended capabilities support */
1138 	}
1139 	ptr = pci_read_config(child, ptr, 1);
1140 
1141 	/*
1142 	 * Traverse the capabilities list.
1143 	 */
1144 	while (ptr != 0) {
1145 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1146 			if (capreg != NULL)
1147 				*capreg = ptr;
1148 			return (0);
1149 		}
1150 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1151 	}
1152 
1153 	return (ENOENT);
1154 }
1155 
1156 /*
1157  * Support for MSI-X message interrupts.
1158  */
/*
 * Program a single MSI-X table entry with the given message address
 * and data.  The caller is responsible for masking/unmasking the
 * vector as appropriate.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each table entry is 16 bytes: addr lo, addr hi, data, vctrl. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1175 
1176 void
1177 pci_mask_msix(device_t dev, u_int index)
1178 {
1179 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1180 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1181 	uint32_t offset, val;
1182 
1183 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1184 	offset = msix->msix_table_offset + index * 16 + 12;
1185 	val = bus_read_4(msix->msix_table_res, offset);
1186 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1187 		val |= PCIM_MSIX_VCTRL_MASK;
1188 		bus_write_4(msix->msix_table_res, offset, val);
1189 	}
1190 }
1191 
1192 void
1193 pci_unmask_msix(device_t dev, u_int index)
1194 {
1195 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1196 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1197 	uint32_t offset, val;
1198 
1199 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1200 	offset = msix->msix_table_offset + index * 16 + 12;
1201 	val = bus_read_4(msix->msix_table_res, offset);
1202 	if (val & PCIM_MSIX_VCTRL_MASK) {
1203 		val &= ~PCIM_MSIX_VCTRL_MASK;
1204 		bus_write_4(msix->msix_table_res, offset, val);
1205 	}
1206 }
1207 
1208 int
1209 pci_pending_msix(device_t dev, u_int index)
1210 {
1211 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1212 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1213 	uint32_t offset, bit;
1214 
1215 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1216 	offset = msix->msix_pba_offset + (index / 32) * 4;
1217 	bit = 1 << index % 32;
1218 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1219 }
1220 
/*
 * Restore MSI-X registers and table during resume.  If MSI-X is
 * enabled then walk the virtual table to restore the actual MSI-X
 * table.
 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1253 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory resources
	 * backing the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table's BAR, 'rle' still refers to it. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' may be less than requested if the parent ran out. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors before enabling MSI-X in the control register. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Initially, message i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1390 
1391 /*
1392  * By default, pci_alloc_msix() will assign the allocated IRQ
1393  * resources consecutively to the first N messages in the MSI-X table.
1394  * However, device drivers may want to use different layouts if they
1395  * either receive fewer messages than they asked for, or they wish to
1396  * populate the MSI-X table sparsely.  This method allows the driver
1397  * to specify what layout it wants.  It must be called after a
1398  * successful pci_alloc_msix() but before any of the associated
1399  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1400  *
1401  * The 'vectors' array contains 'count' message vectors.  The array
1402  * maps directly to the MSI-X table in that index 0 in the array
1403  * specifies the vector for the first message in the MSI-X table, etc.
1404  * The vector value in each array index can either be 0 to indicate
1405  * that no vector should be assigned to a message slot, or it can be a
1406  * number from 1 to N (where N is the count returned from a
1407  * succcessful call to pci_alloc_msix()) to indicate which message
1408  * vector (IRQ) to be used for the corresponding message.
1409  *
1410  * On successful return, each message with a non-zero vector will have
1411  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1412  * 1.  Additionally, if any of the IRQs allocated via the previous
1413  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1414  * will be freed back to the system automatically.
1415  *
1416  * For example, suppose a driver has a MSI-X table with 6 messages and
1417  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1418  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1419  * C.  After the call to pci_alloc_msix(), the device will be setup to
1420  * have an MSI-X table of ABC--- (where - means no vector assigned).
1421  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1422  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1423  * be freed back to the system.  This device will also have valid
1424  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1425  *
1426  * In any case, the SYS_RES_IRQ rid X will always map to the message
1427  * at MSI-X table index X - 1 and will only be valid if a vector is
1428  * assigned to that table entry.
1429  */
1430 int
1431 pci_remap_msix_method(device_t dev, device_t child, int count,
1432     const u_int *vectors)
1433 {
1434 	struct pci_devinfo *dinfo = device_get_ivars(child);
1435 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1436 	struct resource_list_entry *rle;
1437 	int i, irq, j, *used;
1438 
1439 	/*
1440 	 * Have to have at least one message in the table but the
1441 	 * table can't be bigger than the actual MSI-X table in the
1442 	 * device.
1443 	 */
1444 	if (count == 0 || count > msix->msix_msgnum)
1445 		return (EINVAL);
1446 
1447 	/* Sanity check the vectors. */
1448 	for (i = 0; i < count; i++)
1449 		if (vectors[i] > msix->msix_alloc)
1450 			return (EINVAL);
1451 
1452 	/*
1453 	 * Make sure there aren't any holes in the vectors to be used.
1454 	 * It's a big pain to support it, and it doesn't really make
1455 	 * sense anyway.  Also, at least one vector must be used.
1456 	 */
1457 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1458 	    M_ZERO);
1459 	for (i = 0; i < count; i++)
1460 		if (vectors[i] != 0)
1461 			used[vectors[i] - 1] = 1;
1462 	for (i = 0; i < msix->msix_alloc - 1; i++)
1463 		if (used[i] == 0 && used[i + 1] == 1) {
1464 			free(used, M_DEVBUF);
1465 			return (EINVAL);
1466 		}
1467 	if (used[0] != 1) {
1468 		free(used, M_DEVBUF);
1469 		return (EINVAL);
1470 	}
1471 
1472 	/* Make sure none of the resources are allocated. */
1473 	for (i = 0; i < msix->msix_table_len; i++) {
1474 		if (msix->msix_table[i].mte_vector == 0)
1475 			continue;
1476 		if (msix->msix_table[i].mte_handlers > 0)
1477 			return (EBUSY);
1478 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1479 		KASSERT(rle != NULL, ("missing resource"));
1480 		if (rle->res != NULL)
1481 			return (EBUSY);
1482 	}
1483 
1484 	/* Free the existing resource list entries. */
1485 	for (i = 0; i < msix->msix_table_len; i++) {
1486 		if (msix->msix_table[i].mte_vector == 0)
1487 			continue;
1488 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1489 	}
1490 
1491 	/*
1492 	 * Build the new virtual table keeping track of which vectors are
1493 	 * used.
1494 	 */
1495 	free(msix->msix_table, M_DEVBUF);
1496 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1497 	    M_DEVBUF, M_WAITOK | M_ZERO);
1498 	for (i = 0; i < count; i++)
1499 		msix->msix_table[i].mte_vector = vectors[i];
1500 	msix->msix_table_len = count;
1501 
1502 	/* Free any unused IRQs and resize the vectors array if necessary. */
1503 	j = msix->msix_alloc - 1;
1504 	if (used[j] == 0) {
1505 		struct msix_vector *vec;
1506 
1507 		while (used[j] == 0) {
1508 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1509 			    msix->msix_vectors[j].mv_irq);
1510 			j--;
1511 		}
1512 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1513 		    M_WAITOK);
1514 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1515 		    (j + 1));
1516 		free(msix->msix_vectors, M_DEVBUF);
1517 		msix->msix_vectors = vec;
1518 		msix->msix_alloc = j + 1;
1519 	}
1520 	free(used, M_DEVBUF);
1521 
1522 	/* Map the IRQs onto the rids. */
1523 	for (i = 0; i < count; i++) {
1524 		if (vectors[i] == 0)
1525 			continue;
1526 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1527 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1528 		    irq, 1);
1529 	}
1530 
1531 	if (bootverbose) {
1532 		device_printf(child, "Remapped MSI-X IRQs as: ");
1533 		for (i = 0; i < count; i++) {
1534 			if (i != 0)
1535 				printf(", ");
1536 			if (vectors[i] == 0)
1537 				printf("---");
1538 			else
1539 				printf("%d",
1540 				    msix->msix_vectors[vectors[i]].mv_irq);
1541 		}
1542 		printf("\n");
1543 	}
1544 
1545 	return (0);
1546 }
1547 
/*
 * Disable MSI-X and release all messages and resources allocated for
 * it.  Returns ENODEV if no MSI-X messages are allocated, and EBUSY
 * if any message still has an established interrupt handler or an
 * allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1594 
1595 /*
1596  * Return the max supported MSI-X messages this device supports.
1597  * Basically, assuming the MD code can alloc messages, this function
1598  * should return the maximum value that pci_alloc_msix() can return.
1599  * Thus, it is subject to the tunables, etc.
1600  */
1601 int
1602 pci_msix_count_method(device_t dev, device_t child)
1603 {
1604 	struct pci_devinfo *dinfo = device_get_ivars(child);
1605 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1606 
1607 	if (pci_do_msix && msix->msix_location != 0)
1608 		return (msix->msix_msgnum);
1609 	return (0);
1610 }
1611 
1612 /*
1613  * HyperTransport MSI mapping control
1614  */
1615 void
1616 pci_ht_map_msi(device_t dev, uint64_t addr)
1617 {
1618 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1619 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1620 
1621 	if (!ht->ht_msimap)
1622 		return;
1623 
1624 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1625 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1626 		/* Enable MSI -> HT mapping. */
1627 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1628 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1629 		    ht->ht_msictrl, 2);
1630 	}
1631 
1632 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1633 		/* Disable MSI -> HT mapping. */
1634 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1635 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1636 		    ht->ht_msictrl, 2);
1637 	}
1638 }
1639 
1640 int
1641 pci_get_max_read_req(device_t dev)
1642 {
1643 	int cap;
1644 	uint16_t val;
1645 
1646 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1647 		return (0);
1648 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1649 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1650 	val >>= 12;
1651 	return (1 << (val + 7));
1652 }
1653 
1654 int
1655 pci_set_max_read_req(device_t dev, int size)
1656 {
1657 	int cap;
1658 	uint16_t val;
1659 
1660 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1661 		return (0);
1662 	if (size < 128)
1663 		size = 128;
1664 	if (size > 4096)
1665 		size = 4096;
1666 	size = (1 << (fls(size) - 1));
1667 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1668 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1669 	val |= (fls(size) - 8) << 12;
1670 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1671 	return (size);
1672 }
1673 
/*
 * Support for MSI message signalled interrupts.
 */

/*
 * Program the MSI address and data registers and set the MSI enable
 * bit in the capability's control register.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit devices place the data register further out. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1703 
/*
 * Disable MSI delivery for the device: tear down any MSI -> HT
 * mapping first, then clear the MSI enable bit in the control
 * register.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1718 
/*
 * Restore MSI registers during resume.  If MSI is enabled then
 * restore the data and address registers in addition to the control
 * register.
 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the address/data values saved in the softc. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1749 
/*
 * Re-route an already-allocated MSI or MSI-X IRQ: request fresh
 * address/data values from the parent bridge and reprogram every
 * message slot that uses the given IRQ.  Returns ENOENT if the IRQ
 * does not belong to this device's MSI/MSI-X allocation.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable while reprogramming, then re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is 1-based. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): this path returns ENOENT even when a
		 * matching vector was found and updated above --
		 * confirm that callers treat the return value of the
		 * MSI-X case as advisory.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1822 
1823 /*
1824  * Returns true if the specified device is blacklisted because MSI
1825  * doesn't work.
1826  */
1827 int
1828 pci_msi_device_blacklisted(device_t dev)
1829 {
1830 	struct pci_quirk *q;
1831 
1832 	if (!pci_honor_msi_blacklist)
1833 		return (0);
1834 
1835 	for (q = &pci_quirks[0]; q->devid; q++) {
1836 		if (q->devid == pci_get_devid(dev) &&
1837 		    q->type == PCI_QUIRK_DISABLE_MSI)
1838 			return (1);
1839 	}
1840 	return (0);
1841 }
1842 
1843 /*
1844  * Returns true if a specified chipset supports MSI when it is
1845  * emulated hardware in a virtual machine.
1846  */
1847 static int
1848 pci_msi_vm_chipset(device_t dev)
1849 {
1850 	struct pci_quirk *q;
1851 
1852 	for (q = &pci_quirks[0]; q->devid; q++) {
1853 		if (q->devid == pci_get_devid(dev) &&
1854 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1855 			return (1);
1856 	}
1857 	return (0);
1858 }
1859 
1860 /*
1861  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1862  * we just check for blacklisted chipsets as represented by the
1863  * host-PCI bridge at device 0:0:0.  In the future, it may become
1864  * necessary to check other system attributes, such as the kenv values
1865  * that give the motherboard manufacturer and model number.
1866  */
1867 static int
1868 pci_msi_blacklisted(void)
1869 {
1870 	device_t dev;
1871 
1872 	if (!pci_honor_msi_blacklist)
1873 		return (0);
1874 
1875 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1876 	if (!(pcie_chipset || pcix_chipset)) {
1877 		if (vm_guest != VM_GUEST_NO) {
1878 			dev = pci_find_bsf(0, 0, 0);
1879 			if (dev != NULL)
1880 				return (pci_msi_vm_chipset(dev) == 0);
1881 		}
1882 		return (1);
1883 	}
1884 
1885 	dev = pci_find_bsf(0, 0, 0);
1886 	if (dev != NULL)
1887 		return (pci_msi_device_blacklisted(dev));
1888 	return (0);
1889 }
1890 
/*
 * Attempt to allocate *count MSI messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2.  Halving keeps the count a power of 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count: the Multiple
	 * Message Enable field encodes log2 of the message count.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2014 
2015 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated, in which case we fall through
	 * and try plain MSI; any other result (success or failure) is
	 * final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  A vector with
	 * a registered handler or an outstanding SYS_RES_IRQ allocation
	 * blocks the release.  Collect the IRQ numbers while walking the
	 * list so they can be handed back to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2063 
2064 /*
2065  * Return the max supported MSI messages this device supports.
2066  * Basically, assuming the MD code can alloc messages, this function
2067  * should return the maximum value that pci_alloc_msi() can return.
2068  * Thus, it is subject to the tunables, etc.
2069  */
2070 int
2071 pci_msi_count_method(device_t dev, device_t child)
2072 {
2073 	struct pci_devinfo *dinfo = device_get_ivars(child);
2074 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2075 
2076 	if (pci_do_msi && msi->msi_location != 0)
2077 		return (msi->msi_msgnum);
2078 	return (0);
2079 }
2080 
2081 /* free pcicfgregs structure and all depending data structures */
2082 
2083 int
2084 pci_freecfg(struct pci_devinfo *dinfo)
2085 {
2086 	struct devlist *devlist_head;
2087 	int i;
2088 
2089 	devlist_head = &pci_devq;
2090 
2091 	if (dinfo->cfg.vpd.vpd_reg) {
2092 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2093 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2094 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2095 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2096 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2097 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2098 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2099 	}
2100 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2101 	free(dinfo, M_DEVBUF);
2102 
2103 	/* increment the generation count */
2104 	pci_generation++;
2105 
2106 	/* we're losing one device */
2107 	pci_numdevs--;
2108 	return (0);
2109 }
2110 
2111 /*
2112  * PCI power manangement
2113  */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int result, oldstate, highest, delay;

	/* Without a power management capability we cannot change state. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the status register, clearing only the state field. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	result = 0;	/* XXX(review): set but never used below. */
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 is optional; refuse it if the capability lacks it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 is optional as well. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	/* Write the new state, then honor the required settle time. */
	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2186 
2187 int
2188 pci_get_powerstate_method(device_t dev, device_t child)
2189 {
2190 	struct pci_devinfo *dinfo = device_get_ivars(child);
2191 	pcicfgregs *cfg = &dinfo->cfg;
2192 	uint16_t status;
2193 	int result;
2194 
2195 	if (cfg->pp.pp_cap != 0) {
2196 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2197 		switch (status & PCIM_PSTAT_DMASK) {
2198 		case PCIM_PSTAT_D0:
2199 			result = PCI_POWERSTATE_D0;
2200 			break;
2201 		case PCIM_PSTAT_D1:
2202 			result = PCI_POWERSTATE_D1;
2203 			break;
2204 		case PCIM_PSTAT_D2:
2205 			result = PCI_POWERSTATE_D2;
2206 			break;
2207 		case PCIM_PSTAT_D3:
2208 			result = PCI_POWERSTATE_D3;
2209 			break;
2210 		default:
2211 			result = PCI_POWERSTATE_UNKNOWN;
2212 			break;
2213 		}
2214 	} else {
2215 		/* No support, device is always at D0 */
2216 		result = PCI_POWERSTATE_D0;
2217 	}
2218 	return (result);
2219 }
2220 
2221 /*
2222  * Some convenience functions for PCI device drivers.
2223  */
2224 
2225 static __inline void
2226 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2227 {
2228 	uint16_t	command;
2229 
2230 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2231 	command |= bit;
2232 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2233 }
2234 
2235 static __inline void
2236 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2237 {
2238 	uint16_t	command;
2239 
2240 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2241 	command &= ~bit;
2242 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2243 }
2244 
2245 int
2246 pci_enable_busmaster_method(device_t dev, device_t child)
2247 {
2248 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2249 	return (0);
2250 }
2251 
2252 int
2253 pci_disable_busmaster_method(device_t dev, device_t child)
2254 {
2255 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2256 	return (0);
2257 }
2258 
2259 int
2260 pci_enable_io_method(device_t dev, device_t child, int space)
2261 {
2262 	uint16_t bit;
2263 
2264 	switch(space) {
2265 	case SYS_RES_IOPORT:
2266 		bit = PCIM_CMD_PORTEN;
2267 		break;
2268 	case SYS_RES_MEMORY:
2269 		bit = PCIM_CMD_MEMEN;
2270 		break;
2271 	default:
2272 		return (EINVAL);
2273 	}
2274 	pci_set_command_bit(dev, child, bit);
2275 	return (0);
2276 }
2277 
2278 int
2279 pci_disable_io_method(device_t dev, device_t child, int space)
2280 {
2281 	uint16_t bit;
2282 
2283 	switch(space) {
2284 	case SYS_RES_IOPORT:
2285 		bit = PCIM_CMD_PORTEN;
2286 		break;
2287 	case SYS_RES_MEMORY:
2288 		bit = PCIM_CMD_MEMEN;
2289 		break;
2290 	default:
2291 		return (EINVAL);
2292 	}
2293 	pci_clear_command_bit(dev, child, bit);
2294 	return (0);
2295 }
2296 
2297 /*
2298  * New style pci driver.  Parent device is either a pci-host-bridge or a
2299  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2300  */
2301 
/*
 * Dump a device's config header, power management, MSI and MSI-X
 * capabilities to the console.  Only active when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state for the report. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2358 
2359 static int
2360 pci_porten(device_t dev)
2361 {
2362 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2363 }
2364 
2365 static int
2366 pci_memen(device_t dev)
2367 {
2368 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2369 }
2370 
/*
 * Read a BAR's current value into '*mapp' and size it by writing
 * all-ones, returning the sizing probe in '*testvalp'.  The BAR (and
 * command register) are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* 64-bit BARs span two config registers; merge the high half. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding now that the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2432 
/*
 * Program a BAR with a new base address, writing both halves of a
 * 64-bit BAR.
 */
static void
pci_write_bar(device_t dev, int reg, pci_addr_t base)
{
	pci_addr_t map;
	int ln2range;

	/* The current value is only needed to determine the BAR's width. */
	map = pci_read_config(dev, reg, 4);

	/* The device ROM BAR is always 32-bits. */
	if (reg == PCIR_BIOS)
		return;
	/*
	 * NOTE(review): the early return above leaves the ROM BAR
	 * unwritten -- 'base' is silently dropped for PCIR_BIOS and the
	 * config read becomes unused.  Confirm this is intentional.
	 */
	ln2range = pci_maprange(map);
	pci_write_config(dev, reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, base >> 32, 4);
}
2449 
2450 /*
2451  * Add a resource based on a pci map register. Return 1 if the map
2452  * register is a 32bit map register or 2 if it is a 64bit register.
2453  */
2454 static int
2455 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2456     int force, int prefetch)
2457 {
2458 	pci_addr_t base, map, testval;
2459 	pci_addr_t start, end, count;
2460 	int barlen, basezero, maprange, mapsize, type;
2461 	uint16_t cmd;
2462 	struct resource *res;
2463 
2464 	pci_read_bar(dev, reg, &map, &testval);
2465 	if (PCI_BAR_MEM(map)) {
2466 		type = SYS_RES_MEMORY;
2467 		if (map & PCIM_BAR_MEM_PREFETCH)
2468 			prefetch = 1;
2469 	} else
2470 		type = SYS_RES_IOPORT;
2471 	mapsize = pci_mapsize(testval);
2472 	base = pci_mapbase(map);
2473 #ifdef __PCI_BAR_ZERO_VALID
2474 	basezero = 0;
2475 #else
2476 	basezero = base == 0;
2477 #endif
2478 	maprange = pci_maprange(map);
2479 	barlen = maprange == 64 ? 2 : 1;
2480 
2481 	/*
2482 	 * For I/O registers, if bottom bit is set, and the next bit up
2483 	 * isn't clear, we know we have a BAR that doesn't conform to the
2484 	 * spec, so ignore it.  Also, sanity check the size of the data
2485 	 * areas to the type of memory involved.  Memory must be at least
2486 	 * 16 bytes in size, while I/O ranges must be at least 4.
2487 	 */
2488 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2489 		return (barlen);
2490 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2491 	    (type == SYS_RES_IOPORT && mapsize < 2))
2492 		return (barlen);
2493 
2494 	if (bootverbose) {
2495 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2496 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2497 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2498 			printf(", port disabled\n");
2499 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2500 			printf(", memory disabled\n");
2501 		else
2502 			printf(", enabled\n");
2503 	}
2504 
2505 	/*
2506 	 * If base is 0, then we have problems if this architecture does
2507 	 * not allow that.  It is best to ignore such entries for the
2508 	 * moment.  These will be allocated later if the driver specifically
2509 	 * requests them.  However, some removable busses look better when
2510 	 * all resources are allocated, so allow '0' to be overriden.
2511 	 *
2512 	 * Similarly treat maps whose values is the same as the test value
2513 	 * read back.  These maps have had all f's written to them by the
2514 	 * BIOS in an attempt to disable the resources.
2515 	 */
2516 	if (!force && (basezero || map == testval))
2517 		return (barlen);
2518 	if ((u_long)base != base) {
2519 		device_printf(bus,
2520 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2521 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2522 		    pci_get_function(dev), reg);
2523 		return (barlen);
2524 	}
2525 
2526 	/*
2527 	 * This code theoretically does the right thing, but has
2528 	 * undesirable side effects in some cases where peripherals
2529 	 * respond oddly to having these bits enabled.  Let the user
2530 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2531 	 * default).
2532 	 */
2533 	if (pci_enable_io_modes) {
2534 		/* Turn on resources that have been left off by a lazy BIOS */
2535 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2536 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2537 			cmd |= PCIM_CMD_PORTEN;
2538 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2539 		}
2540 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2541 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2542 			cmd |= PCIM_CMD_MEMEN;
2543 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2544 		}
2545 	} else {
2546 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2547 			return (barlen);
2548 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2549 			return (barlen);
2550 	}
2551 
2552 	count = 1 << mapsize;
2553 	if (basezero || base == pci_mapbase(testval)) {
2554 		start = 0;	/* Let the parent decide. */
2555 		end = ~0ULL;
2556 	} else {
2557 		start = base;
2558 		end = base + (1 << mapsize) - 1;
2559 	}
2560 	resource_list_add(rl, type, reg, start, end, count);
2561 
2562 	/*
2563 	 * Try to allocate the resource for this BAR from our parent
2564 	 * so that this resource range is already reserved.  The
2565 	 * driver for this device will later inherit this resource in
2566 	 * pci_alloc_resource().
2567 	 */
2568 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2569 	    prefetch ? RF_PREFETCHABLE : 0);
2570 	if (res == NULL) {
2571 		/*
2572 		 * If the allocation fails, clear the BAR and delete
2573 		 * the resource list entry to force
2574 		 * pci_alloc_resource() to allocate resources from the
2575 		 * parent.
2576 		 */
2577 		resource_list_delete(rl, type, reg);
2578 		start = 0;
2579 	} else
2580 		start = rman_get_start(res);
2581 	pci_write_bar(dev, reg, start);
2582 	return (barlen);
2583 }
2584 
2585 /*
2586  * For ATA devices we need to decide early what addressing mode to use.
2587  * Legacy demands that the primary and secondary ATA ports sits on the
2588  * same addresses that old ISA hardware did. This dictates that we use
2589  * those addresses and ignore the BAR's if we cannot set PCI native
2590  * addressing mode.
2591  */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: in native mode use BARs 0/1, otherwise
	 * reserve the legacy ISA addresses (0x1f0-0x1f7, 0x3f6).  The
	 * reservation result in 'r' is intentionally unused here.
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	/*
	 * Secondary channel: BARs 2/3 in native mode, legacy addresses
	 * (0x170-0x177, 0x376) otherwise.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BARs 4 and 5 are always handled normally. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2646 
/*
 * Determine the IRQ for a device's legacy INTx pin and record it both
 * in the intline register and as the rid 0 SYS_RES_IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only values in the range 1..254 are accepted from the tunable. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2694 
2695 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/*
		 * Request an ownership change and poll for up to 100ms
		 * (100 x 1ms) waiting for the SMM to drop OHCI_IR.
		 */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* If the SMM never released the controller, reset it. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2731 
2732 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Clear the controller's interrupt enable register. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2755 
2756 /* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support extended capability matters. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not hold the semaphore; nothing to do. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore, then wait for BIOS to release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll for up to 100ms (100 x 1ms). */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2811 
/*
 * Populate a device's resource list from its BARs, quirks, and
 * interrupt routing, and perform early USB controller takeover from
 * SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 (64-bit BARs use 2 slots). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from the BIOS/SMM when requested. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2866 
/*
 * Scan every slot/function on a bus and add a child device for each
 * PCI function found.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Probe function 0's header type to see if the slot is used. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2899 
/*
 * Create the device_t for one PCI function and set up its ivars,
 * config state, and resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot config space, then restore it before probing further. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	/* Reserve BAR resources and route the INTx interrupt. */
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2911 
2912 static int
2913 pci_probe(device_t dev)
2914 {
2915 
2916 	device_set_desc(dev, "PCI bus");
2917 
2918 	/* Allow other subclasses to override this driver. */
2919 	return (BUS_PROBE_GENERIC);
2920 }
2921 
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	/* Enumerate all functions on the bus, then attach their drivers. */
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2941 
2942 static void
2943 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
2944     int state)
2945 {
2946 	device_t child, pcib;
2947 	struct pci_devinfo *dinfo;
2948 	int dstate, i;
2949 
2950 	/*
2951 	 * Set the device to the given state.  If the firmware suggests
2952 	 * a different power state, use it instead.  If power management
2953 	 * is not present, the firmware is responsible for managing
2954 	 * device power.  Skip children who aren't attached since they
2955 	 * are handled separately.
2956 	 */
2957 	pcib = device_get_parent(dev);
2958 	for (i = 0; i < numdevs; i++) {
2959 		child = devlist[i];
2960 		dinfo = device_get_ivars(child);
2961 		dstate = state;
2962 		if (device_is_attached(child) &&
2963 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
2964 			pci_set_powerstate(child, dstate);
2965 	}
2966 }
2967 
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		/* Release the list allocated by device_get_children(). */
		free(devlist, M_TEMP);
		return (error);
	}
	/* Power children down to D3 only when pci_do_power_suspend is set. */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
2999 
/*
 * Bus resume method: power the children back up to D0 (under the
 * pci_do_power_resume tunable), restore their saved configuration
 * space, and then resume them — critical device classes first so that
 * e.g. bridges are functional before the devices behind them resume.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Unattached devices are re-saved so later attach works. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3055 
3056 static void
3057 pci_load_vendor_data(void)
3058 {
3059 	caddr_t vendordata, info;
3060 
3061 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3062 		info = preload_search_info(vendordata, MODINFO_ADDR);
3063 		pci_vendordata = *(char **)info;
3064 		info = preload_search_info(vendordata, MODINFO_SIZE);
3065 		pci_vendordata_size = *(size_t *)info;
3066 		/* terminate the database */
3067 		pci_vendordata[pci_vendordata_size] = '\n';
3068 	}
3069 }
3070 
3071 void
3072 pci_driver_added(device_t dev, driver_t *driver)
3073 {
3074 	int numdevs;
3075 	device_t *devlist;
3076 	device_t child;
3077 	struct pci_devinfo *dinfo;
3078 	int i;
3079 
3080 	if (bootverbose)
3081 		device_printf(dev, "driver added\n");
3082 	DEVICE_IDENTIFY(driver, dev);
3083 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3084 		return;
3085 	for (i = 0; i < numdevs; i++) {
3086 		child = devlist[i];
3087 		if (device_get_state(child) != DS_NOTPRESENT)
3088 			continue;
3089 		dinfo = device_get_ivars(child);
3090 		pci_print_verbose(dinfo);
3091 		if (bootverbose)
3092 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3093 		pci_cfg_restore(child, dinfo);
3094 		if (device_probe_and_attach(child) != 0)
3095 			pci_cfg_save(child, dinfo, 1);
3096 	}
3097 	free(devlist, M_TEMP);
3098 }
3099 
/*
 * Bus setup_intr method.  After the generic interrupt hookup succeeds,
 * manage the child's interrupt mode: rid 0 is the legacy INTx line
 * (re-enable it in the command register); any other rid is an MSI or
 * MSI-X message, which must be mapped by the parent bridge and enabled
 * in the device's capability registers on first use.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the device when the first handler arrives. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first handler setup. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* On mapping failure, undo the generic interrupt hookup. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3191 
/*
 * Bus teardown_intr method, the inverse of pci_setup_intr(): rid 0
 * masks the legacy INTx line; for MSI/MSI-X rids the per-message
 * handler count is decremented and the message is disabled or masked
 * when the count drops to zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/* NOTE(review): rle is assumed non-NULL here since the IRQ
		 * was allocated via this resource list — confirm. */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3250 
3251 int
3252 pci_print_child(device_t dev, device_t child)
3253 {
3254 	struct pci_devinfo *dinfo;
3255 	struct resource_list *rl;
3256 	int retval = 0;
3257 
3258 	dinfo = device_get_ivars(child);
3259 	rl = &dinfo->resources;
3260 
3261 	retval += bus_print_child_header(dev, child);
3262 
3263 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3264 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3265 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3266 	if (device_get_flags(dev))
3267 		retval += printf(" flags %#x", device_get_flags(dev));
3268 
3269 	retval += printf(" at device %d.%d", pci_get_slot(child),
3270 	    pci_get_function(child));
3271 
3272 	retval += bus_print_child_footer(dev, child);
3273 
3274 	return (retval);
3275 }
3276 
/*
 * Table of human-readable descriptions for PCI class/subclass codes,
 * used by pci_probe_nomatch() when no loaded vendor database entry
 * matches.  A subclass of -1 gives the generic description for the
 * whole class; entries for a class must follow its -1 entry.  The
 * table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3368 
3369 void
3370 pci_probe_nomatch(device_t dev, device_t child)
3371 {
3372 	int	i;
3373 	char	*cp, *scp, *device;
3374 
3375 	/*
3376 	 * Look for a listing for this device in a loaded device database.
3377 	 */
3378 	if ((device = pci_describe_device(child)) != NULL) {
3379 		device_printf(dev, "<%s>", device);
3380 		free(device, M_DEVBUF);
3381 	} else {
3382 		/*
3383 		 * Scan the class/subclass descriptions for a general
3384 		 * description.
3385 		 */
3386 		cp = "unknown";
3387 		scp = NULL;
3388 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3389 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3390 				if (pci_nomatch_tab[i].subclass == -1) {
3391 					cp = pci_nomatch_tab[i].desc;
3392 				} else if (pci_nomatch_tab[i].subclass ==
3393 				    pci_get_subclass(child)) {
3394 					scp = pci_nomatch_tab[i].desc;
3395 				}
3396 			}
3397 		}
3398 		device_printf(dev, "<%s%s%s>",
3399 		    cp ? cp : "",
3400 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3401 		    scp ? scp : "");
3402 	}
3403 	printf(" at device %d.%d (no driver attached)\n",
3404 	    pci_get_slot(child), pci_get_function(child));
3405 	pci_cfg_save(child, device_get_ivars(child), 1);
3406 	return;
3407 }
3408 
3409 /*
3410  * Parse the PCI device database, if loaded, and return a pointer to a
3411  * description of the device.
3412  *
3413  * The database is flat text formatted as follows:
3414  *
3415  * Any line not in a valid format is ignored.
3416  * Lines are terminated with newline '\n' characters.
3417  *
3418  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3419  * the vendor name.
3420  *
3421  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3422  * - devices cannot be listed without a corresponding VENDOR line.
3423  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3424  * another TAB, then the device name.
3425  */
3426 
3427 /*
3428  * Assuming (ptr) points to the beginning of a line in the database,
3429  * return the vendor or device and description of the next entry.
3430  * The value of (vendor) or (device) inappropriate for the entry type
3431  * is set to -1.  Returns nonzero at the end of the database.
3432  *
3433  * Note that this is slightly unrobust in the face of corrupt data;
3434  * we attempt to safeguard against this by spamming the end of the
3435  * database with a newline when we initialise.
3436  */
3437 static int
3438 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3439 {
3440 	char	*cp = *ptr;
3441 	int	left;
3442 
3443 	*device = -1;
3444 	*vendor = -1;
3445 	**desc = '\0';
3446 	for (;;) {
3447 		left = pci_vendordata_size - (cp - pci_vendordata);
3448 		if (left <= 0) {
3449 			*ptr = cp;
3450 			return(1);
3451 		}
3452 
3453 		/* vendor entry? */
3454 		if (*cp != '\t' &&
3455 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3456 			break;
3457 		/* device entry? */
3458 		if (*cp == '\t' &&
3459 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3460 			break;
3461 
3462 		/* skip to next line */
3463 		while (*cp != '\n' && left > 0) {
3464 			cp++;
3465 			left--;
3466 		}
3467 		if (*cp == '\n') {
3468 			cp++;
3469 			left--;
3470 		}
3471 	}
3472 	/* skip to next line */
3473 	while (*cp != '\n' && left > 0) {
3474 		cp++;
3475 		left--;
3476 	}
3477 	if (*cp == '\n' && left > 0)
3478 		cp++;
3479 	*ptr = cp;
3480 	return(0);
3481 }
3482 
/*
 * Build a malloc'd "vendor, device" description string for 'dev' from
 * the preloaded vendor database, or return NULL if no database is
 * loaded, allocation fails, or the vendor is not listed.  The caller
 * owns the returned string and must free it with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers; pci_describe_parse_line fills them. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: no device entry found. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device ID when no name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3535 
/*
 * Bus read_ivar method: expose the cached PCI config registers of a
 * child (vendor/device IDs, class codes, bus address, etc.) through
 * the instance-variable interface.  Returns ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3618 
/*
 * Bus write_ivar method.  Only the interrupt pin may be changed; the
 * identifying registers are read-only through this interface and
 * return EINVAL, while unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3651 
3652 
3653 #include "opt_ddb.h"
3654 #ifdef DDB
3655 #include <ddb/ddb.h>
3656 #include <sys/cons.h>
3657 
3658 /*
3659  * List resources based on pci map registers, used for within ddb
3660  */
3661 
3662 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3663 {
3664 	struct pci_devinfo *dinfo;
3665 	struct devlist *devlist_head;
3666 	struct pci_conf *p;
3667 	const char *name;
3668 	int i, error, none_count;
3669 
3670 	none_count = 0;
3671 	/* get the head of the device queue */
3672 	devlist_head = &pci_devq;
3673 
3674 	/*
3675 	 * Go through the list of devices and print out devices
3676 	 */
3677 	for (error = 0, i = 0,
3678 	     dinfo = STAILQ_FIRST(devlist_head);
3679 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3680 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3681 
3682 		/* Populate pd_name and pd_unit */
3683 		name = NULL;
3684 		if (dinfo->cfg.dev)
3685 			name = device_get_name(dinfo->cfg.dev);
3686 
3687 		p = &dinfo->conf;
3688 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3689 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3690 			(name && *name) ? name : "none",
3691 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3692 			none_count++,
3693 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3694 			p->pc_sel.pc_func, (p->pc_class << 16) |
3695 			(p->pc_subclass << 8) | p->pc_progif,
3696 			(p->pc_subdevice << 16) | p->pc_subvendor,
3697 			(p->pc_device << 16) | p->pc_vendor,
3698 			p->pc_revid, p->pc_hdr);
3699 	}
3700 }
3701 #endif /* DDB */
3702 
/*
 * Lazily size and reserve the resource backing a BAR: probe the BAR to
 * determine its size and type, allocate a suitably sized and aligned
 * range from the parent, record it in the child's resource list as
 * RLE_RESERVED, and program the BAR with the assigned address.
 * Returns NULL if the BAR is unimplemented or the allocation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/*
	 * Determine the size of the BAR and ignore BARs with a size
	 * of 0.  Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	if (mapsize == 0)
		goto out;

	/* Reject requests whose type contradicts what the BAR reports. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3798 
3799 
/*
 * Bus alloc_resource method.  Requests from indirect children are
 * passed straight up.  For direct children, IRQ rid 0 may be routed
 * lazily (unless MSI/MSI-X is already in use), and BAR-backed I/O or
 * memory ranges are sized and reserved on first use via
 * pci_reserve_map() before the resource-list allocation proceeds.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3850 
3851 int
3852 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3853     struct resource *r)
3854 {
3855 	int error;
3856 
3857 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3858 	if (error)
3859 		return (error);
3860 
3861 	/* Enable decoding in the command register when activating BARs. */
3862 	if (device_get_parent(child) == dev) {
3863 		/* Device ROMs need their decoding explicitly enabled. */
3864 		if (rid == PCIR_BIOS)
3865 			pci_write_config(child, rid, rman_get_start(r) |
3866 			    PCIM_BIOS_ENABLE, 4);
3867 		switch (type) {
3868 		case SYS_RES_IOPORT:
3869 		case SYS_RES_MEMORY:
3870 			error = PCI_ENABLE_IO(dev, child, type);
3871 			break;
3872 		}
3873 	}
3874 	return (error);
3875 }
3876 
3877 int
3878 pci_deactivate_resource(device_t dev, device_t child, int type,
3879     int rid, struct resource *r)
3880 {
3881 	int error;
3882 
3883 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3884 	if (error)
3885 		return (error);
3886 
3887 	/* Disable decoding for device ROMs. */
3888 	if (rid == PCIR_BIOS)
3889 		pci_write_config(child, rid, rman_get_start(r), 4);
3890 	return (0);
3891 }
3892 
/*
 * Detach and destroy a PCI child device: disable its memory and I/O
 * decoding, release or unreserve every resource on its resource list,
 * delete the device, and free its config structure.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/* A still-active/busy resource is a driver bug;
			 * complain, then forcibly release it anyway. */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3932 
/*
 * Bus delete_resource method: remove a resource-list entry for a
 * direct child.  Refuses to delete a resource that is still active or
 * busy; for BAR-backed ranges the BAR is cleared first so the device
 * stops decoding the range being released.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3975 
3976 struct resource_list *
3977 pci_get_resource_list (device_t dev, device_t child)
3978 {
3979 	struct pci_devinfo *dinfo = device_get_ivars(child);
3980 
3981 	return (&dinfo->resources);
3982 }
3983 
3984 uint32_t
3985 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3986 {
3987 	struct pci_devinfo *dinfo = device_get_ivars(child);
3988 	pcicfgregs *cfg = &dinfo->cfg;
3989 
3990 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3991 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3992 }
3993 
3994 void
3995 pci_write_config_method(device_t dev, device_t child, int reg,
3996     uint32_t val, int width)
3997 {
3998 	struct pci_devinfo *dinfo = device_get_ivars(child);
3999 	pcicfgregs *cfg = &dinfo->cfg;
4000 
4001 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4002 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4003 }
4004 
4005 int
4006 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4007     size_t buflen)
4008 {
4009 
4010 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4011 	    pci_get_function(child));
4012 	return (0);
4013 }
4014 
4015 int
4016 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4017     size_t buflen)
4018 {
4019 	struct pci_devinfo *dinfo;
4020 	pcicfgregs *cfg;
4021 
4022 	dinfo = device_get_ivars(child);
4023 	cfg = &dinfo->cfg;
4024 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4025 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4026 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4027 	    cfg->progif);
4028 	return (0);
4029 }
4030 
4031 int
4032 pci_assign_interrupt_method(device_t dev, device_t child)
4033 {
4034 	struct pci_devinfo *dinfo = device_get_ivars(child);
4035 	pcicfgregs *cfg = &dinfo->cfg;
4036 
4037 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4038 	    cfg->intpin));
4039 }
4040 
4041 static int
4042 pci_modevent(module_t mod, int what, void *arg)
4043 {
4044 	static struct cdev *pci_cdev;
4045 
4046 	switch (what) {
4047 	case MOD_LOAD:
4048 		STAILQ_INIT(&pci_devq);
4049 		pci_generation = 0;
4050 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4051 		    "pci");
4052 		pci_load_vendor_data();
4053 		break;
4054 
4055 	case MOD_UNLOAD:
4056 		destroy_dev(pci_cdev);
4057 		break;
4058 	}
4059 
4060 	return (0);
4061 }
4062 
/*
 * Re-program a type 0 (normal header) device's configuration registers
 * from the copy cached in "dinfo" by pci_cfg_save().  The power-state
 * transition must happen first; the register write order below is
 * deliberate — do not reorder.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Re-program the BARs and the ROM base (PCIR_BIOS) from the cache. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the writable part of the standard type 0 header. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4107 
/*
 * Snapshot a type 0 device's writable configuration registers into
 * dinfo's cache so pci_cfg_restore() can re-program them later.  When
 * "setstate" is non-zero, additionally power the device down to D3
 * as permitted by the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;
	/* Cache the BARs and the ROM base (PCIR_BIOS). */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* No powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4191