xref: /freebsd/sys/dev/pci/pci.c (revision 32ba16b6e6dbfa5e4f536695191a8816bd6a8765)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 #ifdef __HAVE_ACPI
73 #include <contrib/dev/acpica/include/acpi.h>
74 #include "acpi_if.h"
75 #else
76 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
77 #endif
78 
79 static pci_addr_t	pci_mapbase(uint64_t mapreg);
80 static const char	*pci_maptype(uint64_t mapreg);
81 static int		pci_mapsize(uint64_t testval);
82 static int		pci_maprange(uint64_t mapreg);
83 static pci_addr_t	pci_rombase(uint64_t mapreg);
84 static int		pci_romsize(uint64_t testval);
85 static void		pci_fixancient(pcicfgregs *cfg);
86 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
87 
88 static int		pci_porten(device_t dev);
89 static int		pci_memen(device_t dev);
90 static void		pci_assign_interrupt(device_t bus, device_t dev,
91 			    int force_route);
92 static int		pci_add_map(device_t bus, device_t dev, int reg,
93 			    struct resource_list *rl, int force, int prefetch);
94 static int		pci_probe(device_t dev);
95 static int		pci_attach(device_t dev);
96 static void		pci_load_vendor_data(void);
97 static int		pci_describe_parse_line(char **ptr, int *vendor,
98 			    int *device, char **desc);
99 static char		*pci_describe_device(device_t dev);
100 static int		pci_modevent(module_t mod, int what, void *arg);
101 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
102 			    pcicfgregs *cfg);
103 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
104 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
105 			    int reg, uint32_t *data);
106 #if 0
107 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
108 			    int reg, uint32_t data);
109 #endif
110 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
111 static void		pci_disable_msi(device_t dev);
112 static void		pci_enable_msi(device_t dev, uint64_t address,
113 			    uint16_t data);
114 static void		pci_enable_msix(device_t dev, u_int index,
115 			    uint64_t address, uint32_t data);
116 static void		pci_mask_msix(device_t dev, u_int index);
117 static void		pci_unmask_msix(device_t dev, u_int index);
118 static int		pci_msi_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pci_remap_intr_method(device_t bus, device_t dev,
122 			    u_int irq);
123 
/*
 * kobj method table for the PCI bus driver: device lifecycle hooks,
 * generic newbus resource plumbing, and the PCI-specific interface
 * (config space access, power states, MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* required table terminator */
};
176 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
/* Attach the "pci" driver to any "pcib" (PCI bridge) parent. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database loaded via pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
185 
186 
/* Per-device workaround entry; matched against a device's vendor/device ID. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* codes below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific argument (e.g. the map register) */
	int	arg2;	/* quirk-specific argument, currently unused */
};
195 
/*
 * Table of known-broken devices.  devid is encoded as
 * (device << 16) | vendor; the table is terminated by a zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
230 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of all discovered PCI devices */
uint32_t pci_generation;	/* bumped on every device list change */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCIe/PCI-X capability is seen during capability scan. */
static int pcie_chipset, pcix_chipset;
240 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Tunable/sysctl knobs; each TUNABLE_INT mirrors the loader.conf name. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* Non-static: referenced from other PCI support code. */
int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Early USB takeover defaults on where BIOS legacy emulation is common. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
292 
/*
 * Find a device_t by bus/slot/function in domain 0.
 * Convenience wrapper around pci_find_dbsf(); returns NULL if not found.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
301 
302 /* Find a device_t by domain/bus/slot/function */
303 
304 device_t
305 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
306 {
307 	struct pci_devinfo *dinfo;
308 
309 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
310 		if ((dinfo->cfg.domain == domain) &&
311 		    (dinfo->cfg.bus == bus) &&
312 		    (dinfo->cfg.slot == slot) &&
313 		    (dinfo->cfg.func == func)) {
314 			return (dinfo->cfg.dev);
315 		}
316 	}
317 
318 	return (NULL);
319 }
320 
321 /* Find a device_t by vendor/device ID */
322 
323 device_t
324 pci_find_device(uint16_t vendor, uint16_t device)
325 {
326 	struct pci_devinfo *dinfo;
327 
328 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
329 		if ((dinfo->cfg.vendor == vendor) &&
330 		    (dinfo->cfg.device == device)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 static int
339 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
340 {
341 	va_list ap;
342 	int retval;
343 
344 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
345 	    cfg->func);
346 	va_start(ap, fmt);
347 	retval += vprintf(fmt, ap);
348 	va_end(ap);
349 	return (retval);
350 }
351 
352 /* return base address of memory or port map */
353 
354 static pci_addr_t
355 pci_mapbase(uint64_t mapreg)
356 {
357 
358 	if (PCI_BAR_MEM(mapreg))
359 		return (mapreg & PCIM_BAR_MEM_BASE);
360 	else
361 		return (mapreg & PCIM_BAR_IO_BASE);
362 }
363 
364 /* return map type of memory or port map */
365 
366 static const char *
367 pci_maptype(uint64_t mapreg)
368 {
369 
370 	if (PCI_BAR_IO(mapreg))
371 		return ("I/O Port");
372 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
373 		return ("Prefetchable Memory");
374 	return ("Memory");
375 }
376 
/*
 * Return log2 of the map size decoded from a BAR sizing probe value
 * (the value read back after writing all-ones).  The size is given by
 * the position of the lowest set address bit; 0 if no bit is set.
 */

static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
395 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* The expansion ROM BAR keeps its address in the high bits. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
404 
/*
 * Return log2 of the expansion ROM size decoded from a sizing probe
 * value; determined by the lowest set address bit, 0 if none is set.
 */

static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
423 
424 /* return log2 of address range supported by map register */
425 
426 static int
427 pci_maprange(uint64_t mapreg)
428 {
429 	int ln2range = 0;
430 
431 	if (PCI_BAR_IO(mapreg))
432 		ln2range = 32;
433 	else
434 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
435 		case PCIM_BAR_MEM_32:
436 			ln2range = 32;
437 			break;
438 		case PCIM_BAR_MEM_1MB:
439 			ln2range = 20;
440 			break;
441 		case PCIM_BAR_MEM_64:
442 			ln2range = 64;
443 			break;
444 		}
445 	return (ln2range);
446 }
447 
448 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
449 
450 static void
451 pci_fixancient(pcicfgregs *cfg)
452 {
453 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
454 		return;
455 
456 	/* PCI to PCI bridges use header type 1 */
457 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
458 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
459 }
460 
/* extract header type specific config data */

/*
 * Fill in the header-type-dependent fields of *cfg (subsystem IDs and
 * BAR count) for the function at bus b / slot s / function f, read via
 * the parent bridge pcib.  Unknown header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: subsystem IDs at their normal offsets, 6 BARs. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1 (PCI-PCI bridge): only 2 BARs, no subsystem IDs. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2 (CardBus): subsystem IDs at CardBus offsets. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
484 
/* read configuration header into pcicfgregs structure */
/*
 * Probe domain d / bus b / slot s / function f through bridge pcib.
 * If a device responds (vendor/device not all-ones), allocate a
 * pci_devinfo of 'size' bytes (callers may embed it in a larger
 * structure), populate its config snapshot and pci_conf mirror, link
 * it onto the global device list, and return it.  Returns NULL when
 * no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones means no device is decoding this selector. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): M_WAITOK malloc(9) does not return NULL;
		 * this check looks vestigial — confirm before removing. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common (type-independent) header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multifunction bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the snapshot into the pciio-visible pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
559 
/*
 * Walk the PCI capability list of the device described by *cfg and
 * record the capabilities this driver cares about (power management,
 * HyperTransport MSI mapping, MSI, MSI-X, VPD, subvendor, PCI-X,
 * PCI-express) into the corresponding cfg sub-structures.
 *
 * Note: the REG/WREG macros defined here are intentionally NOT
 * #undef'd at the end of this function; the VPD helpers below reuse
 * them, and pci_read_vpd() performs the #undef.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each live in a BAR at an offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Just remember where it is; read lazily on demand. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
695 
696 /*
697  * PCI Vital Product Data
698  */
699 
700 #define	PCI_VPD_TIMEOUT		1000000
701 
/*
 * Read one 32-bit word of VPD at byte offset 'reg' into *data via the
 * VPD capability registers (REG/WREG macros from pci_read_extcap()).
 * Writing the address with bit 15 clear starts a read; the device sets
 * bit 15 when the data register is valid.  Returns 0 on success or
 * ENXIO if the device does not complete within PCI_VPD_TIMEOUT polls.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Poll the flag bit (15) until the device signals completion. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
720 
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg'.  Mirror image of
 * pci_read_vpd_reg(): writing the address with bit 15 SET starts a
 * write, and the device clears bit 15 on completion.  Currently
 * compiled out — nothing in this file writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Poll until the device clears the flag bit (write finished). */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
740 
741 #undef PCI_VPD_TIMEOUT
742 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* parent bridge for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next 32-bit-aligned VPD offset */
	uint8_t		cksum;		/* running sum; 0 at "RV" = valid */
};
751 
752 static int
753 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
754 {
755 	uint32_t reg;
756 	uint8_t byte;
757 
758 	if (vrs->bytesinval == 0) {
759 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
760 			return (ENXIO);
761 		vrs->val = le32toh(reg);
762 		vrs->off += 4;
763 		byte = vrs->val & 0xff;
764 		vrs->bytesinval = 3;
765 	} else {
766 		vrs->val = vrs->val >> 8;
767 		byte = vrs->val & 0xff;
768 		vrs->bytesinval--;
769 	}
770 
771 	vrs->cksum += byte;
772 	*data = byte;
773 	return (0);
774 }
775 
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string (vpd_ident), the read-only keyword array (vpd_ros) and the
 * read/write keyword array (vpd_w).
 *
 * Implemented as a state machine driven one byte at a time by
 * vpd_nextbyte():
 *   state 0  - resource item header (small or large format)
 *   state 1  - identifier-string payload
 *   state 2/3- VPD-R keyword header / keyword value
 *   state 4  - skip bytes (unused by current transitions)
 *   state 5/6- VPD-W keyword header / keyword value
 *   state -1 - normal termination, -2 - I/O error
 * On checksum failure the RO data is discarded; on I/O error all
 * partially-parsed data is discarded.  Also performs the #undef of
 * the REG/WREG macros defined back in pci_read_extcap().
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is at most 0x7f words of 4 bytes. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the RO array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" holds the checksum; sum-to-zero is valid. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the RO array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the RW array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the RW array to its final size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we only attempt the read once. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1055 
1056 int
1057 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1058 {
1059 	struct pci_devinfo *dinfo = device_get_ivars(child);
1060 	pcicfgregs *cfg = &dinfo->cfg;
1061 
1062 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1063 		pci_read_vpd(device_get_parent(dev), cfg);
1064 
1065 	*identptr = cfg->vpd.vpd_ident;
1066 
1067 	if (*identptr == NULL)
1068 		return (ENXIO);
1069 
1070 	return (0);
1071 }
1072 
1073 int
1074 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1075 	const char **vptr)
1076 {
1077 	struct pci_devinfo *dinfo = device_get_ivars(child);
1078 	pcicfgregs *cfg = &dinfo->cfg;
1079 	int i;
1080 
1081 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1082 		pci_read_vpd(device_get_parent(dev), cfg);
1083 
1084 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1085 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1086 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1087 			*vptr = cfg->vpd.vpd_ros[i].value;
1088 		}
1089 
1090 	if (i != cfg->vpd.vpd_rocnt)
1091 		return (0);
1092 
1093 	*vptr = NULL;
1094 	return (ENXIO);
1095 }
1096 
1097 /*
1098  * Find the requested extended capability and return the offset in
1099  * configuration space via the pointer provided. The function returns
1100  * 0 on success and error code otherwise.
1101  */
1102 int
1103 pci_find_extcap_method(device_t dev, device_t child, int capability,
1104     int *capreg)
1105 {
1106 	struct pci_devinfo *dinfo = device_get_ivars(child);
1107 	pcicfgregs *cfg = &dinfo->cfg;
1108 	u_int32_t status;
1109 	u_int8_t ptr;
1110 
1111 	/*
1112 	 * Check the CAP_LIST bit of the PCI status register first.
1113 	 */
1114 	status = pci_read_config(child, PCIR_STATUS, 2);
1115 	if (!(status & PCIM_STATUS_CAPPRESENT))
1116 		return (ENXIO);
1117 
1118 	/*
1119 	 * Determine the start pointer of the capabilities list.
1120 	 */
1121 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1122 	case PCIM_HDRTYPE_NORMAL:
1123 	case PCIM_HDRTYPE_BRIDGE:
1124 		ptr = PCIR_CAP_PTR;
1125 		break;
1126 	case PCIM_HDRTYPE_CARDBUS:
1127 		ptr = PCIR_CAP_PTR_2;
1128 		break;
1129 	default:
1130 		/* XXX: panic? */
1131 		return (ENXIO);		/* no extended capabilities support */
1132 	}
1133 	ptr = pci_read_config(child, ptr, 1);
1134 
1135 	/*
1136 	 * Traverse the capabilities list.
1137 	 */
1138 	while (ptr != 0) {
1139 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1140 			if (capreg != NULL)
1141 				*capreg = ptr;
1142 			return (0);
1143 		}
1144 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1145 	}
1146 
1147 	return (ENOENT);
1148 }
1149 
1150 /*
1151  * Support for MSI-X message interrupts.
1152  */
1153 void
1154 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1155 {
1156 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1157 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1158 	uint32_t offset;
1159 
1160 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1161 	offset = msix->msix_table_offset + index * 16;
1162 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1163 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1164 	bus_write_4(msix->msix_table_res, offset + 8, data);
1165 
1166 	/* Enable MSI -> HT mapping. */
1167 	pci_ht_map_msi(dev, address);
1168 }
1169 
1170 void
1171 pci_mask_msix(device_t dev, u_int index)
1172 {
1173 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1174 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1175 	uint32_t offset, val;
1176 
1177 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1178 	offset = msix->msix_table_offset + index * 16 + 12;
1179 	val = bus_read_4(msix->msix_table_res, offset);
1180 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1181 		val |= PCIM_MSIX_VCTRL_MASK;
1182 		bus_write_4(msix->msix_table_res, offset, val);
1183 	}
1184 }
1185 
1186 void
1187 pci_unmask_msix(device_t dev, u_int index)
1188 {
1189 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1190 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1191 	uint32_t offset, val;
1192 
1193 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1194 	offset = msix->msix_table_offset + index * 16 + 12;
1195 	val = bus_read_4(msix->msix_table_res, offset);
1196 	if (val & PCIM_MSIX_VCTRL_MASK) {
1197 		val &= ~PCIM_MSIX_VCTRL_MASK;
1198 		bus_write_4(msix->msix_table_res, offset, val);
1199 	}
1200 }
1201 
1202 int
1203 pci_pending_msix(device_t dev, u_int index)
1204 {
1205 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1206 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1207 	uint32_t offset, bit;
1208 
1209 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1210 	offset = msix->msix_pba_offset + (index / 32) * 4;
1211 	bit = 1 << index % 32;
1212 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1213 }
1214 
1215 /*
1216  * Restore MSI-X registers and table during resume.  If MSI-X is
1217  * enabled then walk the virtual table to restore the actual MSI-X
1218  * table.
1219  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; index the array with -1. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved control register (enable state). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1247 
1248 /*
1249  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1250  * returned in *count.  After this function returns, each message will be
1251  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1252  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * If the PBA shares the table BAR, 'rle' still refers to the
	 * table's resource here, which is what we want.
	 */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate as many messages as the caller asked for, up to the max. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/*
	 * NOTE(review): if the very first PCIB_ALLOC_MSIX call fails,
	 * 'actual' is 0 here and the code below still enables MSI-X with
	 * zero vectors (and the bootverbose path would dereference a NULL
	 * rle) — confirm the parent bridge never fails the first message.
	 */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Table entries store 1-based vector numbers. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1384 
1385 /*
1386  * By default, pci_alloc_msix() will assign the allocated IRQ
1387  * resources consecutively to the first N messages in the MSI-X table.
1388  * However, device drivers may want to use different layouts if they
1389  * either receive fewer messages than they asked for, or they wish to
1390  * populate the MSI-X table sparsely.  This method allows the driver
1391  * to specify what layout it wants.  It must be called after a
1392  * successful pci_alloc_msix() but before any of the associated
1393  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1394  *
1395  * The 'vectors' array contains 'count' message vectors.  The array
1396  * maps directly to the MSI-X table in that index 0 in the array
1397  * specifies the vector for the first message in the MSI-X table, etc.
1398  * The vector value in each array index can either be 0 to indicate
1399  * that no vector should be assigned to a message slot, or it can be a
1400  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1402  * vector (IRQ) to be used for the corresponding message.
1403  *
1404  * On successful return, each message with a non-zero vector will have
1405  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1406  * 1.  Additionally, if any of the IRQs allocated via the previous
1407  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1408  * will be freed back to the system automatically.
1409  *
1410  * For example, suppose a driver has a MSI-X table with 6 messages and
1411  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1412  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1413  * C.  After the call to pci_alloc_msix(), the device will be setup to
1414  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1416  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1417  * be freed back to the system.  This device will also have valid
1418  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1419  *
1420  * In any case, the SYS_RES_IRQ rid X will always map to the message
1421  * at MSI-X table index X - 1 and will only be valid if a vector is
1422  * assigned to that table entry.
1423  */
1424 int
1425 pci_remap_msix_method(device_t dev, device_t child, int count,
1426     const u_int *vectors)
1427 {
1428 	struct pci_devinfo *dinfo = device_get_ivars(child);
1429 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1430 	struct resource_list_entry *rle;
1431 	int i, irq, j, *used;
1432 
1433 	/*
1434 	 * Have to have at least one message in the table but the
1435 	 * table can't be bigger than the actual MSI-X table in the
1436 	 * device.
1437 	 */
1438 	if (count == 0 || count > msix->msix_msgnum)
1439 		return (EINVAL);
1440 
1441 	/* Sanity check the vectors. */
1442 	for (i = 0; i < count; i++)
1443 		if (vectors[i] > msix->msix_alloc)
1444 			return (EINVAL);
1445 
1446 	/*
1447 	 * Make sure there aren't any holes in the vectors to be used.
1448 	 * It's a big pain to support it, and it doesn't really make
1449 	 * sense anyway.  Also, at least one vector must be used.
1450 	 */
1451 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1452 	    M_ZERO);
1453 	for (i = 0; i < count; i++)
1454 		if (vectors[i] != 0)
1455 			used[vectors[i] - 1] = 1;
1456 	for (i = 0; i < msix->msix_alloc - 1; i++)
1457 		if (used[i] == 0 && used[i + 1] == 1) {
1458 			free(used, M_DEVBUF);
1459 			return (EINVAL);
1460 		}
1461 	if (used[0] != 1) {
1462 		free(used, M_DEVBUF);
1463 		return (EINVAL);
1464 	}
1465 
1466 	/* Make sure none of the resources are allocated. */
1467 	for (i = 0; i < msix->msix_table_len; i++) {
1468 		if (msix->msix_table[i].mte_vector == 0)
1469 			continue;
1470 		if (msix->msix_table[i].mte_handlers > 0)
1471 			return (EBUSY);
1472 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1473 		KASSERT(rle != NULL, ("missing resource"));
1474 		if (rle->res != NULL)
1475 			return (EBUSY);
1476 	}
1477 
1478 	/* Free the existing resource list entries. */
1479 	for (i = 0; i < msix->msix_table_len; i++) {
1480 		if (msix->msix_table[i].mte_vector == 0)
1481 			continue;
1482 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1483 	}
1484 
1485 	/*
1486 	 * Build the new virtual table keeping track of which vectors are
1487 	 * used.
1488 	 */
1489 	free(msix->msix_table, M_DEVBUF);
1490 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1491 	    M_DEVBUF, M_WAITOK | M_ZERO);
1492 	for (i = 0; i < count; i++)
1493 		msix->msix_table[i].mte_vector = vectors[i];
1494 	msix->msix_table_len = count;
1495 
1496 	/* Free any unused IRQs and resize the vectors array if necessary. */
1497 	j = msix->msix_alloc - 1;
1498 	if (used[j] == 0) {
1499 		struct msix_vector *vec;
1500 
1501 		while (used[j] == 0) {
1502 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1503 			    msix->msix_vectors[j].mv_irq);
1504 			j--;
1505 		}
1506 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1507 		    M_WAITOK);
1508 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1509 		    (j + 1));
1510 		free(msix->msix_vectors, M_DEVBUF);
1511 		msix->msix_vectors = vec;
1512 		msix->msix_alloc = j + 1;
1513 	}
1514 	free(used, M_DEVBUF);
1515 
1516 	/* Map the IRQs onto the rids. */
1517 	for (i = 0; i < count; i++) {
1518 		if (vectors[i] == 0)
1519 			continue;
1520 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1521 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1522 		    irq, 1);
1523 	}
1524 
1525 	if (bootverbose) {
1526 		device_printf(child, "Remapped MSI-X IRQs as: ");
1527 		for (i = 0; i < count; i++) {
1528 			if (i != 0)
1529 				printf(", ");
1530 			if (vectors[i] == 0)
1531 				printf("---");
1532 			else
1533 				printf("%d",
1534 				    msix->msix_vectors[vectors[i]].mv_irq);
1535 		}
1536 		printf("\n");
1537 	}
1538 
1539 	return (0);
1540 }
1541 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in the
 * control register, delete the SYS_RES_IRQ resource list entries, and
 * hand the IRQs back to the parent bridge.  Fails with EBUSY if any
 * message still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1588 
1589 /*
1590  * Return the max supported MSI-X messages this device supports.
1591  * Basically, assuming the MD code can alloc messages, this function
1592  * should return the maximum value that pci_alloc_msix() can return.
1593  * Thus, it is subject to the tunables, etc.
1594  */
1595 int
1596 pci_msix_count_method(device_t dev, device_t child)
1597 {
1598 	struct pci_devinfo *dinfo = device_get_ivars(child);
1599 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1600 
1601 	if (pci_do_msix && msix->msix_location != 0)
1602 		return (msix->msix_msgnum);
1603 	return (0);
1604 }
1605 
1606 /*
1607  * HyperTransport MSI mapping control
1608  */
1609 void
1610 pci_ht_map_msi(device_t dev, uint64_t addr)
1611 {
1612 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1613 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1614 
1615 	if (!ht->ht_msimap)
1616 		return;
1617 
1618 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1619 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1620 		/* Enable MSI -> HT mapping. */
1621 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1622 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1623 		    ht->ht_msictrl, 2);
1624 	}
1625 
1626 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1627 		/* Disable MSI -> HT mapping. */
1628 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1629 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1630 		    ht->ht_msictrl, 2);
1631 	}
1632 }
1633 
1634 int
1635 pci_get_max_read_req(device_t dev)
1636 {
1637 	int cap;
1638 	uint16_t val;
1639 
1640 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1641 		return (0);
1642 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1643 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1644 	val >>= 12;
1645 	return (1 << (val + 7));
1646 }
1647 
1648 int
1649 pci_set_max_read_req(device_t dev, int size)
1650 {
1651 	int cap;
1652 	uint16_t val;
1653 
1654 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1655 		return (0);
1656 	if (size < 128)
1657 		size = 128;
1658 	if (size > 4096)
1659 		size = 4096;
1660 	size = (1 << (fls(size) - 1));
1661 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1662 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1663 	val |= (fls(size) - 8) << 12;
1664 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1665 	return (size);
1666 }
1667 
1668 /*
 * Support for MSI (Message Signaled Interrupts).
1670  */
/*
 * Program the MSI capability of 'dev' with the given message address
 * and data, enable MSI in the control register, and set up any needed
 * MSI -> HT mapping.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/* The data register's offset depends on the 64-bit address flag. */
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1697 
/* Disable MSI delivery and any MSI -> HT mapping for 'dev'. */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1712 
1713 /*
1714  * Restore MSI registers during resume.  If MSI is enabled then
1715  * restore the data and address registers in addition to the control
1716  * register.
1717  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only rewrite address/data if MSI was enabled at suspend time. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* The data register's offset depends on the 64-bit flag. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1743 
1744 static int
1745 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1746 {
1747 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1748 	pcicfgregs *cfg = &dinfo->cfg;
1749 	struct resource_list_entry *rle;
1750 	struct msix_table_entry *mte;
1751 	struct msix_vector *mv;
1752 	uint64_t addr;
1753 	uint32_t data;
1754 	int error, i, j;
1755 
1756 	/*
1757 	 * Handle MSI first.  We try to find this IRQ among our list
1758 	 * of MSI IRQs.  If we find it, we request updated address and
1759 	 * data registers and apply the results.
1760 	 */
1761 	if (cfg->msi.msi_alloc > 0) {
1762 
1763 		/* If we don't have any active handlers, nothing to do. */
1764 		if (cfg->msi.msi_handlers == 0)
1765 			return (0);
1766 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1767 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1768 			    i + 1);
1769 			if (rle->start == irq) {
1770 				error = PCIB_MAP_MSI(device_get_parent(bus),
1771 				    dev, irq, &addr, &data);
1772 				if (error)
1773 					return (error);
1774 				pci_disable_msi(dev);
1775 				dinfo->cfg.msi.msi_addr = addr;
1776 				dinfo->cfg.msi.msi_data = data;
1777 				pci_enable_msi(dev, addr, data);
1778 				return (0);
1779 			}
1780 		}
1781 		return (ENOENT);
1782 	}
1783 
1784 	/*
1785 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1786 	 * we request the updated mapping info.  If that works, we go
1787 	 * through all the slots that use this IRQ and update them.
1788 	 */
1789 	if (cfg->msix.msix_alloc > 0) {
1790 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1791 			mv = &cfg->msix.msix_vectors[i];
1792 			if (mv->mv_irq == irq) {
1793 				error = PCIB_MAP_MSI(device_get_parent(bus),
1794 				    dev, irq, &addr, &data);
1795 				if (error)
1796 					return (error);
1797 				mv->mv_address = addr;
1798 				mv->mv_data = data;
1799 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1800 					mte = &cfg->msix.msix_table[j];
1801 					if (mte->mte_vector != i + 1)
1802 						continue;
1803 					if (mte->mte_handlers == 0)
1804 						continue;
1805 					pci_mask_msix(dev, j);
1806 					pci_enable_msix(dev, j, addr, data);
1807 					pci_unmask_msix(dev, j);
1808 				}
1809 			}
1810 		}
1811 		return (ENOENT);
1812 	}
1813 
1814 	return (ENOENT);
1815 }
1816 
1817 /*
1818  * Returns true if the specified device is blacklisted because MSI
1819  * doesn't work.
1820  */
1821 int
1822 pci_msi_device_blacklisted(device_t dev)
1823 {
1824 	struct pci_quirk *q;
1825 
1826 	if (!pci_honor_msi_blacklist)
1827 		return (0);
1828 
1829 	for (q = &pci_quirks[0]; q->devid; q++) {
1830 		if (q->devid == pci_get_devid(dev) &&
1831 		    q->type == PCI_QUIRK_DISABLE_MSI)
1832 			return (1);
1833 	}
1834 	return (0);
1835 }
1836 
1837 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1839  * we just check for blacklisted chipsets as represented by the
1840  * host-PCI bridge at device 0:0:0.  In the future, it may become
1841  * necessary to check other system attributes, such as the kenv values
1842  * that give the motherboard manufacturer and model number.
1843  */
1844 static int
1845 pci_msi_blacklisted(void)
1846 {
1847 	device_t dev;
1848 
1849 	if (!pci_honor_msi_blacklist)
1850 		return (0);
1851 
1852 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1853 	if (!(pcie_chipset || pcix_chipset))
1854 		return (1);
1855 
1856 	dev = pci_find_bsf(0, 0, 0);
1857 	if (dev != NULL)
1858 		return (pci_msi_device_blacklisted(dev));
1859 	return (0);
1860 }
1861 
1862 /*
1863  * Attempt to allocate *count MSI messages.  The actual number allocated is
1864  * returned in *count.  After this function returns, each message will be
1865  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1866  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Keep halving the request until the bridge can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME encodes log2 of the message count in bits 6:4. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1985 
1986 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Collect the IRQs while verifying that none are still in use. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2034 
2035 /*
2036  * Return the max supported MSI messages this device supports.
2037  * Basically, assuming the MD code can alloc messages, this function
2038  * should return the maximum value that pci_alloc_msi() can return.
2039  * Thus, it is subject to the tunables, etc.
2040  */
2041 int
2042 pci_msi_count_method(device_t dev, device_t child)
2043 {
2044 	struct pci_devinfo *dinfo = device_get_ivars(child);
2045 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2046 
2047 	if (pci_do_msi && msi->msi_location != 0)
2048 		return (msi->msi_msgnum);
2049 	return (0);
2050 }
2051 
2052 /* free pcicfgregs structure and all depending data structures */
2053 
2054 int
2055 pci_freecfg(struct pci_devinfo *dinfo)
2056 {
2057 	struct devlist *devlist_head;
2058 	int i;
2059 
2060 	devlist_head = &pci_devq;
2061 
2062 	if (dinfo->cfg.vpd.vpd_reg) {
2063 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2064 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2065 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2066 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2067 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2068 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2069 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2070 	}
2071 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2072 	free(dinfo, M_DEVBUF);
2073 
2074 	/* increment the generation count */
2075 	pci_generation++;
2076 
2077 	/* we're losing one device */
2078 	pci_numdevs--;
2079 	return (0);
2080 }
2081 
2082 /*
2083  * PCI power manangement
2084  */
2085 int
2086 pci_set_powerstate_method(device_t dev, device_t child, int state)
2087 {
2088 	struct pci_devinfo *dinfo = device_get_ivars(child);
2089 	pcicfgregs *cfg = &dinfo->cfg;
2090 	uint16_t status;
2091 	int result, oldstate, highest, delay;
2092 
2093 	if (cfg->pp.pp_cap == 0)
2094 		return (EOPNOTSUPP);
2095 
2096 	/*
2097 	 * Optimize a no state change request away.  While it would be OK to
2098 	 * write to the hardware in theory, some devices have shown odd
2099 	 * behavior when going from D3 -> D3.
2100 	 */
2101 	oldstate = pci_get_powerstate(child);
2102 	if (oldstate == state)
2103 		return (0);
2104 
2105 	/*
2106 	 * The PCI power management specification states that after a state
2107 	 * transition between PCI power states, system software must
2108 	 * guarantee a minimal delay before the function accesses the device.
2109 	 * Compute the worst case delay that we need to guarantee before we
2110 	 * access the device.  Many devices will be responsive much more
2111 	 * quickly than this delay, but there are some that don't respond
2112 	 * instantly to state changes.  Transitions to/from D3 state require
2113 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2114 	 * is done below with DELAY rather than a sleeper function because
2115 	 * this function can be called from contexts where we cannot sleep.
2116 	 */
2117 	highest = (oldstate > state) ? oldstate : state;
2118 	if (highest == PCI_POWERSTATE_D3)
2119 	    delay = 10000;
2120 	else if (highest == PCI_POWERSTATE_D2)
2121 	    delay = 200;
2122 	else
2123 	    delay = 0;
2124 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2125 	    & ~PCIM_PSTAT_DMASK;
2126 	result = 0;
2127 	switch (state) {
2128 	case PCI_POWERSTATE_D0:
2129 		status |= PCIM_PSTAT_D0;
2130 		break;
2131 	case PCI_POWERSTATE_D1:
2132 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2133 			return (EOPNOTSUPP);
2134 		status |= PCIM_PSTAT_D1;
2135 		break;
2136 	case PCI_POWERSTATE_D2:
2137 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2138 			return (EOPNOTSUPP);
2139 		status |= PCIM_PSTAT_D2;
2140 		break;
2141 	case PCI_POWERSTATE_D3:
2142 		status |= PCIM_PSTAT_D3;
2143 		break;
2144 	default:
2145 		return (EINVAL);
2146 	}
2147 
2148 	if (bootverbose)
2149 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2150 		    state);
2151 
2152 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2153 	if (delay)
2154 		DELAY(delay);
2155 	return (0);
2156 }
2157 
2158 int
2159 pci_get_powerstate_method(device_t dev, device_t child)
2160 {
2161 	struct pci_devinfo *dinfo = device_get_ivars(child);
2162 	pcicfgregs *cfg = &dinfo->cfg;
2163 	uint16_t status;
2164 	int result;
2165 
2166 	if (cfg->pp.pp_cap != 0) {
2167 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2168 		switch (status & PCIM_PSTAT_DMASK) {
2169 		case PCIM_PSTAT_D0:
2170 			result = PCI_POWERSTATE_D0;
2171 			break;
2172 		case PCIM_PSTAT_D1:
2173 			result = PCI_POWERSTATE_D1;
2174 			break;
2175 		case PCIM_PSTAT_D2:
2176 			result = PCI_POWERSTATE_D2;
2177 			break;
2178 		case PCIM_PSTAT_D3:
2179 			result = PCI_POWERSTATE_D3;
2180 			break;
2181 		default:
2182 			result = PCI_POWERSTATE_UNKNOWN;
2183 			break;
2184 		}
2185 	} else {
2186 		/* No support, device is always at D0 */
2187 		result = PCI_POWERSTATE_D0;
2188 	}
2189 	return (result);
2190 }
2191 
2192 /*
2193  * Some convenience functions for PCI device drivers.
2194  */
2195 
2196 static __inline void
2197 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2198 {
2199 	uint16_t	command;
2200 
2201 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2202 	command |= bit;
2203 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2204 }
2205 
2206 static __inline void
2207 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2208 {
2209 	uint16_t	command;
2210 
2211 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2212 	command &= ~bit;
2213 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2214 }
2215 
2216 int
2217 pci_enable_busmaster_method(device_t dev, device_t child)
2218 {
2219 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2220 	return (0);
2221 }
2222 
2223 int
2224 pci_disable_busmaster_method(device_t dev, device_t child)
2225 {
2226 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2227 	return (0);
2228 }
2229 
2230 int
2231 pci_enable_io_method(device_t dev, device_t child, int space)
2232 {
2233 	uint16_t bit;
2234 
2235 	switch(space) {
2236 	case SYS_RES_IOPORT:
2237 		bit = PCIM_CMD_PORTEN;
2238 		break;
2239 	case SYS_RES_MEMORY:
2240 		bit = PCIM_CMD_MEMEN;
2241 		break;
2242 	default:
2243 		return (EINVAL);
2244 	}
2245 	pci_set_command_bit(dev, child, bit);
2246 	return (0);
2247 }
2248 
2249 int
2250 pci_disable_io_method(device_t dev, device_t child, int space)
2251 {
2252 	uint16_t bit;
2253 
2254 	switch(space) {
2255 	case SYS_RES_IOPORT:
2256 		bit = PCIM_CMD_PORTEN;
2257 		break;
2258 	case SYS_RES_MEMORY:
2259 		bit = PCIM_CMD_MEMEN;
2260 		break;
2261 	default:
2262 		return (EINVAL);
2263 	}
2264 	pci_clear_command_bit(dev, child, bit);
2265 	return (0);
2266 }
2267 
2268 /*
2269  * New style pci driver.  Parent device is either a pci-host-bridge or a
2270  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2271  */
2272 
/*
 * Dump the identification, class, timing, interrupt, power-management,
 * MSI, and MSI-X configuration of a device to the console.  Only emits
 * output when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1..4 maps to INTA..INTD; print as 'a'..'d'. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management: show supported D-states and current one. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability: message count and control-register features. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability: message count and which BAR(s) hold the tables. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2329 
2330 static int
2331 pci_porten(device_t dev)
2332 {
2333 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2334 }
2335 
2336 static int
2337 pci_memen(device_t dev)
2338 {
2339 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2340 }
2341 
/*
 * Read a BAR's current value into *mapp and its all-ones "sizing" value
 * into *testvalp.  The BAR (and its upper half, for a 64-bit BAR) is
 * restored to its original value before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	/* For a 64-bit BAR the upper 32 bits live in the next register. */
	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2403 
2404 static void
2405 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2406 {
2407 	pci_addr_t map;
2408 	int ln2range;
2409 
2410 	map = pci_read_config(dev, reg, 4);
2411 
2412 	/* The device ROM BAR is always 32-bits. */
2413 	if (reg == PCIR_BIOS)
2414 		return;
2415 	ln2range = pci_maprange(map);
2416 	pci_write_config(dev, reg, base, 4);
2417 	if (ln2range == 64)
2418 		pci_write_config(dev, reg + 4, base >> 32, 4);
2419 }
2420 
2421 /*
2422  * Add a resource based on a pci map register. Return 1 if the map
2423  * register is a 32bit map register or 2 if it is a 64bit register.
2424  */
2425 static int
2426 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2427     int force, int prefetch)
2428 {
2429 	pci_addr_t base, map, testval;
2430 	pci_addr_t start, end, count;
2431 	int barlen, basezero, maprange, mapsize, type;
2432 	uint16_t cmd;
2433 	struct resource *res;
2434 
2435 	pci_read_bar(dev, reg, &map, &testval);
2436 	if (PCI_BAR_MEM(map)) {
2437 		type = SYS_RES_MEMORY;
2438 		if (map & PCIM_BAR_MEM_PREFETCH)
2439 			prefetch = 1;
2440 	} else
2441 		type = SYS_RES_IOPORT;
2442 	mapsize = pci_mapsize(testval);
2443 	base = pci_mapbase(map);
2444 #ifdef __PCI_BAR_ZERO_VALID
2445 	basezero = 0;
2446 #else
2447 	basezero = base == 0;
2448 #endif
2449 	maprange = pci_maprange(map);
2450 	barlen = maprange == 64 ? 2 : 1;
2451 
2452 	/*
2453 	 * For I/O registers, if bottom bit is set, and the next bit up
2454 	 * isn't clear, we know we have a BAR that doesn't conform to the
2455 	 * spec, so ignore it.  Also, sanity check the size of the data
2456 	 * areas to the type of memory involved.  Memory must be at least
2457 	 * 16 bytes in size, while I/O ranges must be at least 4.
2458 	 */
2459 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2460 		return (barlen);
2461 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2462 	    (type == SYS_RES_IOPORT && mapsize < 2))
2463 		return (barlen);
2464 
2465 	if (bootverbose) {
2466 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2467 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2468 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2469 			printf(", port disabled\n");
2470 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2471 			printf(", memory disabled\n");
2472 		else
2473 			printf(", enabled\n");
2474 	}
2475 
2476 	/*
2477 	 * If base is 0, then we have problems if this architecture does
2478 	 * not allow that.  It is best to ignore such entries for the
2479 	 * moment.  These will be allocated later if the driver specifically
2480 	 * requests them.  However, some removable busses look better when
2481 	 * all resources are allocated, so allow '0' to be overriden.
2482 	 *
2483 	 * Similarly treat maps whose values is the same as the test value
2484 	 * read back.  These maps have had all f's written to them by the
2485 	 * BIOS in an attempt to disable the resources.
2486 	 */
2487 	if (!force && (basezero || map == testval))
2488 		return (barlen);
2489 	if ((u_long)base != base) {
2490 		device_printf(bus,
2491 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2492 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2493 		    pci_get_function(dev), reg);
2494 		return (barlen);
2495 	}
2496 
2497 	/*
2498 	 * This code theoretically does the right thing, but has
2499 	 * undesirable side effects in some cases where peripherals
2500 	 * respond oddly to having these bits enabled.  Let the user
2501 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2502 	 * default).
2503 	 */
2504 	if (pci_enable_io_modes) {
2505 		/* Turn on resources that have been left off by a lazy BIOS */
2506 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2507 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2508 			cmd |= PCIM_CMD_PORTEN;
2509 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2510 		}
2511 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2512 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2513 			cmd |= PCIM_CMD_MEMEN;
2514 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2515 		}
2516 	} else {
2517 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2518 			return (barlen);
2519 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2520 			return (barlen);
2521 	}
2522 
2523 	count = 1 << mapsize;
2524 	if (basezero || base == pci_mapbase(testval)) {
2525 		start = 0;	/* Let the parent decide. */
2526 		end = ~0ULL;
2527 	} else {
2528 		start = base;
2529 		end = base + (1 << mapsize) - 1;
2530 	}
2531 	resource_list_add(rl, type, reg, start, end, count);
2532 
2533 	/*
2534 	 * Try to allocate the resource for this BAR from our parent
2535 	 * so that this resource range is already reserved.  The
2536 	 * driver for this device will later inherit this resource in
2537 	 * pci_alloc_resource().
2538 	 */
2539 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2540 	    prefetch ? RF_PREFETCHABLE : 0);
2541 	if (res == NULL) {
2542 		/*
2543 		 * If the allocation fails, clear the BAR and delete
2544 		 * the resource list entry to force
2545 		 * pci_alloc_resource() to allocate resources from the
2546 		 * parent.
2547 		 */
2548 		resource_list_delete(rl, type, reg);
2549 		start = 0;
2550 	} else
2551 		start = rman_get_start(res);
2552 	pci_write_bar(dev, reg, start);
2553 	return (barlen);
2554 }
2555 
2556 /*
2557  * For ATA devices we need to decide early what addressing mode to use.
2558  * Legacy demands that the primary and secondary ATA ports sits on the
2559  * same addresses that old ISA hardware did. This dictates that we use
2560  * those addresses and ignore the BAR's if we cannot set PCI native
2561  * addressing mode.
2562  */
2563 static void
2564 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2565     uint32_t prefetchmask)
2566 {
2567 	struct resource *r;
2568 	int rid, type, progif;
2569 #if 0
2570 	/* if this device supports PCI native addressing use it */
2571 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2572 	if ((progif & 0x8a) == 0x8a) {
2573 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2574 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2575 			printf("Trying ATA native PCI addressing mode\n");
2576 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2577 		}
2578 	}
2579 #endif
2580 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2581 	type = SYS_RES_IOPORT;
2582 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2583 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2584 		    prefetchmask & (1 << 0));
2585 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2586 		    prefetchmask & (1 << 1));
2587 	} else {
2588 		rid = PCIR_BAR(0);
2589 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2590 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2591 		    0x1f7, 8, 0);
2592 		rid = PCIR_BAR(1);
2593 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2594 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2595 		    0x3f6, 1, 0);
2596 	}
2597 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2598 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2599 		    prefetchmask & (1 << 2));
2600 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2601 		    prefetchmask & (1 << 3));
2602 	} else {
2603 		rid = PCIR_BAR(2);
2604 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2605 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2606 		    0x177, 8, 0);
2607 		rid = PCIR_BAR(3);
2608 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2609 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2610 		    0x376, 1, 0);
2611 	}
2612 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2613 	    prefetchmask & (1 << 4));
2614 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2615 	    prefetchmask & (1 << 5));
2616 }
2617 
/*
 * Determine the legacy INTx IRQ for a device and record it as the rid 0
 * SYS_RES_IRQ resource.  The IRQ comes from, in order of preference: a
 * user tunable, the bus's interrupt routing, or the intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq.  Values outside
	 * 1..254 are rejected.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2665 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM firmware owns the controller; ask it to let go. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100ms (100 x 1ms) for the handover. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* Firmware never released it; reset the controller. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2702 
2703 /* Perform early UHCI takeover from SMM. */
2704 static void
2705 uhci_early_takeover(device_t self)
2706 {
2707 	struct resource *res;
2708 	int rid;
2709 
2710 	/*
2711 	 * Set the PIRQD enable bit and switch off all the others. We don't
2712 	 * want legacy support to interfere with us XXX Does this also mean
2713 	 * that the BIOS won't touch the keyboard anymore if it is connected
2714 	 * to the ports of the root hub?
2715 	 */
2716 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2717 
2718 	/* Disable interrupts */
2719 	rid = PCI_UHCI_BASE_REG;
2720 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2721 	if (res != NULL) {
2722 		bus_write_2(res, UHCI_INTR, 0);
2723 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2724 	}
2725 }
2726 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's capability registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended-capability list in config space looking for LEGSUP
	 * entries; eecp == 0 terminates the list.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Non-zero BIOS semaphore means the firmware owns it. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms (100 x 1ms) for the BIOS to release. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2782 
/*
 * Populate the device's resource list from its BARs, quirk entries, and
 * interrupt line, and perform early USB controller takeover from SMM
 * where applicable.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/*
	 * ATA devices needs special map treatment: controllers in (or
	 * forced into) compatibility mode use fixed legacy port ranges
	 * instead of their BARs.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2, skipping the upper half
		 * of any 64-bit BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB host controllers away from SMM firmware early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2837 
/*
 * Scan every slot/function on the given bus and add a child device for
 * each function that responds with a valid header type.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		/* 'f' must be 0 here: the REG() read below expands to use it. */
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions beyond 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2870 
/*
 * Create a new-bus child for a probed PCI function and set up its ivars
 * and resource list.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Save then restore the config space; the deliberate ordering
	 * is relied upon here (see pci_cfg_save/pci_cfg_restore).
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2882 
2883 static int
2884 pci_probe(device_t dev)
2885 {
2886 
2887 	device_set_desc(dev, "PCI bus");
2888 
2889 	/* Allow other subclasses to override this driver. */
2890 	return (BUS_PROBE_GENERIC);
2891 }
2892 
2893 static int
2894 pci_attach(device_t dev)
2895 {
2896 	int busno, domain;
2897 
2898 	/*
2899 	 * Since there can be multiple independantly numbered PCI
2900 	 * busses on systems with multiple PCI domains, we can't use
2901 	 * the unit number to decide which bus we are probing. We ask
2902 	 * the parent pcib what our domain and bus numbers are.
2903 	 */
2904 	domain = pcib_get_domain(dev);
2905 	busno = pcib_get_bus(dev);
2906 	if (bootverbose)
2907 		device_printf(dev, "domain=%d, physical bus=%d\n",
2908 		    domain, busno);
2909 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2910 	return (bus_generic_attach(dev));
2911 }
2912 
/*
 * Suspend the bus: save each child's config space, suspend the children,
 * then (when power management on resume is enabled and ACPI is present)
 * place attached type 0 devices into a low-power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) &&
		    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) ==
		    PCIM_HDRTYPE_NORMAL) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default D3 target state. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2963 
/*
 * Resume the bus: power each child back to D0 (via ACPI when enabled),
 * restore its saved config space, then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) ==
		    PCIM_HDRTYPE_NORMAL) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
		/* Re-save the config of children without a driver. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3002 
3003 static void
3004 pci_load_vendor_data(void)
3005 {
3006 	caddr_t vendordata, info;
3007 
3008 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3009 		info = preload_search_info(vendordata, MODINFO_ADDR);
3010 		pci_vendordata = *(char **)info;
3011 		info = preload_search_info(vendordata, MODINFO_SIZE);
3012 		pci_vendordata_size = *(size_t *)info;
3013 		/* terminate the database */
3014 		pci_vendordata[pci_vendordata_size] = '\n';
3015 	}
3016 }
3017 
3018 void
3019 pci_driver_added(device_t dev, driver_t *driver)
3020 {
3021 	int numdevs;
3022 	device_t *devlist;
3023 	device_t child;
3024 	struct pci_devinfo *dinfo;
3025 	int i;
3026 
3027 	if (bootverbose)
3028 		device_printf(dev, "driver added\n");
3029 	DEVICE_IDENTIFY(driver, dev);
3030 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3031 		return;
3032 	for (i = 0; i < numdevs; i++) {
3033 		child = devlist[i];
3034 		if (device_get_state(child) != DS_NOTPRESENT)
3035 			continue;
3036 		dinfo = device_get_ivars(child);
3037 		pci_print_verbose(dinfo);
3038 		if (bootverbose)
3039 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3040 		pci_cfg_restore(child, dinfo);
3041 		if (device_probe_and_attach(child) != 0)
3042 			pci_cfg_save(child, dinfo, 1);
3043 	}
3044 	free(devlist, M_TEMP);
3045 }
3046 
/*
 * Bus setup_intr method.  Wires up the generic interrupt handler and,
 * for direct children, programs the hardware side: for legacy INTx
 * (rid 0) it clears the INTx-disable bit; for MSI/MSI-X rids it asks
 * the parent bridge to map the message (lazily, on first handler),
 * enables the message in the device, and tracks per-vector handler
 * counts so teardown knows when to disable again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI lazily on the first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the device on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Otherwise this must be an MSI-X vector. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector lazily on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* On the success path error is 0 here, so 'bad' is benign. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3138 
3139 int
3140 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3141     void *cookie)
3142 {
3143 	struct msix_table_entry *mte;
3144 	struct resource_list_entry *rle;
3145 	struct pci_devinfo *dinfo;
3146 	int error, rid;
3147 
3148 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3149 		return (EINVAL);
3150 
3151 	/* If this isn't a direct child, just bail out */
3152 	if (device_get_parent(child) != dev)
3153 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3154 
3155 	rid = rman_get_rid(irq);
3156 	if (rid == 0) {
3157 		/* Mask INTx */
3158 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3159 	} else {
3160 		/*
3161 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3162 		 * decrement the appropriate handlers count and mask the
3163 		 * MSI-X message, or disable MSI messages if the count
3164 		 * drops to 0.
3165 		 */
3166 		dinfo = device_get_ivars(child);
3167 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3168 		if (rle->res != irq)
3169 			return (EINVAL);
3170 		if (dinfo->cfg.msi.msi_alloc > 0) {
3171 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3172 			    ("MSI-X index too high"));
3173 			if (dinfo->cfg.msi.msi_handlers == 0)
3174 				return (EINVAL);
3175 			dinfo->cfg.msi.msi_handlers--;
3176 			if (dinfo->cfg.msi.msi_handlers == 0)
3177 				pci_disable_msi(child);
3178 		} else {
3179 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3180 			    ("No MSI or MSI-X interrupts allocated"));
3181 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3182 			    ("MSI-X index too high"));
3183 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3184 			if (mte->mte_handlers == 0)
3185 				return (EINVAL);
3186 			mte->mte_handlers--;
3187 			if (mte->mte_handlers == 0)
3188 				pci_mask_msix(child, rid - 1);
3189 		}
3190 	}
3191 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3192 	if (rid > 0)
3193 		KASSERT(error == 0,
3194 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3195 	return (error);
3196 }
3197 
3198 int
3199 pci_print_child(device_t dev, device_t child)
3200 {
3201 	struct pci_devinfo *dinfo;
3202 	struct resource_list *rl;
3203 	int retval = 0;
3204 
3205 	dinfo = device_get_ivars(child);
3206 	rl = &dinfo->resources;
3207 
3208 	retval += bus_print_child_header(dev, child);
3209 
3210 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3211 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3212 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3213 	if (device_get_flags(dev))
3214 		retval += printf(" flags %#x", device_get_flags(dev));
3215 
3216 	retval += printf(" at device %d.%d", pci_get_slot(child),
3217 	    pci_get_function(child));
3218 
3219 	retval += bus_print_child_footer(dev, child);
3220 
3221 	return (retval);
3222 }
3223 
/*
 * Class/subclass -> description table used by pci_probe_nomatch() when
 * no driver attaches and the vendor database has no entry.  A subclass
 * of -1 provides the generic fallback description for the class; the
 * table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3315 
3316 void
3317 pci_probe_nomatch(device_t dev, device_t child)
3318 {
3319 	int	i;
3320 	char	*cp, *scp, *device;
3321 
3322 	/*
3323 	 * Look for a listing for this device in a loaded device database.
3324 	 */
3325 	if ((device = pci_describe_device(child)) != NULL) {
3326 		device_printf(dev, "<%s>", device);
3327 		free(device, M_DEVBUF);
3328 	} else {
3329 		/*
3330 		 * Scan the class/subclass descriptions for a general
3331 		 * description.
3332 		 */
3333 		cp = "unknown";
3334 		scp = NULL;
3335 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3336 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3337 				if (pci_nomatch_tab[i].subclass == -1) {
3338 					cp = pci_nomatch_tab[i].desc;
3339 				} else if (pci_nomatch_tab[i].subclass ==
3340 				    pci_get_subclass(child)) {
3341 					scp = pci_nomatch_tab[i].desc;
3342 				}
3343 			}
3344 		}
3345 		device_printf(dev, "<%s%s%s>",
3346 		    cp ? cp : "",
3347 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3348 		    scp ? scp : "");
3349 	}
3350 	printf(" at device %d.%d (no driver attached)\n",
3351 	    pci_get_slot(child), pci_get_function(child));
3352 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3353 	return;
3354 }
3355 
3356 /*
3357  * Parse the PCI device database, if loaded, and return a pointer to a
3358  * description of the device.
3359  *
3360  * The database is flat text formatted as follows:
3361  *
3362  * Any line not in a valid format is ignored.
3363  * Lines are terminated with newline '\n' characters.
3364  *
3365  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3366  * the vendor name.
3367  *
3368  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3369  * - devices cannot be listed without a corresponding VENDOR line.
3370  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3371  * another TAB, then the device name.
3372  */
3373 
3374 /*
3375  * Assuming (ptr) points to the beginning of a line in the database,
3376  * return the vendor or device and description of the next entry.
3377  * The value of (vendor) or (device) inappropriate for the entry type
3378  * is set to -1.  Returns nonzero at the end of the database.
3379  *
3380  * Note that this is slightly unrobust in the face of corrupt data;
3381  * we attempt to safeguard against this by spamming the end of the
3382  * database with a newline when we initialise.
3383  */
/*
 * Parse the next valid entry from the vendor database starting at
 * *ptr.  On return, exactly one of *vendor / *device holds the parsed
 * hex code (the other is -1) and *desc (an 80-byte caller buffer)
 * holds the description.  *ptr is advanced past the consumed line.
 * Returns nonzero at end of database, 0 on a successful parse.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry?  (line does not start with a TAB) */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry?  (line starts with a TAB) */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/*
	 * skip to next line
	 *
	 * NOTE(review): *cp is read before left is checked; this relies
	 * on the trailing '\n' spammed onto the database by
	 * pci_load_vendor_data() to stay within bounds — confirm if the
	 * loading path changes.
	 */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3429 
/*
 * Look the device up in the loaded vendor database and return a
 * malloc'ed "vendor, device" description string, or NULL if the
 * database is absent, allocation fails, or the vendor is not listed.
 * The caller must free the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/* Walk vendor lines until this device's vendor is found. */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Walk the device lines under this vendor; stop at end of
	 * database or at the next vendor line (vendor != -1).
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device entry found: fall back to the raw device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3: ", " separator plus the NUL terminator. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3482 
/*
 * Bus read_ivar method.  Returns the requested instance variable from
 * the child's cached config registers.  Returns ENOENT for unknown
 * ivars and EINVAL for PCI_IVAR_ETHADDR (not supported here, but the
 * result is set to NULL so generic accessors see a defined value).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device:vendor id word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3565 
/*
 * Bus write_ivar method.  Only PCI_IVAR_INTPIN is writable; the other
 * identity ivars are read-only (EINVAL), and unknown ivars return
 * ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3598 
3599 
3600 #include "opt_ddb.h"
3601 #ifdef DDB
3602 #include <ddb/ddb.h>
3603 #include <sys/cons.h>
3604 
3605 /*
3606  * List resources based on pci map registers, used for within ddb
3607  */
3608 
/*
 * DDB "show pciregs" command: walks the global pci_devq list and
 * prints one summary line (name/unit, selector, class, subsystem and
 * chip ids, revision, header type) per device.  Devices with no driver
 * are printed as "none<N>" with a running counter.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3648 #endif /* DDB */
3649 
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * allocates it.  Sizes the BAR by probing it (pci_read_bar), validates
 * the requested type against what the BAR decodes, allocates a
 * suitably sized and aligned range from the parent, records it in the
 * child's resource list as RLE_RESERVED, and programs the BAR with the
 * assigned base.  Returns the reserved resource or NULL on any
 * rejection or failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* The BAR's decode type must match the requested resource type. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs are naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were given. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3745 
3746 
/*
 * Bus alloc_resource method.  For direct children, performs the lazy
 * setup this bus provides: routing an INTx interrupt on first IRQ
 * allocation (unless MSI/MSI-X is already in use) and reserving BAR
 * resources on first port/memory allocation, then satisfies the
 * request from the child's resource list.  Indirect children are
 * passed straight up to the parent.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All types fall through to a normal resource-list allocation. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3797 
3798 int
3799 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3800     struct resource *r)
3801 {
3802 	int error;
3803 
3804 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3805 	if (error)
3806 		return (error);
3807 
3808 	/* Enable decoding in the command register when activating BARs. */
3809 	if (device_get_parent(child) == dev) {
3810 		/* Device ROMs need their decoding explicitly enabled. */
3811 		if (rid == PCIR_BIOS)
3812 			pci_write_config(child, rid, rman_get_start(r) |
3813 			    PCIM_BIOS_ENABLE, 4);
3814 		switch (type) {
3815 		case SYS_RES_IOPORT:
3816 		case SYS_RES_MEMORY:
3817 			error = PCI_ENABLE_IO(dev, child, type);
3818 			break;
3819 		}
3820 	}
3821 	return (error);
3822 }
3823 
3824 int
3825 pci_deactivate_resource(device_t dev, device_t child, int type,
3826     int rid, struct resource *r)
3827 {
3828 	int error;
3829 
3830 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3831 	if (error)
3832 		return (error);
3833 
3834 	/* Disable decoding for device ROMs. */
3835 	if (rid == PCIR_BIOS)
3836 		pci_write_config(child, rid, rman_get_start(r), 4);
3837 	return (0);
3838 }
3839 
/*
 * Remove a child device from the bus: detach it if attached, disable
 * its port/memory decoding, release and unreserve everything on its
 * resource list, and finally delete the device and free its config
 * state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * Active or busy entries should have been
			 * released by the detach above; complain and
			 * force-release them before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3879 
/*
 * Bus delete_resource method.  Removes a resource-list entry for a
 * direct child, refusing (with a console warning) if the resource is
 * still active or busy.  For BAR-backed port/memory resources the BAR
 * is cleared first so the device stops decoding the range (unless the
 * platform declares BAR value 0 valid).
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children have entries on this bus's lists. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3922 
3923 struct resource_list *
3924 pci_get_resource_list (device_t dev, device_t child)
3925 {
3926 	struct pci_devinfo *dinfo = device_get_ivars(child);
3927 
3928 	return (&dinfo->resources);
3929 }
3930 
3931 uint32_t
3932 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3933 {
3934 	struct pci_devinfo *dinfo = device_get_ivars(child);
3935 	pcicfgregs *cfg = &dinfo->cfg;
3936 
3937 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3938 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3939 }
3940 
3941 void
3942 pci_write_config_method(device_t dev, device_t child, int reg,
3943     uint32_t val, int width)
3944 {
3945 	struct pci_devinfo *dinfo = device_get_ivars(child);
3946 	pcicfgregs *cfg = &dinfo->cfg;
3947 
3948 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3949 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3950 }
3951 
3952 int
3953 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3954     size_t buflen)
3955 {
3956 
3957 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3958 	    pci_get_function(child));
3959 	return (0);
3960 }
3961 
3962 int
3963 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3964     size_t buflen)
3965 {
3966 	struct pci_devinfo *dinfo;
3967 	pcicfgregs *cfg;
3968 
3969 	dinfo = device_get_ivars(child);
3970 	cfg = &dinfo->cfg;
3971 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3972 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3973 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3974 	    cfg->progif);
3975 	return (0);
3976 }
3977 
3978 int
3979 pci_assign_interrupt_method(device_t dev, device_t child)
3980 {
3981 	struct pci_devinfo *dinfo = device_get_ivars(child);
3982 	pcicfgregs *cfg = &dinfo->cfg;
3983 
3984 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3985 	    cfg->intpin));
3986 }
3987 
3988 static int
3989 pci_modevent(module_t mod, int what, void *arg)
3990 {
3991 	static struct cdev *pci_cdev;
3992 
3993 	switch (what) {
3994 	case MOD_LOAD:
3995 		STAILQ_INIT(&pci_devq);
3996 		pci_generation = 0;
3997 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3998 		    "pci");
3999 		pci_load_vendor_data();
4000 		break;
4001 
4002 	case MOD_UNLOAD:
4003 		destroy_dev(pci_cdev);
4004 		break;
4005 	}
4006 
4007 	return (0);
4008 }
4009 
/*
 * Restore the config-space registers previously snapshotted by
 * pci_cfg_save(): BARs, expansion ROM base, command register,
 * interrupt routing and latency/cache settings, plus any MSI/MSI-X
 * state.  Used on resume or after an event that may have reset the
 * chip.  The ordering below is significant: power state first, then
 * BARs, then the rest.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the cached BARs and the expansion ROM base address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the remaining writable type 0 header registers. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4055 
/*
 * Snapshot the writable type 0 config-space registers into dinfo so
 * pci_cfg_restore() can put them back later, and optionally (when
 * setstate is non-zero) power the device down to D3 according to the
 * pci_do_power_nodriver policy tunable.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;
	/* Snapshot the BARs and the expansion ROM base address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/*
	 * Policy switch: each level deliberately falls through into the
	 * next, exempting progressively fewer device classes from
	 * powerdown.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4139