xref: /freebsd/sys/dev/pci/pci.c (revision a3cf0ef5a295c885c895fabfd56470c0d1db322d)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 static pci_addr_t	pci_mapbase(uint64_t mapreg);
73 static const char	*pci_maptype(uint64_t mapreg);
74 static int		pci_mapsize(uint64_t testval);
75 static int		pci_maprange(uint64_t mapreg);
76 static pci_addr_t	pci_rombase(uint64_t mapreg);
77 static int		pci_romsize(uint64_t testval);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80 
81 static int		pci_porten(device_t dev);
82 static int		pci_memen(device_t dev);
83 static void		pci_assign_interrupt(device_t bus, device_t dev,
84 			    int force_route);
85 static int		pci_add_map(device_t bus, device_t dev, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 static int		pci_remap_intr_method(device_t bus, device_t dev,
115 			    u_int irq);
116 
/*
 * Kernel object method table for the PCI bus driver: maps the generic
 * device/bus/PCI interfaces onto this file's implementations (or the
 * bus_generic_* defaults where no PCI-specific handling is needed).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminator entry - must be last. */
	{ 0, 0 }
};
169 
/* Register the "pci" driver class and attach it as a child of pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Buffer holding the raw vendor/device description text; presumably
 * populated by pci_load_vendor_data() (declared above, defined elsewhere)
 * and consumed by pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
178 
179 
/*
 * Table of known-broken (or known-good) devices that need special
 * treatment.  An entry matches when the combined vendor/device ID
 * equals 'devid'; 'type' selects the workaround and arg1/arg2 carry
 * quirk-specific data (e.g. the odd map register offset).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/* Terminator entry - devid 0 ends the table. */
	{ 0 }
};
230 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every pci_devinfo enumerated (see pci_read_device). */
struct devlist pci_devq;
/* Bumped each time pci_devq changes; lets consumers detect staleness. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set in pci_read_extcap when a PCIe/PCI-X capability is discovered. */
static int pcie_chipset, pcix_chipset;
240 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes: set I/O and memory decode bits in PCIR_COMMAND. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");
251 
252 static int pci_do_power_nodriver = 0;
253 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
254 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
255     &pci_do_power_nodriver, 0,
256   "Place a function into D3 state when no driver attaches to it.  0 means\n\
257 disable.  1 means conservatively place devices into D3 state.  2 means\n\
258 agressively place devices into D3 state.  3 means put absolutely everything\n\
259 in D3 state.");
260 
/* hw.pci.do_power_resume: restore devices to D0 on system resume. */
int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.do_power_suspend: place devices in D3 on system suspend. */
int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

/* hw.pci.enable_msi / enable_msix: global switches for message interrupts. */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* hw.pci.honor_msi_blacklist: consult pci_quirks before enabling MSI. */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB legacy takeover defaults on only where BIOS USB emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
298 
299 /* Find a device_t by bus/slot/function in domain 0 */
300 
/*
 * Convenience wrapper around pci_find_dbsf() that searches domain 0 only.
 * Returns the matching device_t, or NULL if no such device exists.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
307 
308 /* Find a device_t by domain/bus/slot/function */
309 
310 device_t
311 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
312 {
313 	struct pci_devinfo *dinfo;
314 
315 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
316 		if ((dinfo->cfg.domain == domain) &&
317 		    (dinfo->cfg.bus == bus) &&
318 		    (dinfo->cfg.slot == slot) &&
319 		    (dinfo->cfg.func == func)) {
320 			return (dinfo->cfg.dev);
321 		}
322 	}
323 
324 	return (NULL);
325 }
326 
327 /* Find a device_t by vendor/device ID */
328 
329 device_t
330 pci_find_device(uint16_t vendor, uint16_t device)
331 {
332 	struct pci_devinfo *dinfo;
333 
334 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
335 		if ((dinfo->cfg.vendor == vendor) &&
336 		    (dinfo->cfg.device == device)) {
337 			return (dinfo->cfg.dev);
338 		}
339 	}
340 
341 	return (NULL);
342 }
343 
/*
 * printf() prefixed with the device's "pciD:B:S:F: " location string.
 * Returns the total character count emitted, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
357 
358 /* return base address of memory or port map */
359 
360 static pci_addr_t
361 pci_mapbase(uint64_t mapreg)
362 {
363 
364 	if (PCI_BAR_MEM(mapreg))
365 		return (mapreg & PCIM_BAR_MEM_BASE);
366 	else
367 		return (mapreg & PCIM_BAR_IO_BASE);
368 }
369 
370 /* return map type of memory or port map */
371 
372 static const char *
373 pci_maptype(uint64_t mapreg)
374 {
375 
376 	if (PCI_BAR_IO(mapreg))
377 		return ("I/O Port");
378 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
379 		return ("Prefetchable Memory");
380 	return ("Memory");
381 }
382 
383 /* return log2 of map size decoded for memory or port map */
384 
385 static int
386 pci_mapsize(uint64_t testval)
387 {
388 	int ln2size;
389 
390 	testval = pci_mapbase(testval);
391 	ln2size = 0;
392 	if (testval != 0) {
393 		while ((testval & 1) == 0)
394 		{
395 			ln2size++;
396 			testval >>= 1;
397 		}
398 	}
399 	return (ln2size);
400 }
401 
402 /* return base address of device ROM */
403 
/* Return the base address field of an expansion ROM BAR. */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
410 
411 /* return log2 of map size decided for device ROM */
412 
413 static int
414 pci_romsize(uint64_t testval)
415 {
416 	int ln2size;
417 
418 	testval = pci_rombase(testval);
419 	ln2size = 0;
420 	if (testval != 0) {
421 		while ((testval & 1) == 0)
422 		{
423 			ln2size++;
424 			testval >>= 1;
425 		}
426 	}
427 	return (ln2size);
428 }
429 
430 /* return log2 of address range supported by map register */
431 
432 static int
433 pci_maprange(uint64_t mapreg)
434 {
435 	int ln2range = 0;
436 
437 	if (PCI_BAR_IO(mapreg))
438 		ln2range = 32;
439 	else
440 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
441 		case PCIM_BAR_MEM_32:
442 			ln2range = 32;
443 			break;
444 		case PCIM_BAR_MEM_1MB:
445 			ln2range = 20;
446 			break;
447 		case PCIM_BAR_MEM_64:
448 			ln2range = 64;
449 			break;
450 		}
451 	return (ln2range);
452 }
453 
454 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
455 
/*
 * Fix up config data from pre-2.0 devices: some ancient PCI-PCI bridges
 * report a type-0 header even though bridges must use header type 1.
 * Only type-0 headers are ever rewritten.
 */
static void
pci_fixancient(pcicfgregs *cfg)
{
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
466 
467 /* extract header type specific config data */
468 
/*
 * Read the header-type-specific fields (subsystem IDs and BAR count)
 * into 'cfg'.  Bridge headers carry no subsystem IDs here; unknown
 * header types leave cfg untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
490 
491 /* read configuration header into pcicfgregs structure */
/*
 * Probe one domain/bus/slot/function and, if a device responds, allocate
 * a pci_devinfo of 'size' bytes (callers may embed extra state past the
 * base structure), fill its config registers, link it onto the global
 * pci_devq list, and populate the exported pci_conf snapshot.
 *
 * Returns the new entry, or NULL when no device is present (the
 * vendor/device register reads back all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones means no device decodes this function. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* NB: M_WAITOK allocations do not fail; check is defensive. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) snapshot. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
565 
/*
 * Walk the device's PCI capability list and record the capabilities we
 * care about (power management, HT MSI mapping, MSI, MSI-X, VPD,
 * bridge subvendor, PCI-X, PCI-express) into 'cfg'.  Also sets the
 * global pcie_chipset/pcix_chipset hints as a side effect.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type-dependent offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
701 
702 /*
703  * PCI Vital Product Data
704  */
705 
706 #define	PCI_VPD_TIMEOUT		1000000
707 
708 static int
709 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
710 {
711 	int count = PCI_VPD_TIMEOUT;
712 
713 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
714 
715 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
716 
717 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
718 		if (--count < 0)
719 			return (ENXIO);
720 		DELAY(1);	/* limit looping */
721 	}
722 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
723 
724 	return (0);
725 }
726 
#if 0
/*
 * Write a 32-bit word to the device's VPD storage: load the data
 * register, write the address with the start bit (0x8000) set, then
 * poll until the hardware clears it.  Returns 0 or ENXIO on timeout.
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
746 
747 #undef PCI_VPD_TIMEOUT
748 
/*
 * Cursor for sequential byte-at-a-time VPD reads.  VPD hardware reads
 * are 32 bits wide; 'val' buffers the last word and 'bytesinval' counts
 * how many of its bytes remain unconsumed.  'cksum' accumulates the
 * running byte sum used to validate the read-only section.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge to issue config cycles on */
	pcicfgregs	*cfg;		/* device whose VPD we are reading */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD address to fetch */
	uint8_t		cksum;		/* running checksum of bytes seen */
};
757 
758 static int
759 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
760 {
761 	uint32_t reg;
762 	uint8_t byte;
763 
764 	if (vrs->bytesinval == 0) {
765 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
766 			return (ENXIO);
767 		vrs->val = le32toh(reg);
768 		vrs->off += 4;
769 		byte = vrs->val & 0xff;
770 		vrs->bytesinval = 3;
771 	} else {
772 		vrs->val = vrs->val >> 8;
773 		byte = vrs->val & 0xff;
774 		vrs->bytesinval--;
775 	}
776 
777 	vrs->cksum += byte;
778 	*data = byte;
779 	return (0);
780 }
781 
/*
 * Parse the device's Vital Product Data into cfg->vpd.  The VPD stream
 * is a sequence of resource items; this is a hand-rolled state machine:
 *
 *   state 0  - item header (small or large resource tag)
 *   state 1  - identifier string bytes
 *   state 2  - VPD-R (read-only) keyword header
 *   state 3  - VPD-R keyword value bytes
 *   state 4  - NOTE(review): no visible transition sets state 4; this
 *              arm looks unreachable here - confirm before removing
 *   state 5  - VPD-W (read-write) keyword header
 *   state 6  - VPD-W keyword value bytes
 *   state -1 - normal termination
 *   state -2 - I/O error while reading VPD
 *
 * On checksum failure the read-only data is discarded; on I/O error all
 * parsed data is discarded.  Relies on the REG/WREG macros defined
 * before pci_read_extcap() and #undef'd at the end of this function.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian len. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD addresses are 7 bits of dwords max. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum: the byte sum of everything through it
			 * must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Shrink the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD address so callers can write back. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark VPD parsed so pci_get_vpd_*_method won't re-read it. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1061 
1062 int
1063 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1064 {
1065 	struct pci_devinfo *dinfo = device_get_ivars(child);
1066 	pcicfgregs *cfg = &dinfo->cfg;
1067 
1068 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1069 		pci_read_vpd(device_get_parent(dev), cfg);
1070 
1071 	*identptr = cfg->vpd.vpd_ident;
1072 
1073 	if (*identptr == NULL)
1074 		return (ENXIO);
1075 
1076 	return (0);
1077 }
1078 
1079 int
1080 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1081 	const char **vptr)
1082 {
1083 	struct pci_devinfo *dinfo = device_get_ivars(child);
1084 	pcicfgregs *cfg = &dinfo->cfg;
1085 	int i;
1086 
1087 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1088 		pci_read_vpd(device_get_parent(dev), cfg);
1089 
1090 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1091 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1092 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1093 			*vptr = cfg->vpd.vpd_ros[i].value;
1094 		}
1095 
1096 	if (i != cfg->vpd.vpd_rocnt)
1097 		return (0);
1098 
1099 	*vptr = NULL;
1100 	return (ENXIO);
1101 }
1102 
1103 /*
1104  * Find the requested extended capability and return the offset in
1105  * configuration space via the pointer provided. The function returns
1106  * 0 on success and error code otherwise.
1107  */
1108 int
1109 pci_find_extcap_method(device_t dev, device_t child, int capability,
1110     int *capreg)
1111 {
1112 	struct pci_devinfo *dinfo = device_get_ivars(child);
1113 	pcicfgregs *cfg = &dinfo->cfg;
1114 	u_int32_t status;
1115 	u_int8_t ptr;
1116 
1117 	/*
1118 	 * Check the CAP_LIST bit of the PCI status register first.
1119 	 */
1120 	status = pci_read_config(child, PCIR_STATUS, 2);
1121 	if (!(status & PCIM_STATUS_CAPPRESENT))
1122 		return (ENXIO);
1123 
1124 	/*
1125 	 * Determine the start pointer of the capabilities list.
1126 	 */
1127 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1128 	case PCIM_HDRTYPE_NORMAL:
1129 	case PCIM_HDRTYPE_BRIDGE:
1130 		ptr = PCIR_CAP_PTR;
1131 		break;
1132 	case PCIM_HDRTYPE_CARDBUS:
1133 		ptr = PCIR_CAP_PTR_2;
1134 		break;
1135 	default:
1136 		/* XXX: panic? */
1137 		return (ENXIO);		/* no extended capabilities support */
1138 	}
1139 	ptr = pci_read_config(child, ptr, 1);
1140 
1141 	/*
1142 	 * Traverse the capabilities list.
1143 	 */
1144 	while (ptr != 0) {
1145 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1146 			if (capreg != NULL)
1147 				*capreg = ptr;
1148 			return (0);
1149 		}
1150 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1151 	}
1152 
1153 	return (ENOENT);
1154 }
1155 
1156 /*
1157  * Support for MSI-X message interrupts.
1158  */
1159 void
1160 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1161 {
1162 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1163 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1164 	uint32_t offset;
1165 
1166 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1167 	offset = msix->msix_table_offset + index * 16;
1168 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1169 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1170 	bus_write_4(msix->msix_table_res, offset + 8, data);
1171 
1172 	/* Enable MSI -> HT mapping. */
1173 	pci_ht_map_msi(dev, address);
1174 }
1175 
1176 void
1177 pci_mask_msix(device_t dev, u_int index)
1178 {
1179 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1180 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1181 	uint32_t offset, val;
1182 
1183 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1184 	offset = msix->msix_table_offset + index * 16 + 12;
1185 	val = bus_read_4(msix->msix_table_res, offset);
1186 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1187 		val |= PCIM_MSIX_VCTRL_MASK;
1188 		bus_write_4(msix->msix_table_res, offset, val);
1189 	}
1190 }
1191 
1192 void
1193 pci_unmask_msix(device_t dev, u_int index)
1194 {
1195 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1196 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1197 	uint32_t offset, val;
1198 
1199 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1200 	offset = msix->msix_table_offset + index * 16 + 12;
1201 	val = bus_read_4(msix->msix_table_res, offset);
1202 	if (val & PCIM_MSIX_VCTRL_MASK) {
1203 		val &= ~PCIM_MSIX_VCTRL_MASK;
1204 		bus_write_4(msix->msix_table_res, offset, val);
1205 	}
1206 }
1207 
1208 int
1209 pci_pending_msix(device_t dev, u_int index)
1210 {
1211 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1212 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1213 	uint32_t offset, bit;
1214 
1215 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1216 	offset = msix->msix_pba_offset + (index / 32) * 4;
1217 	bit = 1 << index % 32;
1218 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1219 }
1220 
1221 /*
1222  * Restore MSI-X registers and table during resume.  If MSI-X is
1223  * enabled then walk the virtual table to restore the actual MSI-X
1224  * table.
1225  */
1226 static void
1227 pci_resume_msix(device_t dev)
1228 {
1229 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1230 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1231 	struct msix_table_entry *mte;
1232 	struct msix_vector *mv;
1233 	int i;
1234 
1235 	if (msix->msix_alloc > 0) {
1236 		/* First, mask all vectors. */
1237 		for (i = 0; i < msix->msix_msgnum; i++)
1238 			pci_mask_msix(dev, i);
1239 
1240 		/* Second, program any messages with at least one handler. */
1241 		for (i = 0; i < msix->msix_table_len; i++) {
1242 			mte = &msix->msix_table[i];
1243 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1244 				continue;
1245 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1246 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1247 			pci_unmask_msix(dev, i);
1248 		}
1249 	}
1250 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1251 	    msix->msix_ctrl, 2);
1252 }
1253 
1254 /*
1255  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1256  * returned in *count.  After this function returns, each message will be
1257  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1258  */
1259 int
1260 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1261 {
1262 	struct pci_devinfo *dinfo = device_get_ivars(child);
1263 	pcicfgregs *cfg = &dinfo->cfg;
1264 	struct resource_list_entry *rle;
1265 	int actual, error, i, irq, max;
1266 
1267 	/* Don't let count == 0 get us into trouble. */
1268 	if (*count == 0)
1269 		return (EINVAL);
1270 
1271 	/* If rid 0 is allocated, then fail. */
1272 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1273 	if (rle != NULL && rle->res != NULL)
1274 		return (ENXIO);
1275 
1276 	/* Already have allocated messages? */
1277 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1278 		return (ENXIO);
1279 
1280 	/* If MSI is blacklisted for this system, fail. */
1281 	if (pci_msi_blacklisted())
1282 		return (ENXIO);
1283 
1284 	/* MSI-X capability present? */
1285 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1286 		return (ENODEV);
1287 
1288 	/* Make sure the appropriate BARs are mapped. */
1289 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1290 	    cfg->msix.msix_table_bar);
1291 	if (rle == NULL || rle->res == NULL ||
1292 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1293 		return (ENXIO);
1294 	cfg->msix.msix_table_res = rle->res;
1295 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1296 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1297 		    cfg->msix.msix_pba_bar);
1298 		if (rle == NULL || rle->res == NULL ||
1299 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1300 			return (ENXIO);
1301 	}
1302 	cfg->msix.msix_pba_res = rle->res;
1303 
1304 	if (bootverbose)
1305 		device_printf(child,
1306 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1307 		    *count, cfg->msix.msix_msgnum);
1308 	max = min(*count, cfg->msix.msix_msgnum);
1309 	for (i = 0; i < max; i++) {
1310 		/* Allocate a message. */
1311 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1312 		if (error)
1313 			break;
1314 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1315 		    irq, 1);
1316 	}
1317 	actual = i;
1318 
1319 	if (bootverbose) {
1320 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1321 		if (actual == 1)
1322 			device_printf(child, "using IRQ %lu for MSI-X\n",
1323 			    rle->start);
1324 		else {
1325 			int run;
1326 
1327 			/*
1328 			 * Be fancy and try to print contiguous runs of
1329 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1330 			 * 'run' is true if we are in a range.
1331 			 */
1332 			device_printf(child, "using IRQs %lu", rle->start);
1333 			irq = rle->start;
1334 			run = 0;
1335 			for (i = 1; i < actual; i++) {
1336 				rle = resource_list_find(&dinfo->resources,
1337 				    SYS_RES_IRQ, i + 1);
1338 
1339 				/* Still in a run? */
1340 				if (rle->start == irq + 1) {
1341 					run = 1;
1342 					irq++;
1343 					continue;
1344 				}
1345 
1346 				/* Finish previous range. */
1347 				if (run) {
1348 					printf("-%d", irq);
1349 					run = 0;
1350 				}
1351 
1352 				/* Start new range. */
1353 				printf(",%lu", rle->start);
1354 				irq = rle->start;
1355 			}
1356 
1357 			/* Unfinished range? */
1358 			if (run)
1359 				printf("-%d", irq);
1360 			printf(" for MSI-X\n");
1361 		}
1362 	}
1363 
1364 	/* Mask all vectors. */
1365 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1366 		pci_mask_msix(child, i);
1367 
1368 	/* Allocate and initialize vector data and virtual table. */
1369 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1370 	    M_DEVBUF, M_WAITOK | M_ZERO);
1371 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1372 	    M_DEVBUF, M_WAITOK | M_ZERO);
1373 	for (i = 0; i < actual; i++) {
1374 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1375 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1376 		cfg->msix.msix_table[i].mte_vector = i + 1;
1377 	}
1378 
1379 	/* Update control register to enable MSI-X. */
1380 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1381 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1382 	    cfg->msix.msix_ctrl, 2);
1383 
1384 	/* Update counts of alloc'd messages. */
1385 	cfg->msix.msix_alloc = actual;
1386 	cfg->msix.msix_table_len = actual;
1387 	*count = actual;
1388 	return (0);
1389 }
1390 
1391 /*
1392  * By default, pci_alloc_msix() will assign the allocated IRQ
1393  * resources consecutively to the first N messages in the MSI-X table.
1394  * However, device drivers may want to use different layouts if they
1395  * either receive fewer messages than they asked for, or they wish to
1396  * populate the MSI-X table sparsely.  This method allows the driver
1397  * to specify what layout it wants.  It must be called after a
1398  * successful pci_alloc_msix() but before any of the associated
1399  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1400  *
1401  * The 'vectors' array contains 'count' message vectors.  The array
1402  * maps directly to the MSI-X table in that index 0 in the array
1403  * specifies the vector for the first message in the MSI-X table, etc.
1404  * The vector value in each array index can either be 0 to indicate
1405  * that no vector should be assigned to a message slot, or it can be a
1406  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1408  * vector (IRQ) to be used for the corresponding message.
1409  *
1410  * On successful return, each message with a non-zero vector will have
1411  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1412  * 1.  Additionally, if any of the IRQs allocated via the previous
1413  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1414  * will be freed back to the system automatically.
1415  *
1416  * For example, suppose a driver has a MSI-X table with 6 messages and
1417  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1418  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1419  * C.  After the call to pci_alloc_msix(), the device will be setup to
1420  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1422  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1423  * be freed back to the system.  This device will also have valid
1424  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1425  *
1426  * In any case, the SYS_RES_IRQ rid X will always map to the message
1427  * at MSI-X table index X - 1 and will only be valid if a vector is
1428  * assigned to that table entry.
1429  */
1430 int
1431 pci_remap_msix_method(device_t dev, device_t child, int count,
1432     const u_int *vectors)
1433 {
1434 	struct pci_devinfo *dinfo = device_get_ivars(child);
1435 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1436 	struct resource_list_entry *rle;
1437 	int i, irq, j, *used;
1438 
1439 	/*
1440 	 * Have to have at least one message in the table but the
1441 	 * table can't be bigger than the actual MSI-X table in the
1442 	 * device.
1443 	 */
1444 	if (count == 0 || count > msix->msix_msgnum)
1445 		return (EINVAL);
1446 
1447 	/* Sanity check the vectors. */
1448 	for (i = 0; i < count; i++)
1449 		if (vectors[i] > msix->msix_alloc)
1450 			return (EINVAL);
1451 
1452 	/*
1453 	 * Make sure there aren't any holes in the vectors to be used.
1454 	 * It's a big pain to support it, and it doesn't really make
1455 	 * sense anyway.  Also, at least one vector must be used.
1456 	 */
1457 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1458 	    M_ZERO);
1459 	for (i = 0; i < count; i++)
1460 		if (vectors[i] != 0)
1461 			used[vectors[i] - 1] = 1;
1462 	for (i = 0; i < msix->msix_alloc - 1; i++)
1463 		if (used[i] == 0 && used[i + 1] == 1) {
1464 			free(used, M_DEVBUF);
1465 			return (EINVAL);
1466 		}
1467 	if (used[0] != 1) {
1468 		free(used, M_DEVBUF);
1469 		return (EINVAL);
1470 	}
1471 
1472 	/* Make sure none of the resources are allocated. */
1473 	for (i = 0; i < msix->msix_table_len; i++) {
1474 		if (msix->msix_table[i].mte_vector == 0)
1475 			continue;
1476 		if (msix->msix_table[i].mte_handlers > 0)
1477 			return (EBUSY);
1478 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1479 		KASSERT(rle != NULL, ("missing resource"));
1480 		if (rle->res != NULL)
1481 			return (EBUSY);
1482 	}
1483 
1484 	/* Free the existing resource list entries. */
1485 	for (i = 0; i < msix->msix_table_len; i++) {
1486 		if (msix->msix_table[i].mte_vector == 0)
1487 			continue;
1488 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1489 	}
1490 
1491 	/*
1492 	 * Build the new virtual table keeping track of which vectors are
1493 	 * used.
1494 	 */
1495 	free(msix->msix_table, M_DEVBUF);
1496 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1497 	    M_DEVBUF, M_WAITOK | M_ZERO);
1498 	for (i = 0; i < count; i++)
1499 		msix->msix_table[i].mte_vector = vectors[i];
1500 	msix->msix_table_len = count;
1501 
1502 	/* Free any unused IRQs and resize the vectors array if necessary. */
1503 	j = msix->msix_alloc - 1;
1504 	if (used[j] == 0) {
1505 		struct msix_vector *vec;
1506 
1507 		while (used[j] == 0) {
1508 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1509 			    msix->msix_vectors[j].mv_irq);
1510 			j--;
1511 		}
1512 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1513 		    M_WAITOK);
1514 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1515 		    (j + 1));
1516 		free(msix->msix_vectors, M_DEVBUF);
1517 		msix->msix_vectors = vec;
1518 		msix->msix_alloc = j + 1;
1519 	}
1520 	free(used, M_DEVBUF);
1521 
1522 	/* Map the IRQs onto the rids. */
1523 	for (i = 0; i < count; i++) {
1524 		if (vectors[i] == 0)
1525 			continue;
1526 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1527 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1528 		    irq, 1);
1529 	}
1530 
1531 	if (bootverbose) {
1532 		device_printf(child, "Remapped MSI-X IRQs as: ");
1533 		for (i = 0; i < count; i++) {
1534 			if (i != 0)
1535 				printf(", ");
1536 			if (vectors[i] == 0)
1537 				printf("---");
1538 			else
1539 				printf("%d",
1540 				    msix->msix_vectors[vectors[i]].mv_irq);
1541 		}
1542 		printf("\n");
1543 	}
1544 
1545 	return (0);
1546 }
1547 
1548 static int
1549 pci_release_msix(device_t dev, device_t child)
1550 {
1551 	struct pci_devinfo *dinfo = device_get_ivars(child);
1552 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1553 	struct resource_list_entry *rle;
1554 	int i;
1555 
1556 	/* Do we have any messages to release? */
1557 	if (msix->msix_alloc == 0)
1558 		return (ENODEV);
1559 
1560 	/* Make sure none of the resources are allocated. */
1561 	for (i = 0; i < msix->msix_table_len; i++) {
1562 		if (msix->msix_table[i].mte_vector == 0)
1563 			continue;
1564 		if (msix->msix_table[i].mte_handlers > 0)
1565 			return (EBUSY);
1566 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1567 		KASSERT(rle != NULL, ("missing resource"));
1568 		if (rle->res != NULL)
1569 			return (EBUSY);
1570 	}
1571 
1572 	/* Update control register to disable MSI-X. */
1573 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1574 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1575 	    msix->msix_ctrl, 2);
1576 
1577 	/* Free the resource list entries. */
1578 	for (i = 0; i < msix->msix_table_len; i++) {
1579 		if (msix->msix_table[i].mte_vector == 0)
1580 			continue;
1581 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1582 	}
1583 	free(msix->msix_table, M_DEVBUF);
1584 	msix->msix_table_len = 0;
1585 
1586 	/* Release the IRQs. */
1587 	for (i = 0; i < msix->msix_alloc; i++)
1588 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1589 		    msix->msix_vectors[i].mv_irq);
1590 	free(msix->msix_vectors, M_DEVBUF);
1591 	msix->msix_alloc = 0;
1592 	return (0);
1593 }
1594 
1595 /*
1596  * Return the max supported MSI-X messages this device supports.
1597  * Basically, assuming the MD code can alloc messages, this function
1598  * should return the maximum value that pci_alloc_msix() can return.
1599  * Thus, it is subject to the tunables, etc.
1600  */
1601 int
1602 pci_msix_count_method(device_t dev, device_t child)
1603 {
1604 	struct pci_devinfo *dinfo = device_get_ivars(child);
1605 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1606 
1607 	if (pci_do_msix && msix->msix_location != 0)
1608 		return (msix->msix_msgnum);
1609 	return (0);
1610 }
1611 
1612 /*
1613  * HyperTransport MSI mapping control
1614  */
1615 void
1616 pci_ht_map_msi(device_t dev, uint64_t addr)
1617 {
1618 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1619 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1620 
1621 	if (!ht->ht_msimap)
1622 		return;
1623 
1624 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1625 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1626 		/* Enable MSI -> HT mapping. */
1627 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1628 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1629 		    ht->ht_msictrl, 2);
1630 	}
1631 
1632 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1633 		/* Disable MSI -> HT mapping. */
1634 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1635 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1636 		    ht->ht_msictrl, 2);
1637 	}
1638 }
1639 
1640 int
1641 pci_get_max_read_req(device_t dev)
1642 {
1643 	int cap;
1644 	uint16_t val;
1645 
1646 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1647 		return (0);
1648 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1649 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1650 	val >>= 12;
1651 	return (1 << (val + 7));
1652 }
1653 
1654 int
1655 pci_set_max_read_req(device_t dev, int size)
1656 {
1657 	int cap;
1658 	uint16_t val;
1659 
1660 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1661 		return (0);
1662 	if (size < 128)
1663 		size = 128;
1664 	if (size > 4096)
1665 		size = 4096;
1666 	size = (1 << (fls(size) - 1));
1667 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1668 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1669 	val |= (fls(size) - 8) << 12;
1670 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1671 	return (size);
1672 }
1673 
1674 /*
1675  * Support for MSI message signalled interrupts.
1676  */
1677 void
1678 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1679 {
1680 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1681 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1682 
1683 	/* Write data and address values. */
1684 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1685 	    address & 0xffffffff, 4);
1686 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1687 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1688 		    address >> 32, 4);
1689 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1690 		    data, 2);
1691 	} else
1692 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1693 		    2);
1694 
1695 	/* Enable MSI in the control register. */
1696 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1697 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1698 	    2);
1699 
1700 	/* Enable MSI -> HT mapping. */
1701 	pci_ht_map_msi(dev, address);
1702 }
1703 
1704 void
1705 pci_disable_msi(device_t dev)
1706 {
1707 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1708 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1709 
1710 	/* Disable MSI -> HT mapping. */
1711 	pci_ht_map_msi(dev, 0);
1712 
1713 	/* Disable MSI in the control register. */
1714 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1715 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1716 	    2);
1717 }
1718 
1719 /*
1720  * Restore MSI registers during resume.  If MSI is enabled then
1721  * restore the data and address registers in addition to the control
1722  * register.
1723  */
1724 static void
1725 pci_resume_msi(device_t dev)
1726 {
1727 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1728 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1729 	uint64_t address;
1730 	uint16_t data;
1731 
1732 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1733 		address = msi->msi_addr;
1734 		data = msi->msi_data;
1735 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1736 		    address & 0xffffffff, 4);
1737 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1738 			pci_write_config(dev, msi->msi_location +
1739 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1740 			pci_write_config(dev, msi->msi_location +
1741 			    PCIR_MSI_DATA_64BIT, data, 2);
1742 		} else
1743 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1744 			    data, 2);
1745 	}
1746 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1747 	    2);
1748 }
1749 
1750 static int
1751 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1752 {
1753 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1754 	pcicfgregs *cfg = &dinfo->cfg;
1755 	struct resource_list_entry *rle;
1756 	struct msix_table_entry *mte;
1757 	struct msix_vector *mv;
1758 	uint64_t addr;
1759 	uint32_t data;
1760 	int error, i, j;
1761 
1762 	/*
1763 	 * Handle MSI first.  We try to find this IRQ among our list
1764 	 * of MSI IRQs.  If we find it, we request updated address and
1765 	 * data registers and apply the results.
1766 	 */
1767 	if (cfg->msi.msi_alloc > 0) {
1768 
1769 		/* If we don't have any active handlers, nothing to do. */
1770 		if (cfg->msi.msi_handlers == 0)
1771 			return (0);
1772 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1773 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1774 			    i + 1);
1775 			if (rle->start == irq) {
1776 				error = PCIB_MAP_MSI(device_get_parent(bus),
1777 				    dev, irq, &addr, &data);
1778 				if (error)
1779 					return (error);
1780 				pci_disable_msi(dev);
1781 				dinfo->cfg.msi.msi_addr = addr;
1782 				dinfo->cfg.msi.msi_data = data;
1783 				pci_enable_msi(dev, addr, data);
1784 				return (0);
1785 			}
1786 		}
1787 		return (ENOENT);
1788 	}
1789 
1790 	/*
1791 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1792 	 * we request the updated mapping info.  If that works, we go
1793 	 * through all the slots that use this IRQ and update them.
1794 	 */
1795 	if (cfg->msix.msix_alloc > 0) {
1796 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1797 			mv = &cfg->msix.msix_vectors[i];
1798 			if (mv->mv_irq == irq) {
1799 				error = PCIB_MAP_MSI(device_get_parent(bus),
1800 				    dev, irq, &addr, &data);
1801 				if (error)
1802 					return (error);
1803 				mv->mv_address = addr;
1804 				mv->mv_data = data;
1805 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1806 					mte = &cfg->msix.msix_table[j];
1807 					if (mte->mte_vector != i + 1)
1808 						continue;
1809 					if (mte->mte_handlers == 0)
1810 						continue;
1811 					pci_mask_msix(dev, j);
1812 					pci_enable_msix(dev, j, addr, data);
1813 					pci_unmask_msix(dev, j);
1814 				}
1815 			}
1816 		}
1817 		return (ENOENT);
1818 	}
1819 
1820 	return (ENOENT);
1821 }
1822 
1823 /*
1824  * Returns true if the specified device is blacklisted because MSI
1825  * doesn't work.
1826  */
1827 int
1828 pci_msi_device_blacklisted(device_t dev)
1829 {
1830 	struct pci_quirk *q;
1831 
1832 	if (!pci_honor_msi_blacklist)
1833 		return (0);
1834 
1835 	for (q = &pci_quirks[0]; q->devid; q++) {
1836 		if (q->devid == pci_get_devid(dev) &&
1837 		    q->type == PCI_QUIRK_DISABLE_MSI)
1838 			return (1);
1839 	}
1840 	return (0);
1841 }
1842 
1843 /*
1844  * Returns true if a specified chipset supports MSI when it is
1845  * emulated hardware in a virtual machine.
1846  */
1847 static int
1848 pci_msi_vm_chipset(device_t dev)
1849 {
1850 	struct pci_quirk *q;
1851 
1852 	for (q = &pci_quirks[0]; q->devid; q++) {
1853 		if (q->devid == pci_get_devid(dev) &&
1854 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1855 			return (1);
1856 	}
1857 	return (0);
1858 }
1859 
1860 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1862  * we just check for blacklisted chipsets as represented by the
1863  * host-PCI bridge at device 0:0:0.  In the future, it may become
1864  * necessary to check other system attributes, such as the kenv values
1865  * that give the motherboard manufacturer and model number.
1866  */
1867 static int
1868 pci_msi_blacklisted(void)
1869 {
1870 	device_t dev;
1871 
1872 	if (!pci_honor_msi_blacklist)
1873 		return (0);
1874 
1875 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1876 	if (!(pcie_chipset || pcix_chipset)) {
1877 		if (vm_guest != VM_GUEST_NO) {
1878 			dev = pci_find_bsf(0, 0, 0);
1879 			if (dev != NULL)
1880 				return (pci_msi_vm_chipset(dev) == 0);
1881 		}
1882 		return (1);
1883 	}
1884 
1885 	dev = pci_find_bsf(0, 0, 0);
1886 	if (dev != NULL)
1887 		return (pci_msi_device_blacklisted(dev));
1888 	return (0);
1889 }
1890 
1891 /*
1892  * Attempt to allocate *count MSI messages.  The actual number allocated is
1893  * returned in *count.  After this function returns, each message will be
1894  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1895  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages (irqs[] is sized for 32). */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request (which keeps it a power of 2) until it succeeds or
	 * even a single message cannot be allocated.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  Since 'actual'
	 * is a power of 2, ffs(actual) - 1 is log2(actual), which is
	 * the value the MME field (at bit 4) expects.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2014 
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when the
	 * device has no MSI-X messages allocated; only in that case do
	 * we fall through and attempt a plain MSI release.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated: a message with
	 * an interrupt handler established or with its SYS_RES_IRQ
	 * resource still held by a driver cannot be released.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ so it can be handed back to the parent. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2063 
2064 /*
2065  * Return the max supported MSI messages this device supports.
2066  * Basically, assuming the MD code can alloc messages, this function
2067  * should return the maximum value that pci_alloc_msi() can return.
2068  * Thus, it is subject to the tunables, etc.
2069  */
2070 int
2071 pci_msi_count_method(device_t dev, device_t child)
2072 {
2073 	struct pci_devinfo *dinfo = device_get_ivars(child);
2074 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2075 
2076 	if (pci_do_msi && msi->msi_location != 0)
2077 		return (msi->msi_msgnum);
2078 	return (0);
2079 }
2080 
2081 /* free pcicfgregs structure and all depending data structures */
2082 
2083 int
2084 pci_freecfg(struct pci_devinfo *dinfo)
2085 {
2086 	struct devlist *devlist_head;
2087 	int i;
2088 
2089 	devlist_head = &pci_devq;
2090 
2091 	if (dinfo->cfg.vpd.vpd_reg) {
2092 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2093 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2094 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2095 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2096 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2097 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2098 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2099 	}
2100 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2101 	free(dinfo, M_DEVBUF);
2102 
2103 	/* increment the generation count */
2104 	pci_generation++;
2105 
2106 	/* we're losing one device */
2107 	pci_numdevs--;
2108 	return (0);
2109 }
2110 
2111 /*
2112  * PCI power manangement
2113  */
2114 int
2115 pci_set_powerstate_method(device_t dev, device_t child, int state)
2116 {
2117 	struct pci_devinfo *dinfo = device_get_ivars(child);
2118 	pcicfgregs *cfg = &dinfo->cfg;
2119 	uint16_t status;
2120 	int result, oldstate, highest, delay;
2121 
2122 	if (cfg->pp.pp_cap == 0)
2123 		return (EOPNOTSUPP);
2124 
2125 	/*
2126 	 * Optimize a no state change request away.  While it would be OK to
2127 	 * write to the hardware in theory, some devices have shown odd
2128 	 * behavior when going from D3 -> D3.
2129 	 */
2130 	oldstate = pci_get_powerstate(child);
2131 	if (oldstate == state)
2132 		return (0);
2133 
2134 	/*
2135 	 * The PCI power management specification states that after a state
2136 	 * transition between PCI power states, system software must
2137 	 * guarantee a minimal delay before the function accesses the device.
2138 	 * Compute the worst case delay that we need to guarantee before we
2139 	 * access the device.  Many devices will be responsive much more
2140 	 * quickly than this delay, but there are some that don't respond
2141 	 * instantly to state changes.  Transitions to/from D3 state require
2142 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2143 	 * is done below with DELAY rather than a sleeper function because
2144 	 * this function can be called from contexts where we cannot sleep.
2145 	 */
2146 	highest = (oldstate > state) ? oldstate : state;
2147 	if (highest == PCI_POWERSTATE_D3)
2148 	    delay = 10000;
2149 	else if (highest == PCI_POWERSTATE_D2)
2150 	    delay = 200;
2151 	else
2152 	    delay = 0;
2153 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2154 	    & ~PCIM_PSTAT_DMASK;
2155 	result = 0;
2156 	switch (state) {
2157 	case PCI_POWERSTATE_D0:
2158 		status |= PCIM_PSTAT_D0;
2159 		break;
2160 	case PCI_POWERSTATE_D1:
2161 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2162 			return (EOPNOTSUPP);
2163 		status |= PCIM_PSTAT_D1;
2164 		break;
2165 	case PCI_POWERSTATE_D2:
2166 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2167 			return (EOPNOTSUPP);
2168 		status |= PCIM_PSTAT_D2;
2169 		break;
2170 	case PCI_POWERSTATE_D3:
2171 		status |= PCIM_PSTAT_D3;
2172 		break;
2173 	default:
2174 		return (EINVAL);
2175 	}
2176 
2177 	if (bootverbose)
2178 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2179 		    state);
2180 
2181 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2182 	if (delay)
2183 		DELAY(delay);
2184 	return (0);
2185 }
2186 
2187 int
2188 pci_get_powerstate_method(device_t dev, device_t child)
2189 {
2190 	struct pci_devinfo *dinfo = device_get_ivars(child);
2191 	pcicfgregs *cfg = &dinfo->cfg;
2192 	uint16_t status;
2193 	int result;
2194 
2195 	if (cfg->pp.pp_cap != 0) {
2196 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2197 		switch (status & PCIM_PSTAT_DMASK) {
2198 		case PCIM_PSTAT_D0:
2199 			result = PCI_POWERSTATE_D0;
2200 			break;
2201 		case PCIM_PSTAT_D1:
2202 			result = PCI_POWERSTATE_D1;
2203 			break;
2204 		case PCIM_PSTAT_D2:
2205 			result = PCI_POWERSTATE_D2;
2206 			break;
2207 		case PCIM_PSTAT_D3:
2208 			result = PCI_POWERSTATE_D3;
2209 			break;
2210 		default:
2211 			result = PCI_POWERSTATE_UNKNOWN;
2212 			break;
2213 		}
2214 	} else {
2215 		/* No support, device is always at D0 */
2216 		result = PCI_POWERSTATE_D0;
2217 	}
2218 	return (result);
2219 }
2220 
2221 /*
2222  * Some convenience functions for PCI device drivers.
2223  */
2224 
2225 static __inline void
2226 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2227 {
2228 	uint16_t	command;
2229 
2230 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2231 	command |= bit;
2232 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2233 }
2234 
2235 static __inline void
2236 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2237 {
2238 	uint16_t	command;
2239 
2240 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2241 	command &= ~bit;
2242 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2243 }
2244 
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Set the bus-master enable bit in the child's command register. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2251 
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Clear the bus-master enable bit in the child's command register. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2258 
2259 int
2260 pci_enable_io_method(device_t dev, device_t child, int space)
2261 {
2262 	uint16_t bit;
2263 
2264 	switch(space) {
2265 	case SYS_RES_IOPORT:
2266 		bit = PCIM_CMD_PORTEN;
2267 		break;
2268 	case SYS_RES_MEMORY:
2269 		bit = PCIM_CMD_MEMEN;
2270 		break;
2271 	default:
2272 		return (EINVAL);
2273 	}
2274 	pci_set_command_bit(dev, child, bit);
2275 	return (0);
2276 }
2277 
2278 int
2279 pci_disable_io_method(device_t dev, device_t child, int space)
2280 {
2281 	uint16_t bit;
2282 
2283 	switch(space) {
2284 	case SYS_RES_IOPORT:
2285 		bit = PCIM_CMD_PORTEN;
2286 		break;
2287 	case SYS_RES_MEMORY:
2288 		bit = PCIM_CMD_MEMEN;
2289 		break;
2290 	default:
2291 		return (EINVAL);
2292 	}
2293 	pci_clear_command_bit(dev, child, bit);
2294 	return (0);
2295 }
2296 
2297 /*
2298  * New style pci driver.  Parent device is either a pci-host-bridge or a
2299  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2300  */
2301 
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	/*
	 * When booting verbose, dump the interesting parts of the parsed
	 * config header: IDs, bus location, class codes, command/status,
	 * timing parameters, and any power management, MSI, or MSI-X
	 * capability information recorded in the pcicfgregs structure.
	 */
	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Fetch the live power state for the report. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Report whether the table and PBA share one BAR. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2358 
2359 static int
2360 pci_porten(device_t dev)
2361 {
2362 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2363 }
2364 
2365 static int
2366 pci_memen(device_t dev)
2367 {
2368 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2369 }
2370 
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * Read BAR 'reg': return its current value in *mapp and the
	 * size-probe value (read back after writing all 1's) in *testvalp.
	 * The original BAR contents and the command register are restored
	 * before returning.
	 */

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2432 
2433 static void
2434 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2435 {
2436 	pci_addr_t map;
2437 	int ln2range;
2438 
2439 	map = pci_read_config(dev, reg, 4);
2440 
2441 	/* The device ROM BAR is always 32-bits. */
2442 	if (reg == PCIR_BIOS)
2443 		return;
2444 	ln2range = pci_maprange(map);
2445 	pci_write_config(dev, reg, base, 4);
2446 	if (ln2range == 64)
2447 		pci_write_config(dev, reg + 4, base >> 32, 4);
2448 }
2449 
2450 /*
2451  * Add a resource based on a pci map register. Return 1 if the map
2452  * register is a 32bit map register or 2 if it is a 64bit register.
2453  */
2454 static int
2455 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2456     int force, int prefetch)
2457 {
2458 	pci_addr_t base, map, testval;
2459 	pci_addr_t start, end, count;
2460 	int barlen, basezero, maprange, mapsize, type;
2461 	uint16_t cmd;
2462 	struct resource *res;
2463 
2464 	pci_read_bar(dev, reg, &map, &testval);
2465 	if (PCI_BAR_MEM(map)) {
2466 		type = SYS_RES_MEMORY;
2467 		if (map & PCIM_BAR_MEM_PREFETCH)
2468 			prefetch = 1;
2469 	} else
2470 		type = SYS_RES_IOPORT;
2471 	mapsize = pci_mapsize(testval);
2472 	base = pci_mapbase(map);
2473 #ifdef __PCI_BAR_ZERO_VALID
2474 	basezero = 0;
2475 #else
2476 	basezero = base == 0;
2477 #endif
2478 	maprange = pci_maprange(map);
2479 	barlen = maprange == 64 ? 2 : 1;
2480 
2481 	/*
2482 	 * For I/O registers, if bottom bit is set, and the next bit up
2483 	 * isn't clear, we know we have a BAR that doesn't conform to the
2484 	 * spec, so ignore it.  Also, sanity check the size of the data
2485 	 * areas to the type of memory involved.  Memory must be at least
2486 	 * 16 bytes in size, while I/O ranges must be at least 4.
2487 	 */
2488 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2489 		return (barlen);
2490 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2491 	    (type == SYS_RES_IOPORT && mapsize < 2))
2492 		return (barlen);
2493 
2494 	if (bootverbose) {
2495 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2496 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2497 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2498 			printf(", port disabled\n");
2499 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2500 			printf(", memory disabled\n");
2501 		else
2502 			printf(", enabled\n");
2503 	}
2504 
2505 	/*
2506 	 * If base is 0, then we have problems if this architecture does
2507 	 * not allow that.  It is best to ignore such entries for the
2508 	 * moment.  These will be allocated later if the driver specifically
2509 	 * requests them.  However, some removable busses look better when
2510 	 * all resources are allocated, so allow '0' to be overriden.
2511 	 *
2512 	 * Similarly treat maps whose values is the same as the test value
2513 	 * read back.  These maps have had all f's written to them by the
2514 	 * BIOS in an attempt to disable the resources.
2515 	 */
2516 	if (!force && (basezero || map == testval))
2517 		return (barlen);
2518 	if ((u_long)base != base) {
2519 		device_printf(bus,
2520 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2521 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2522 		    pci_get_function(dev), reg);
2523 		return (barlen);
2524 	}
2525 
2526 	/*
2527 	 * This code theoretically does the right thing, but has
2528 	 * undesirable side effects in some cases where peripherals
2529 	 * respond oddly to having these bits enabled.  Let the user
2530 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2531 	 * default).
2532 	 */
2533 	if (pci_enable_io_modes) {
2534 		/* Turn on resources that have been left off by a lazy BIOS */
2535 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2536 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2537 			cmd |= PCIM_CMD_PORTEN;
2538 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2539 		}
2540 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2541 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2542 			cmd |= PCIM_CMD_MEMEN;
2543 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2544 		}
2545 	} else {
2546 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2547 			return (barlen);
2548 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2549 			return (barlen);
2550 	}
2551 
2552 	count = 1 << mapsize;
2553 	if (basezero || base == pci_mapbase(testval)) {
2554 		start = 0;	/* Let the parent decide. */
2555 		end = ~0ULL;
2556 	} else {
2557 		start = base;
2558 		end = base + (1 << mapsize) - 1;
2559 	}
2560 	resource_list_add(rl, type, reg, start, end, count);
2561 
2562 	/*
2563 	 * Try to allocate the resource for this BAR from our parent
2564 	 * so that this resource range is already reserved.  The
2565 	 * driver for this device will later inherit this resource in
2566 	 * pci_alloc_resource().
2567 	 */
2568 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2569 	    prefetch ? RF_PREFETCHABLE : 0);
2570 	if (res == NULL) {
2571 		/*
2572 		 * If the allocation fails, clear the BAR and delete
2573 		 * the resource list entry to force
2574 		 * pci_alloc_resource() to allocate resources from the
2575 		 * parent.
2576 		 */
2577 		resource_list_delete(rl, type, reg);
2578 		start = 0;
2579 	} else
2580 		start = rman_get_start(res);
2581 	pci_write_bar(dev, reg, start);
2582 	return (barlen);
2583 }
2584 
2585 /*
2586  * For ATA devices we need to decide early what addressing mode to use.
2587  * Legacy demands that the primary and secondary ATA ports sits on the
2588  * same addresses that old ISA hardware did. This dictates that we use
2589  * those addresses and ignore the BAR's if we cannot set PCI native
2590  * addressing mode.
2591  */
2592 static void
2593 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2594     uint32_t prefetchmask)
2595 {
2596 	struct resource *r;
2597 	int rid, type, progif;
2598 #if 0
2599 	/* if this device supports PCI native addressing use it */
2600 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2601 	if ((progif & 0x8a) == 0x8a) {
2602 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2603 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2604 			printf("Trying ATA native PCI addressing mode\n");
2605 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2606 		}
2607 	}
2608 #endif
2609 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2610 	type = SYS_RES_IOPORT;
2611 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2612 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2613 		    prefetchmask & (1 << 0));
2614 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2615 		    prefetchmask & (1 << 1));
2616 	} else {
2617 		rid = PCIR_BAR(0);
2618 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2619 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2620 		    0x1f7, 8, 0);
2621 		rid = PCIR_BAR(1);
2622 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2623 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2624 		    0x3f6, 1, 0);
2625 	}
2626 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2627 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2628 		    prefetchmask & (1 << 2));
2629 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2630 		    prefetchmask & (1 << 3));
2631 	} else {
2632 		rid = PCIR_BAR(2);
2633 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2634 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2635 		    0x177, 8, 0);
2636 		rid = PCIR_BAR(3);
2637 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2638 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2639 		    0x376, 1, 0);
2640 	}
2641 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2642 	    prefetchmask & (1 << 4));
2643 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2644 	    prefetchmask & (1 << 5));
2645 }
2646 
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/*
	 * Determine the legacy INTx IRQ for this device (tunable override,
	 * bus routing, or the existing intline value), write it back to
	 * the config register, and publish it as rid 0 in the resource
	 * list.
	 */

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only accept tunable values in the 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2694 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	/* OHCI_IR set means the SMM/BIOS currently owns the controller. */
	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request an ownership change and poll up to 100ms. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* If SMM never released the controller, force a reset. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2731 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	/* UHCI registers live in I/O port space, unlike OHCI/EHCI. */
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2755 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller: walk the
	 * extended-capability list in config space looking for the legacy
	 * support capability and its BIOS-ownership semaphore.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* BIOS semaphore clear: BIOS doesn't own this controller. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Set the OS semaphore and poll up to 100ms for release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2811 
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/*
	 * Populate the device's resource list: BARs (with ATA
	 * special-casing), quirk-added map registers, the legacy INTx
	 * interrupt, and perform early takeover of USB controllers from
	 * the BIOS/SMM.
	 */

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 registers consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Early USB takeover, gated by the hw.pci.usb_early_takeover knob. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2866 
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	/*
	 * Enumerate every slot and function on the given bus and add a
	 * child device for each function that responds.  Callers may pass
	 * a dinfo_size larger than struct pci_devinfo when embedding it
	 * in a bigger per-device structure.
	 */
	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots that report an invalid header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may have up to PCI_FUNCMAX funcs. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2899 
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	/*
	 * Create the newbus child, hook up the pci_devinfo as its ivars,
	 * and populate its resource list from the config header.
	 */
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot the current config registers, then re-apply them. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2911 
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2921 
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	/* Enumerate children, then let the generic bus code attach them. */
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2941 
2942 static void
2943 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
2944     int state)
2945 {
2946 	device_t child, pcib;
2947 	struct pci_devinfo *dinfo;
2948 	int dstate, i;
2949 
2950 	/*
2951 	 * Set the device to the given state.  If the firmware suggests
2952 	 * a different power state, use it instead.  If power management
2953 	 * is not present, the firmware is responsible for managing
2954 	 * device power.  Skip children who aren't attached since they
2955 	 * are handled separately.
2956 	 */
2957 	pcib = device_get_parent(dev);
2958 	for (i = 0; i < numdevs; i++) {
2959 		child = devlist[i];
2960 		dinfo = device_get_ivars(child);
2961 		dstate = state;
2962 		if (device_is_attached(child) &&
2963 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
2964 			pci_set_powerstate(child, dstate);
2965 	}
2966 }
2967 
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	/* Only power devices down to D3 when the knob allows it. */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
2999 
3000 int
3001 pci_resume(device_t dev)
3002 {
3003 	device_t child, *devlist;
3004 	struct pci_devinfo *dinfo;
3005 	int error, i, numdevs;
3006 
3007 	/*
3008 	 * Set each child to D0 and restore its PCI configuration space.
3009 	 */
3010 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3011 		return (error);
3012 	if (pci_do_power_resume)
3013 		pci_set_power_children(dev, devlist, numdevs,
3014 		    PCI_POWERSTATE_D0);
3015 
3016 	/* Now the device is powered up, restore its config space. */
3017 	for (i = 0; i < numdevs; i++) {
3018 		child = devlist[i];
3019 		dinfo = device_get_ivars(child);
3020 
3021 		pci_cfg_restore(child, dinfo);
3022 		if (!device_is_attached(child))
3023 			pci_cfg_save(child, dinfo, 1);
3024 	}
3025 	free(devlist, M_TEMP);
3026 	return (bus_generic_resume(dev));
3027 }
3028 
3029 static void
3030 pci_load_vendor_data(void)
3031 {
3032 	caddr_t vendordata, info;
3033 
3034 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3035 		info = preload_search_info(vendordata, MODINFO_ADDR);
3036 		pci_vendordata = *(char **)info;
3037 		info = preload_search_info(vendordata, MODINFO_SIZE);
3038 		pci_vendordata_size = *(size_t *)info;
3039 		/* terminate the database */
3040 		pci_vendordata[pci_vendordata_size] = '\n';
3041 	}
3042 }
3043 
3044 void
3045 pci_driver_added(device_t dev, driver_t *driver)
3046 {
3047 	int numdevs;
3048 	device_t *devlist;
3049 	device_t child;
3050 	struct pci_devinfo *dinfo;
3051 	int i;
3052 
3053 	if (bootverbose)
3054 		device_printf(dev, "driver added\n");
3055 	DEVICE_IDENTIFY(driver, dev);
3056 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3057 		return;
3058 	for (i = 0; i < numdevs; i++) {
3059 		child = devlist[i];
3060 		if (device_get_state(child) != DS_NOTPRESENT)
3061 			continue;
3062 		dinfo = device_get_ivars(child);
3063 		pci_print_verbose(dinfo);
3064 		if (bootverbose)
3065 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3066 		pci_cfg_restore(child, dinfo);
3067 		if (device_probe_and_attach(child) != 0)
3068 			pci_cfg_save(child, dinfo, 1);
3069 	}
3070 	free(devlist, M_TEMP);
3071 }
3072 
/*
 * Bus method: set up an interrupt handler for a child device.
 *
 * The generic bus code installs the handler first; on success, for
 * direct children we also program the hardware side: enable INTx for
 * the legacy interrupt (rid 0), or map and enable the MSI or MSI-X
 * message corresponding to the rid.  On a mapping failure the
 * just-installed handler is torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vectors lazily on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Program the message into the device on first use. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the table entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3164 
/*
 * Bus method: tear down an interrupt handler for a child device.
 *
 * For direct children this also reverses the hardware setup done in
 * pci_setup_intr(): INTx is masked for the legacy interrupt (rid 0),
 * or the per-message handler count is dropped and the MSI messages
 * disabled / MSI-X entry masked when the count reaches zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * this assumes any active MSI/MSI-X irq always appears on
		 * the child's resource list -- confirm that invariant.
		 */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last handler gone: disable MSI generation. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler gone: mask this table entry. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3223 
3224 int
3225 pci_print_child(device_t dev, device_t child)
3226 {
3227 	struct pci_devinfo *dinfo;
3228 	struct resource_list *rl;
3229 	int retval = 0;
3230 
3231 	dinfo = device_get_ivars(child);
3232 	rl = &dinfo->resources;
3233 
3234 	retval += bus_print_child_header(dev, child);
3235 
3236 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3237 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3238 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3239 	if (device_get_flags(dev))
3240 		retval += printf(" flags %#x", device_get_flags(dev));
3241 
3242 	retval += printf(" at device %d.%d", pci_get_slot(child),
3243 	    pci_get_function(child));
3244 
3245 	retval += bus_print_child_footer(dev, child);
3246 
3247 	return (retval);
3248 }
3249 
/*
 * Table mapping PCI class/subclass codes to human-readable names,
 * used by pci_probe_nomatch() to describe driverless devices.  An
 * entry with subclass -1 supplies the generic description for the
 * whole class; the table is scanned linearly and is terminated by an
 * entry with a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3341 
3342 void
3343 pci_probe_nomatch(device_t dev, device_t child)
3344 {
3345 	int	i;
3346 	char	*cp, *scp, *device;
3347 
3348 	/*
3349 	 * Look for a listing for this device in a loaded device database.
3350 	 */
3351 	if ((device = pci_describe_device(child)) != NULL) {
3352 		device_printf(dev, "<%s>", device);
3353 		free(device, M_DEVBUF);
3354 	} else {
3355 		/*
3356 		 * Scan the class/subclass descriptions for a general
3357 		 * description.
3358 		 */
3359 		cp = "unknown";
3360 		scp = NULL;
3361 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3362 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3363 				if (pci_nomatch_tab[i].subclass == -1) {
3364 					cp = pci_nomatch_tab[i].desc;
3365 				} else if (pci_nomatch_tab[i].subclass ==
3366 				    pci_get_subclass(child)) {
3367 					scp = pci_nomatch_tab[i].desc;
3368 				}
3369 			}
3370 		}
3371 		device_printf(dev, "<%s%s%s>",
3372 		    cp ? cp : "",
3373 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3374 		    scp ? scp : "");
3375 	}
3376 	printf(" at device %d.%d (no driver attached)\n",
3377 	    pci_get_slot(child), pci_get_function(child));
3378 	pci_cfg_save(child, device_get_ivars(child), 1);
3379 	return;
3380 }
3381 
3382 /*
3383  * Parse the PCI device database, if loaded, and return a pointer to a
3384  * description of the device.
3385  *
3386  * The database is flat text formatted as follows:
3387  *
3388  * Any line not in a valid format is ignored.
3389  * Lines are terminated with newline '\n' characters.
3390  *
3391  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3392  * the vendor name.
3393  *
3394  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3395  * - devices cannot be listed without a corresponding VENDOR line.
3396  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3397  * another TAB, then the device name.
3398  */
3399 
3400 /*
3401  * Assuming (ptr) points to the beginning of a line in the database,
3402  * return the vendor or device and description of the next entry.
3403  * The value of (vendor) or (device) inappropriate for the entry type
3404  * is set to -1.  Returns nonzero at the end of the database.
3405  *
 * Note that this is not entirely robust in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialize it.
3409  */
3410 static int
3411 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3412 {
3413 	char	*cp = *ptr;
3414 	int	left;
3415 
3416 	*device = -1;
3417 	*vendor = -1;
3418 	**desc = '\0';
3419 	for (;;) {
3420 		left = pci_vendordata_size - (cp - pci_vendordata);
3421 		if (left <= 0) {
3422 			*ptr = cp;
3423 			return(1);
3424 		}
3425 
3426 		/* vendor entry? */
3427 		if (*cp != '\t' &&
3428 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3429 			break;
3430 		/* device entry? */
3431 		if (*cp == '\t' &&
3432 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3433 			break;
3434 
3435 		/* skip to next line */
3436 		while (*cp != '\n' && left > 0) {
3437 			cp++;
3438 			left--;
3439 		}
3440 		if (*cp == '\n') {
3441 			cp++;
3442 			left--;
3443 		}
3444 	}
3445 	/* skip to next line */
3446 	while (*cp != '\n' && left > 0) {
3447 		cp++;
3448 		left--;
3449 	}
3450 	if (*cp == '\n' && left > 0)
3451 		cp++;
3452 	*ptr = cp;
3453 	return(0);
3454 }
3455 
/*
 * Build a malloc'd "vendor, device" description string for dev from
 * the loaded vendor database.  Returns NULL when the database is
 * absent, an allocation fails, or the vendor is not listed; otherwise
 * the caller must free the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers match the parser's field width. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Scan this vendor's device lines; a new vendor entry
	 * (vendor != -1) means we ran past the end of the section
	 * without finding the device.
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: fall back to the raw device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3508 
/*
 * Bus method: read a PCI instance variable for a child device.  Most
 * values are served straight from the cached configuration registers
 * in the child's pcicfgregs.  Returns ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3591 
3592 int
3593 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3594 {
3595 	struct pci_devinfo *dinfo;
3596 
3597 	dinfo = device_get_ivars(child);
3598 
3599 	switch (which) {
3600 	case PCI_IVAR_INTPIN:
3601 		dinfo->cfg.intpin = value;
3602 		return (0);
3603 	case PCI_IVAR_ETHADDR:
3604 	case PCI_IVAR_SUBVENDOR:
3605 	case PCI_IVAR_SUBDEVICE:
3606 	case PCI_IVAR_VENDOR:
3607 	case PCI_IVAR_DEVICE:
3608 	case PCI_IVAR_DEVID:
3609 	case PCI_IVAR_CLASS:
3610 	case PCI_IVAR_SUBCLASS:
3611 	case PCI_IVAR_PROGIF:
3612 	case PCI_IVAR_REVID:
3613 	case PCI_IVAR_IRQ:
3614 	case PCI_IVAR_DOMAIN:
3615 	case PCI_IVAR_BUS:
3616 	case PCI_IVAR_SLOT:
3617 	case PCI_IVAR_FUNCTION:
3618 		return (EINVAL);	/* disallow for now */
3619 
3620 	default:
3621 		return (ENOENT);
3622 	}
3623 }
3624 
3625 
3626 #include "opt_ddb.h"
3627 #ifdef DDB
3628 #include <ddb/ddb.h>
3629 #include <sys/cons.h>
3630 
3631 /*
3632  * List resources based on pci map registers, used for within ddb
3633  */
3634 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * a one-line summary (name/unit, selector, class, subsystem and
 * vendor/device IDs, revision, header type) for each known device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without a driver print as "none<counter>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3674 #endif /* DDB */
3675 
/*
 * Lazily reserve the resource backing a BAR.  The BAR is probed to
 * determine its size and type, the requested resource type is
 * validated against the BAR type, a suitably sized and aligned range
 * is allocated from the parent, recorded on the child's resource list
 * as RLE_RESERVED, and finally the BAR is programmed with the
 * assigned base address.  Returns the reserved resource, or NULL on
 * any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/*
	 * Determine the size of the BAR and ignore BARs with a size
	 * of 0.  Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	if (mapsize == 0)
		goto out;

	/* Reject requests whose type contradicts the BAR's type bits. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = 1UL << mapsize;
	/* BARs decode naturally-aligned ranges of their own size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation on the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were granted. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3771 
3772 
/*
 * Bus method: allocate a resource for a child.  Requests from
 * non-direct children are passed straight up.  For direct children,
 * legacy interrupt allocation (rid 0) may trigger lazy interrupt
 * routing, and port/memory allocation may trigger lazy BAR
 * reservation, before the request is satisfied from the child's
 * resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3823 
3824 int
3825 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3826     struct resource *r)
3827 {
3828 	int error;
3829 
3830 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3831 	if (error)
3832 		return (error);
3833 
3834 	/* Enable decoding in the command register when activating BARs. */
3835 	if (device_get_parent(child) == dev) {
3836 		/* Device ROMs need their decoding explicitly enabled. */
3837 		if (rid == PCIR_BIOS)
3838 			pci_write_config(child, rid, rman_get_start(r) |
3839 			    PCIM_BIOS_ENABLE, 4);
3840 		switch (type) {
3841 		case SYS_RES_IOPORT:
3842 		case SYS_RES_MEMORY:
3843 			error = PCI_ENABLE_IO(dev, child, type);
3844 			break;
3845 		}
3846 	}
3847 	return (error);
3848 }
3849 
3850 int
3851 pci_deactivate_resource(device_t dev, device_t child, int type,
3852     int rid, struct resource *r)
3853 {
3854 	int error;
3855 
3856 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3857 	if (error)
3858 		return (error);
3859 
3860 	/* Disable decoding for device ROMs. */
3861 	if (rid == PCIR_BIOS)
3862 		pci_write_config(child, rid, rman_get_start(r), 4);
3863 	return (0);
3864 }
3865 
/*
 * Detach and destroy a child device: disable its memory and I/O
 * decoding, release every resource recorded on its resource list
 * (complaining about any still active or busy), then delete the
 * device and free its config info.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A still-active or busy entry means some
			 * driver failed to release it; force it free
			 * so the unreserve below can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3905 
/*
 * Bus method: remove a resource entry from a direct child's resource
 * list.  Refuses to delete an entry whose resource is still active or
 * busy.  When a reserved BAR resource is deleted, the BAR itself is
 * cleared first so the device stops decoding the range.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3948 
3949 struct resource_list *
3950 pci_get_resource_list (device_t dev, device_t child)
3951 {
3952 	struct pci_devinfo *dinfo = device_get_ivars(child);
3953 
3954 	return (&dinfo->resources);
3955 }
3956 
3957 uint32_t
3958 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3959 {
3960 	struct pci_devinfo *dinfo = device_get_ivars(child);
3961 	pcicfgregs *cfg = &dinfo->cfg;
3962 
3963 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3964 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3965 }
3966 
3967 void
3968 pci_write_config_method(device_t dev, device_t child, int reg,
3969     uint32_t val, int width)
3970 {
3971 	struct pci_devinfo *dinfo = device_get_ivars(child);
3972 	pcicfgregs *cfg = &dinfo->cfg;
3973 
3974 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3975 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3976 }
3977 
3978 int
3979 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3980     size_t buflen)
3981 {
3982 
3983 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3984 	    pci_get_function(child));
3985 	return (0);
3986 }
3987 
3988 int
3989 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3990     size_t buflen)
3991 {
3992 	struct pci_devinfo *dinfo;
3993 	pcicfgregs *cfg;
3994 
3995 	dinfo = device_get_ivars(child);
3996 	cfg = &dinfo->cfg;
3997 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3998 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3999 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4000 	    cfg->progif);
4001 	return (0);
4002 }
4003 
4004 int
4005 pci_assign_interrupt_method(device_t dev, device_t child)
4006 {
4007 	struct pci_devinfo *dinfo = device_get_ivars(child);
4008 	pcicfgregs *cfg = &dinfo->cfg;
4009 
4010 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4011 	    cfg->intpin));
4012 }
4013 
4014 static int
4015 pci_modevent(module_t mod, int what, void *arg)
4016 {
4017 	static struct cdev *pci_cdev;
4018 
4019 	switch (what) {
4020 	case MOD_LOAD:
4021 		STAILQ_INIT(&pci_devq);
4022 		pci_generation = 0;
4023 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4024 		    "pci");
4025 		pci_load_vendor_data();
4026 		break;
4027 
4028 	case MOD_UNLOAD:
4029 		destroy_dev(pci_cdev);
4030 		break;
4031 	}
4032 
4033 	return (0);
4034 }
4035 
/*
 * Restore a device's saved config-space registers (power state first,
 * then BARs and the normally-writable type 0 header fields), typically
 * after a suspend/resume or power-state transition.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Write the BARs back before re-enabling decode via COMMAND below. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4080 
/*
 * Snapshot a device's writable type 0 config-space registers into its
 * devinfo so pci_cfg_restore() can put them back later.  If 'setstate'
 * is non-zero, optionally place the device in D3 afterwards, subject
 * to the pci_do_power_nodriver policy tunable.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;
	/* Save the raw BAR contents and the expansion ROM address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Apply the pci_do_power_nodriver powerdown policy (0..3). */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4164