xref: /freebsd/sys/dev/pci/pci.c (revision 01ded8b942effbbb4d9225c4227f264e499e9698)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
/* BAR and expansion-ROM register decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

/* Resource/interrupt management and newbus glue. */
static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
/* Vital Product Data (VPD) access. */
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI/MSI-X support. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
116 
/*
 * Newbus method dispatch table for the PCI bus driver: device life
 * cycle, generic bus operations, and the PCI-specific kobj interface.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }
};

/* Register the "pci" driver class and attach it under pcib bridges. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
175 
/* Raw vendor-description data; presumably filled by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/*
 * A per-device quirk entry.  devid packs the 16-bit device ID in the
 * upper half and the vendor ID in the lower half (e.g. 0x8086 == Intel);
 * type selects one of the PCI_QUIRK_* behaviors below, with arg1/arg2
 * giving quirk-specific parameters (e.g. the odd map register offset).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;
	int	arg2;
};
189 
/* Quirk table; scanned linearly and terminated by the all-zero entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }
};
230 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all discovered PCI devices plus bookkeeping counters. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever the device list changes */
uint32_t pci_numdevs = 0;
/* Set during capability scan if any PCIe/PCI-X capable bridge is found. */
static int pcie_chipset, pcix_chipset;
240 
/* sysctl vars — tunables under hw.pci.* controlling bus-wide policy */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Early USB takeover defaults on only for x86, where BIOS USB legacy
 * emulation is the concern. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
298 
299 /* Find a device_t by bus/slot/function in domain 0 */
300 
301 device_t
302 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
303 {
304 
305 	return (pci_find_dbsf(0, bus, slot, func));
306 }
307 
308 /* Find a device_t by domain/bus/slot/function */
309 
310 device_t
311 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
312 {
313 	struct pci_devinfo *dinfo;
314 
315 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
316 		if ((dinfo->cfg.domain == domain) &&
317 		    (dinfo->cfg.bus == bus) &&
318 		    (dinfo->cfg.slot == slot) &&
319 		    (dinfo->cfg.func == func)) {
320 			return (dinfo->cfg.dev);
321 		}
322 	}
323 
324 	return (NULL);
325 }
326 
327 /* Find a device_t by vendor/device ID */
328 
329 device_t
330 pci_find_device(uint16_t vendor, uint16_t device)
331 {
332 	struct pci_devinfo *dinfo;
333 
334 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
335 		if ((dinfo->cfg.vendor == vendor) &&
336 		    (dinfo->cfg.device == device)) {
337 			return (dinfo->cfg.dev);
338 		}
339 	}
340 
341 	return (NULL);
342 }
343 
344 static int
345 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
346 {
347 	va_list ap;
348 	int retval;
349 
350 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
351 	    cfg->func);
352 	va_start(ap, fmt);
353 	retval += vprintf(fmt, ap);
354 	va_end(ap);
355 	return (retval);
356 }
357 
358 /* return base address of memory or port map */
359 
360 static pci_addr_t
361 pci_mapbase(uint64_t mapreg)
362 {
363 
364 	if (PCI_BAR_MEM(mapreg))
365 		return (mapreg & PCIM_BAR_MEM_BASE);
366 	else
367 		return (mapreg & PCIM_BAR_IO_BASE);
368 }
369 
370 /* return map type of memory or port map */
371 
372 static const char *
373 pci_maptype(uint64_t mapreg)
374 {
375 
376 	if (PCI_BAR_IO(mapreg))
377 		return ("I/O Port");
378 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
379 		return ("Prefetchable Memory");
380 	return ("Memory");
381 }
382 
383 /* return log2 of map size decoded for memory or port map */
384 
385 static int
386 pci_mapsize(uint64_t testval)
387 {
388 	int ln2size;
389 
390 	testval = pci_mapbase(testval);
391 	ln2size = 0;
392 	if (testval != 0) {
393 		while ((testval & 1) == 0)
394 		{
395 			ln2size++;
396 			testval >>= 1;
397 		}
398 	}
399 	return (ln2size);
400 }
401 
/* return base address of device ROM */

/*
 * Mask the enable bit and other low reserved bits out of an expansion
 * ROM BAR value, leaving the decoded ROM base address.
 */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
410 
411 /* return log2 of map size decided for device ROM */
412 
413 static int
414 pci_romsize(uint64_t testval)
415 {
416 	int ln2size;
417 
418 	testval = pci_rombase(testval);
419 	ln2size = 0;
420 	if (testval != 0) {
421 		while ((testval & 1) == 0)
422 		{
423 			ln2size++;
424 			testval >>= 1;
425 		}
426 	}
427 	return (ln2size);
428 }
429 
430 /* return log2 of address range supported by map register */
431 
432 static int
433 pci_maprange(uint64_t mapreg)
434 {
435 	int ln2range = 0;
436 
437 	if (PCI_BAR_IO(mapreg))
438 		ln2range = 32;
439 	else
440 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
441 		case PCIM_BAR_MEM_32:
442 			ln2range = 32;
443 			break;
444 		case PCIM_BAR_MEM_1MB:
445 			ln2range = 20;
446 			break;
447 		case PCIM_BAR_MEM_64:
448 			ln2range = 64;
449 			break;
450 		}
451 	return (ln2range);
452 }
453 
454 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
455 
456 static void
457 pci_fixancient(pcicfgregs *cfg)
458 {
459 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
460 		return;
461 
462 	/* PCI to PCI bridges use header type 1 */
463 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
464 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
465 }
466 
/* extract header type specific config data */

/*
 * Fill in the header-type-dependent fields of *cfg: the subsystem
 * vendor/device IDs (where that header layout defines them) and the
 * number of BARs the header type provides.  Config space is read
 * through the parent bridge 'pcib'.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type-1 headers have no subvendor registers here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
490 
/* read configuration header into pcicfgregs structure */

/*
 * Probe domain 'd' bus 'b' slot 's' function 'f' through bridge 'pcib'.
 * If a device responds (vendor/device register != 0xffffffff), allocate
 * a pci_devinfo of 'size' bytes, populate its pcicfgregs from config
 * space, parse its capability list, and link it onto the global device
 * list.  Returns the new entry, or NULL if no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* NB: with M_WAITOK this allocation should never fail. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit off of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list only if the device has one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
565 
/*
 * Walk the device's capability list and record what we find in *cfg:
 * power management, HyperTransport MSI mapping (x86/powerpc only), MSI,
 * MSI-X, VPD register location and subvendor data.  Also sets the
 * chipset-wide pcix_chipset/pcie_chipset flags when PCI-X or PCIe
 * capabilities are seen.  NB: the REG/WREG macros defined here are
 * intentionally left defined for the following VPD functions.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type-specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
701 
702 /*
703  * PCI Vital Product Data
704  */
705 
706 #define	PCI_VPD_TIMEOUT		1000000
707 
708 static int
709 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
710 {
711 	int count = PCI_VPD_TIMEOUT;
712 
713 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
714 
715 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
716 
717 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
718 		if (--count < 0)
719 			return (ENXIO);
720 		DELAY(1);	/* limit looping */
721 	}
722 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
723 
724 	return (0);
725 }
726 
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg': load the data
 * register, write the address register with the write-flag bit (0x8000)
 * set, and poll until the hardware clears it to signal completion.
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
748 
/*
 * Cursor for sequential byte-wise reads of a device's VPD: a buffered
 * 32-bit word with a count of unconsumed bytes, the next VPD offset to
 * fetch, and a running byte checksum for the VPD "RV" validation.
 */
struct vpd_readstate {
	device_t	pcib;	/* bridge used for config-space access */
	pcicfgregs	*cfg;	/* device whose VPD is being read */
	uint32_t	val;	/* buffered word; low byte is next out */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;	/* next 4-byte-aligned VPD offset */
	uint8_t		cksum;	/* running sum of all bytes returned */
};
757 
758 static int
759 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
760 {
761 	uint32_t reg;
762 	uint8_t byte;
763 
764 	if (vrs->bytesinval == 0) {
765 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
766 			return (ENXIO);
767 		vrs->val = le32toh(reg);
768 		vrs->off += 4;
769 		byte = vrs->val & 0xff;
770 		vrs->bytesinval = 3;
771 	} else {
772 		vrs->val = vrs->val >> 8;
773 		byte = vrs->val & 0xff;
774 		vrs->bytesinval--;
775 	}
776 
777 	vrs->cksum += byte;
778 	*data = byte;
779 	return (0);
780 }
781 
/*
 * Parse the device's entire VPD image into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array and the read/write
 * (VPD-W) keyword array.  Implemented as a byte-at-a-time state
 * machine over vpd_nextbyte():
 *
 *   state 0  - resource item header (small or large format)
 *   state 1  - identifier string bytes
 *   state 2/3 - VPD-R keyword header / value bytes
 *   state 5/6 - VPD-W keyword header / value bytes
 *   state 4  - skip bytes (currently unreachable from other states)
 *   state -1 - normal termination, state -2 - I/O error
 *
 * The "RV" keyword carries a checksum byte; if the running checksum
 * does not come out to zero there, all read-only data is discarded.
 * On I/O error everything parsed so far is freed.  Always marks
 * cfg->vpd.vpd_cached so the parse is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1: not yet checked, 0: bad, 1: good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length must fit inside the VPD window. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" holds the checksum; validate it once. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1061 
1062 int
1063 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1064 {
1065 	struct pci_devinfo *dinfo = device_get_ivars(child);
1066 	pcicfgregs *cfg = &dinfo->cfg;
1067 
1068 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1069 		pci_read_vpd(device_get_parent(dev), cfg);
1070 
1071 	*identptr = cfg->vpd.vpd_ident;
1072 
1073 	if (*identptr == NULL)
1074 		return (ENXIO);
1075 
1076 	return (0);
1077 }
1078 
1079 int
1080 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1081 	const char **vptr)
1082 {
1083 	struct pci_devinfo *dinfo = device_get_ivars(child);
1084 	pcicfgregs *cfg = &dinfo->cfg;
1085 	int i;
1086 
1087 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1088 		pci_read_vpd(device_get_parent(dev), cfg);
1089 
1090 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1091 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1092 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1093 			*vptr = cfg->vpd.vpd_ros[i].value;
1094 		}
1095 
1096 	if (i != cfg->vpd.vpd_rocnt)
1097 		return (0);
1098 
1099 	*vptr = NULL;
1100 	return (ENXIO);
1101 }
1102 
1103 /*
1104  * Find the requested extended capability and return the offset in
1105  * configuration space via the pointer provided. The function returns
1106  * 0 on success and error code otherwise.
1107  */
1108 int
1109 pci_find_extcap_method(device_t dev, device_t child, int capability,
1110     int *capreg)
1111 {
1112 	struct pci_devinfo *dinfo = device_get_ivars(child);
1113 	pcicfgregs *cfg = &dinfo->cfg;
1114 	u_int32_t status;
1115 	u_int8_t ptr;
1116 
1117 	/*
1118 	 * Check the CAP_LIST bit of the PCI status register first.
1119 	 */
1120 	status = pci_read_config(child, PCIR_STATUS, 2);
1121 	if (!(status & PCIM_STATUS_CAPPRESENT))
1122 		return (ENXIO);
1123 
1124 	/*
1125 	 * Determine the start pointer of the capabilities list.
1126 	 */
1127 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1128 	case PCIM_HDRTYPE_NORMAL:
1129 	case PCIM_HDRTYPE_BRIDGE:
1130 		ptr = PCIR_CAP_PTR;
1131 		break;
1132 	case PCIM_HDRTYPE_CARDBUS:
1133 		ptr = PCIR_CAP_PTR_2;
1134 		break;
1135 	default:
1136 		/* XXX: panic? */
1137 		return (ENXIO);		/* no extended capabilities support */
1138 	}
1139 	ptr = pci_read_config(child, ptr, 1);
1140 
1141 	/*
1142 	 * Traverse the capabilities list.
1143 	 */
1144 	while (ptr != 0) {
1145 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1146 			if (capreg != NULL)
1147 				*capreg = ptr;
1148 			return (0);
1149 		}
1150 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1151 	}
1152 
1153 	return (ENOENT);
1154 }
1155 
1156 /*
1157  * Support for MSI-X message interrupts.
1158  */
1159 void
1160 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1161 {
1162 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1163 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1164 	uint32_t offset;
1165 
1166 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1167 	offset = msix->msix_table_offset + index * 16;
1168 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1169 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1170 	bus_write_4(msix->msix_table_res, offset + 8, data);
1171 
1172 	/* Enable MSI -> HT mapping. */
1173 	pci_ht_map_msi(dev, address);
1174 }
1175 
1176 void
1177 pci_mask_msix(device_t dev, u_int index)
1178 {
1179 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1180 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1181 	uint32_t offset, val;
1182 
1183 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1184 	offset = msix->msix_table_offset + index * 16 + 12;
1185 	val = bus_read_4(msix->msix_table_res, offset);
1186 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1187 		val |= PCIM_MSIX_VCTRL_MASK;
1188 		bus_write_4(msix->msix_table_res, offset, val);
1189 	}
1190 }
1191 
1192 void
1193 pci_unmask_msix(device_t dev, u_int index)
1194 {
1195 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1196 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1197 	uint32_t offset, val;
1198 
1199 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1200 	offset = msix->msix_table_offset + index * 16 + 12;
1201 	val = bus_read_4(msix->msix_table_res, offset);
1202 	if (val & PCIM_MSIX_VCTRL_MASK) {
1203 		val &= ~PCIM_MSIX_VCTRL_MASK;
1204 		bus_write_4(msix->msix_table_res, offset, val);
1205 	}
1206 }
1207 
1208 int
1209 pci_pending_msix(device_t dev, u_int index)
1210 {
1211 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1212 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1213 	uint32_t offset, bit;
1214 
1215 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1216 	offset = msix->msix_pba_offset + (index / 32) * 4;
1217 	bit = 1 << index % 32;
1218 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1219 }
1220 
1221 /*
1222  * Restore MSI-X registers and table during resume.  If MSI-X is
1223  * enabled then walk the virtual table to restore the actual MSI-X
1224  * table.
1225  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1253 
1254 /*
1255  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1256  * returned in *count.  After this function returns, each message will be
1257  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1258  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, 'rle' is still that entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to the smaller of the request and device limit. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;	/* number of messages successfully allocated */

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is a 1-based index into msix_vectors. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1390 
1391 /*
1392  * By default, pci_alloc_msix() will assign the allocated IRQ
1393  * resources consecutively to the first N messages in the MSI-X table.
1394  * However, device drivers may want to use different layouts if they
1395  * either receive fewer messages than they asked for, or they wish to
1396  * populate the MSI-X table sparsely.  This method allows the driver
1397  * to specify what layout it wants.  It must be called after a
1398  * successful pci_alloc_msix() but before any of the associated
1399  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1400  *
1401  * The 'vectors' array contains 'count' message vectors.  The array
1402  * maps directly to the MSI-X table in that index 0 in the array
1403  * specifies the vector for the first message in the MSI-X table, etc.
1404  * The vector value in each array index can either be 0 to indicate
1405  * that no vector should be assigned to a message slot, or it can be a
1406  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1408  * vector (IRQ) to be used for the corresponding message.
1409  *
1410  * On successful return, each message with a non-zero vector will have
1411  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1412  * 1.  Additionally, if any of the IRQs allocated via the previous
1413  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1414  * will be freed back to the system automatically.
1415  *
1416  * For example, suppose a driver has a MSI-X table with 6 messages and
1417  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1418  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1419  * C.  After the call to pci_alloc_msix(), the device will be setup to
1420  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1422  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1423  * be freed back to the system.  This device will also have valid
1424  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1425  *
1426  * In any case, the SYS_RES_IRQ rid X will always map to the message
1427  * at MSI-X table index X - 1 and will only be valid if a vector is
1428  * assigned to that table entry.
1429  */
1430 int
1431 pci_remap_msix_method(device_t dev, device_t child, int count,
1432     const u_int *vectors)
1433 {
1434 	struct pci_devinfo *dinfo = device_get_ivars(child);
1435 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1436 	struct resource_list_entry *rle;
1437 	int i, irq, j, *used;
1438 
1439 	/*
1440 	 * Have to have at least one message in the table but the
1441 	 * table can't be bigger than the actual MSI-X table in the
1442 	 * device.
1443 	 */
1444 	if (count == 0 || count > msix->msix_msgnum)
1445 		return (EINVAL);
1446 
1447 	/* Sanity check the vectors. */
1448 	for (i = 0; i < count; i++)
1449 		if (vectors[i] > msix->msix_alloc)
1450 			return (EINVAL);
1451 
1452 	/*
1453 	 * Make sure there aren't any holes in the vectors to be used.
1454 	 * It's a big pain to support it, and it doesn't really make
1455 	 * sense anyway.  Also, at least one vector must be used.
1456 	 */
1457 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1458 	    M_ZERO);
1459 	for (i = 0; i < count; i++)
1460 		if (vectors[i] != 0)
1461 			used[vectors[i] - 1] = 1;
1462 	for (i = 0; i < msix->msix_alloc - 1; i++)
1463 		if (used[i] == 0 && used[i + 1] == 1) {
1464 			free(used, M_DEVBUF);
1465 			return (EINVAL);
1466 		}
1467 	if (used[0] != 1) {
1468 		free(used, M_DEVBUF);
1469 		return (EINVAL);
1470 	}
1471 
1472 	/* Make sure none of the resources are allocated. */
1473 	for (i = 0; i < msix->msix_table_len; i++) {
1474 		if (msix->msix_table[i].mte_vector == 0)
1475 			continue;
1476 		if (msix->msix_table[i].mte_handlers > 0)
1477 			return (EBUSY);
1478 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1479 		KASSERT(rle != NULL, ("missing resource"));
1480 		if (rle->res != NULL)
1481 			return (EBUSY);
1482 	}
1483 
1484 	/* Free the existing resource list entries. */
1485 	for (i = 0; i < msix->msix_table_len; i++) {
1486 		if (msix->msix_table[i].mte_vector == 0)
1487 			continue;
1488 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1489 	}
1490 
1491 	/*
1492 	 * Build the new virtual table keeping track of which vectors are
1493 	 * used.
1494 	 */
1495 	free(msix->msix_table, M_DEVBUF);
1496 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1497 	    M_DEVBUF, M_WAITOK | M_ZERO);
1498 	for (i = 0; i < count; i++)
1499 		msix->msix_table[i].mte_vector = vectors[i];
1500 	msix->msix_table_len = count;
1501 
1502 	/* Free any unused IRQs and resize the vectors array if necessary. */
1503 	j = msix->msix_alloc - 1;
1504 	if (used[j] == 0) {
1505 		struct msix_vector *vec;
1506 
1507 		while (used[j] == 0) {
1508 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1509 			    msix->msix_vectors[j].mv_irq);
1510 			j--;
1511 		}
1512 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1513 		    M_WAITOK);
1514 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1515 		    (j + 1));
1516 		free(msix->msix_vectors, M_DEVBUF);
1517 		msix->msix_vectors = vec;
1518 		msix->msix_alloc = j + 1;
1519 	}
1520 	free(used, M_DEVBUF);
1521 
1522 	/* Map the IRQs onto the rids. */
1523 	for (i = 0; i < count; i++) {
1524 		if (vectors[i] == 0)
1525 			continue;
1526 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1527 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1528 		    irq, 1);
1529 	}
1530 
1531 	if (bootverbose) {
1532 		device_printf(child, "Remapped MSI-X IRQs as: ");
1533 		for (i = 0; i < count; i++) {
1534 			if (i != 0)
1535 				printf(", ");
1536 			if (vectors[i] == 0)
1537 				printf("---");
1538 			else
1539 				printf("%d",
1540 				    msix->msix_vectors[vectors[i]].mv_irq);
1541 		}
1542 		printf("\n");
1543 	}
1544 
1545 	return (0);
1546 }
1547 
/*
 * Disable MSI-X and release all of its messages and resources for
 * 'child'.  Returns ENODEV if no MSI-X messages are allocated and
 * EBUSY if any of the associated SYS_RES_IRQ resources are still in use.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1594 
1595 /*
1596  * Return the max supported MSI-X messages this device supports.
1597  * Basically, assuming the MD code can alloc messages, this function
1598  * should return the maximum value that pci_alloc_msix() can return.
1599  * Thus, it is subject to the tunables, etc.
1600  */
1601 int
1602 pci_msix_count_method(device_t dev, device_t child)
1603 {
1604 	struct pci_devinfo *dinfo = device_get_ivars(child);
1605 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1606 
1607 	if (pci_do_msix && msix->msix_location != 0)
1608 		return (msix->msix_msgnum);
1609 	return (0);
1610 }
1611 
1612 /*
1613  * HyperTransport MSI mapping control
1614  */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do if the device has no HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Enable the mapping when a nonzero MSI address is being
	 * programmed, the mapping is not already enabled, and the
	 * address falls in the same 1MB-aligned window (addr >> 20)
	 * that this mapping capability covers.
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	/* A zero address means MSI is being torn down. */
	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1639 
1640 int
1641 pci_get_max_read_req(device_t dev)
1642 {
1643 	int cap;
1644 	uint16_t val;
1645 
1646 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1647 		return (0);
1648 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1649 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1650 	val >>= 12;
1651 	return (1 << (val + 7));
1652 }
1653 
1654 int
1655 pci_set_max_read_req(device_t dev, int size)
1656 {
1657 	int cap;
1658 	uint16_t val;
1659 
1660 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1661 		return (0);
1662 	if (size < 128)
1663 		size = 128;
1664 	if (size > 4096)
1665 		size = 4096;
1666 	size = (1 << (fls(size) - 1));
1667 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1668 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1669 	val |= (fls(size) - 8) << 12;
1670 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1671 	return (size);
1672 }
1673 
1674 /*
1675  * Support for MSI message signalled interrupts.
1676  */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/*
		 * 64-bit capable devices have an extra high-address
		 * dword, which shifts the data register's offset.
		 */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1703 
1704 void
1705 pci_disable_msi(device_t dev)
1706 {
1707 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1708 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1709 
1710 	/* Disable MSI -> HT mapping. */
1711 	pci_ht_map_msi(dev, 0);
1712 
1713 	/* Disable MSI in the control register. */
1714 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1715 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1716 	    2);
1717 }
1718 
1719 /*
1720  * Restore MSI registers during resume.  If MSI is enabled then
1721  * restore the data and address registers in addition to the control
1722  * register.
1723  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* MSI was enabled: restore the saved address/data pair. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit layout: high address dword, offset data. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the control register itself. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1749 
1750 static int
1751 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1752 {
1753 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1754 	pcicfgregs *cfg = &dinfo->cfg;
1755 	struct resource_list_entry *rle;
1756 	struct msix_table_entry *mte;
1757 	struct msix_vector *mv;
1758 	uint64_t addr;
1759 	uint32_t data;
1760 	int error, i, j;
1761 
1762 	/*
1763 	 * Handle MSI first.  We try to find this IRQ among our list
1764 	 * of MSI IRQs.  If we find it, we request updated address and
1765 	 * data registers and apply the results.
1766 	 */
1767 	if (cfg->msi.msi_alloc > 0) {
1768 
1769 		/* If we don't have any active handlers, nothing to do. */
1770 		if (cfg->msi.msi_handlers == 0)
1771 			return (0);
1772 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1773 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1774 			    i + 1);
1775 			if (rle->start == irq) {
1776 				error = PCIB_MAP_MSI(device_get_parent(bus),
1777 				    dev, irq, &addr, &data);
1778 				if (error)
1779 					return (error);
1780 				pci_disable_msi(dev);
1781 				dinfo->cfg.msi.msi_addr = addr;
1782 				dinfo->cfg.msi.msi_data = data;
1783 				pci_enable_msi(dev, addr, data);
1784 				return (0);
1785 			}
1786 		}
1787 		return (ENOENT);
1788 	}
1789 
1790 	/*
1791 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1792 	 * we request the updated mapping info.  If that works, we go
1793 	 * through all the slots that use this IRQ and update them.
1794 	 */
1795 	if (cfg->msix.msix_alloc > 0) {
1796 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1797 			mv = &cfg->msix.msix_vectors[i];
1798 			if (mv->mv_irq == irq) {
1799 				error = PCIB_MAP_MSI(device_get_parent(bus),
1800 				    dev, irq, &addr, &data);
1801 				if (error)
1802 					return (error);
1803 				mv->mv_address = addr;
1804 				mv->mv_data = data;
1805 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1806 					mte = &cfg->msix.msix_table[j];
1807 					if (mte->mte_vector != i + 1)
1808 						continue;
1809 					if (mte->mte_handlers == 0)
1810 						continue;
1811 					pci_mask_msix(dev, j);
1812 					pci_enable_msix(dev, j, addr, data);
1813 					pci_unmask_msix(dev, j);
1814 				}
1815 			}
1816 		}
1817 		return (ENOENT);
1818 	}
1819 
1820 	return (ENOENT);
1821 }
1822 
1823 /*
1824  * Returns true if the specified device is blacklisted because MSI
1825  * doesn't work.
1826  */
1827 int
1828 pci_msi_device_blacklisted(device_t dev)
1829 {
1830 	struct pci_quirk *q;
1831 
1832 	if (!pci_honor_msi_blacklist)
1833 		return (0);
1834 
1835 	for (q = &pci_quirks[0]; q->devid; q++) {
1836 		if (q->devid == pci_get_devid(dev) &&
1837 		    q->type == PCI_QUIRK_DISABLE_MSI)
1838 			return (1);
1839 	}
1840 	return (0);
1841 }
1842 
1843 /*
1844  * Returns true if a specified chipset supports MSI when it is
1845  * emulated hardware in a virtual machine.
1846  */
1847 static int
1848 pci_msi_vm_chipset(device_t dev)
1849 {
1850 	struct pci_quirk *q;
1851 
1852 	for (q = &pci_quirks[0]; q->devid; q++) {
1853 		if (q->devid == pci_get_devid(dev) &&
1854 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1855 			return (1);
1856 	}
1857 	return (0);
1858 }
1859 
1860 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1862  * we just check for blacklisted chipsets as represented by the
1863  * host-PCI bridge at device 0:0:0.  In the future, it may become
1864  * necessary to check other system attributes, such as the kenv values
1865  * that give the motherboard manufacturer and model number.
1866  */
1867 static int
1868 pci_msi_blacklisted(void)
1869 {
1870 	device_t dev;
1871 
1872 	if (!pci_honor_msi_blacklist)
1873 		return (0);
1874 
1875 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1876 	if (!(pcie_chipset || pcix_chipset)) {
1877 		if (vm_guest != VM_GUEST_NO) {
1878 			dev = pci_find_bsf(0, 0, 0);
1879 			if (dev != NULL)
1880 				return (pci_msi_vm_chipset(dev) == 0);
1881 		}
1882 		return (1);
1883 	}
1884 
1885 	dev = pci_find_bsf(0, 0, 0);
1886 	if (dev != NULL)
1887 		return (pci_msi_device_blacklisted(dev));
1888 	return (0);
1889 }
1890 
1891 /*
1892  * Attempt to allocate *count MSI messages.  The actual number allocated is
1893  * returned in *count.  After this function returns, each message will be
1894  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1895  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request on failure until a single message succeeds. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* actual is a power of 2, so ffs(actual) - 1 == log2(actual). */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2014 
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  Any result other than ENODEV is definitive;
	 * ENODEV means MSI-X was not in use, so fall through to plain MSI.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated: refuse while a
	 * handler is still established or a driver still holds one of the
	 * IRQ resources.  Collect the IRQ numbers along the way so they
	 * can be handed back to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages to the parent and delete the rid entries. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count and clear the saved address/data pair. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2063 
2064 /*
2065  * Return the max supported MSI messages this device supports.
2066  * Basically, assuming the MD code can alloc messages, this function
2067  * should return the maximum value that pci_alloc_msi() can return.
2068  * Thus, it is subject to the tunables, etc.
2069  */
2070 int
2071 pci_msi_count_method(device_t dev, device_t child)
2072 {
2073 	struct pci_devinfo *dinfo = device_get_ivars(child);
2074 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2075 
2076 	if (pci_do_msi && msi->msi_location != 0)
2077 		return (msi->msi_msgnum);
2078 	return (0);
2079 }
2080 
2081 /* free pcicfgregs structure and all depending data structures */
2082 
2083 int
2084 pci_freecfg(struct pci_devinfo *dinfo)
2085 {
2086 	struct devlist *devlist_head;
2087 	int i;
2088 
2089 	devlist_head = &pci_devq;
2090 
2091 	if (dinfo->cfg.vpd.vpd_reg) {
2092 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2093 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2094 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2095 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2096 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2097 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2098 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2099 	}
2100 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2101 	free(dinfo, M_DEVBUF);
2102 
2103 	/* increment the generation count */
2104 	pci_generation++;
2105 
2106 	/* we're losing one device */
2107 	pci_numdevs--;
2108 	return (0);
2109 }
2110 
2111 /*
2112  * PCI power manangement
2113  */
2114 int
2115 pci_set_powerstate_method(device_t dev, device_t child, int state)
2116 {
2117 	struct pci_devinfo *dinfo = device_get_ivars(child);
2118 	pcicfgregs *cfg = &dinfo->cfg;
2119 	uint16_t status;
2120 	int result, oldstate, highest, delay;
2121 
2122 	if (cfg->pp.pp_cap == 0)
2123 		return (EOPNOTSUPP);
2124 
2125 	/*
2126 	 * Optimize a no state change request away.  While it would be OK to
2127 	 * write to the hardware in theory, some devices have shown odd
2128 	 * behavior when going from D3 -> D3.
2129 	 */
2130 	oldstate = pci_get_powerstate(child);
2131 	if (oldstate == state)
2132 		return (0);
2133 
2134 	/*
2135 	 * The PCI power management specification states that after a state
2136 	 * transition between PCI power states, system software must
2137 	 * guarantee a minimal delay before the function accesses the device.
2138 	 * Compute the worst case delay that we need to guarantee before we
2139 	 * access the device.  Many devices will be responsive much more
2140 	 * quickly than this delay, but there are some that don't respond
2141 	 * instantly to state changes.  Transitions to/from D3 state require
2142 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2143 	 * is done below with DELAY rather than a sleeper function because
2144 	 * this function can be called from contexts where we cannot sleep.
2145 	 */
2146 	highest = (oldstate > state) ? oldstate : state;
2147 	if (highest == PCI_POWERSTATE_D3)
2148 	    delay = 10000;
2149 	else if (highest == PCI_POWERSTATE_D2)
2150 	    delay = 200;
2151 	else
2152 	    delay = 0;
2153 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2154 	    & ~PCIM_PSTAT_DMASK;
2155 	result = 0;
2156 	switch (state) {
2157 	case PCI_POWERSTATE_D0:
2158 		status |= PCIM_PSTAT_D0;
2159 		break;
2160 	case PCI_POWERSTATE_D1:
2161 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2162 			return (EOPNOTSUPP);
2163 		status |= PCIM_PSTAT_D1;
2164 		break;
2165 	case PCI_POWERSTATE_D2:
2166 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2167 			return (EOPNOTSUPP);
2168 		status |= PCIM_PSTAT_D2;
2169 		break;
2170 	case PCI_POWERSTATE_D3:
2171 		status |= PCIM_PSTAT_D3;
2172 		break;
2173 	default:
2174 		return (EINVAL);
2175 	}
2176 
2177 	if (bootverbose)
2178 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2179 		    state);
2180 
2181 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2182 	if (delay)
2183 		DELAY(delay);
2184 	return (0);
2185 }
2186 
2187 int
2188 pci_get_powerstate_method(device_t dev, device_t child)
2189 {
2190 	struct pci_devinfo *dinfo = device_get_ivars(child);
2191 	pcicfgregs *cfg = &dinfo->cfg;
2192 	uint16_t status;
2193 	int result;
2194 
2195 	if (cfg->pp.pp_cap != 0) {
2196 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2197 		switch (status & PCIM_PSTAT_DMASK) {
2198 		case PCIM_PSTAT_D0:
2199 			result = PCI_POWERSTATE_D0;
2200 			break;
2201 		case PCIM_PSTAT_D1:
2202 			result = PCI_POWERSTATE_D1;
2203 			break;
2204 		case PCIM_PSTAT_D2:
2205 			result = PCI_POWERSTATE_D2;
2206 			break;
2207 		case PCIM_PSTAT_D3:
2208 			result = PCI_POWERSTATE_D3;
2209 			break;
2210 		default:
2211 			result = PCI_POWERSTATE_UNKNOWN;
2212 			break;
2213 		}
2214 	} else {
2215 		/* No support, device is always at D0 */
2216 		result = PCI_POWERSTATE_D0;
2217 	}
2218 	return (result);
2219 }
2220 
2221 /*
2222  * Some convenience functions for PCI device drivers.
2223  */
2224 
2225 static __inline void
2226 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2227 {
2228 	uint16_t	command;
2229 
2230 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2231 	command |= bit;
2232 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2233 }
2234 
2235 static __inline void
2236 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2237 {
2238 	uint16_t	command;
2239 
2240 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2241 	command &= ~bit;
2242 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2243 }
2244 
/* Enable bus-mastering (DMA) on the child by setting the command bit. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2251 
/* Disable bus-mastering (DMA) on the child by clearing the command bit. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2258 
2259 int
2260 pci_enable_io_method(device_t dev, device_t child, int space)
2261 {
2262 	uint16_t bit;
2263 
2264 	switch(space) {
2265 	case SYS_RES_IOPORT:
2266 		bit = PCIM_CMD_PORTEN;
2267 		break;
2268 	case SYS_RES_MEMORY:
2269 		bit = PCIM_CMD_MEMEN;
2270 		break;
2271 	default:
2272 		return (EINVAL);
2273 	}
2274 	pci_set_command_bit(dev, child, bit);
2275 	return (0);
2276 }
2277 
2278 int
2279 pci_disable_io_method(device_t dev, device_t child, int space)
2280 {
2281 	uint16_t bit;
2282 
2283 	switch(space) {
2284 	case SYS_RES_IOPORT:
2285 		bit = PCIM_CMD_PORTEN;
2286 		break;
2287 	case SYS_RES_MEMORY:
2288 		bit = PCIM_CMD_MEMEN;
2289 		break;
2290 	default:
2291 		return (EINVAL);
2292 	}
2293 	pci_clear_command_bit(dev, child, bit);
2294 	return (0);
2295 }
2296 
2297 /*
2298  * New style pci driver.  Parent device is either a pci-host-bridge or a
2299  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2300  */
2301 
2302 void
2303 pci_print_verbose(struct pci_devinfo *dinfo)
2304 {
2305 
2306 	if (bootverbose) {
2307 		pcicfgregs *cfg = &dinfo->cfg;
2308 
2309 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2310 		    cfg->vendor, cfg->device, cfg->revid);
2311 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2312 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2313 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2314 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2315 		    cfg->mfdev);
2316 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2317 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2318 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2319 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2320 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2321 		if (cfg->intpin > 0)
2322 			printf("\tintpin=%c, irq=%d\n",
2323 			    cfg->intpin +'a' -1, cfg->intline);
2324 		if (cfg->pp.pp_cap) {
2325 			uint16_t status;
2326 
2327 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2328 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2329 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2330 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2331 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2332 			    status & PCIM_PSTAT_DMASK);
2333 		}
2334 		if (cfg->msi.msi_location) {
2335 			int ctrl;
2336 
2337 			ctrl = cfg->msi.msi_ctrl;
2338 			printf("\tMSI supports %d message%s%s%s\n",
2339 			    cfg->msi.msi_msgnum,
2340 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2341 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2342 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2343 		}
2344 		if (cfg->msix.msix_location) {
2345 			printf("\tMSI-X supports %d message%s ",
2346 			    cfg->msix.msix_msgnum,
2347 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2348 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2349 				printf("in map 0x%x\n",
2350 				    cfg->msix.msix_table_bar);
2351 			else
2352 				printf("in maps 0x%x and 0x%x\n",
2353 				    cfg->msix.msix_table_bar,
2354 				    cfg->msix.msix_pba_bar);
2355 		}
2356 	}
2357 }
2358 
2359 static int
2360 pci_porten(device_t dev)
2361 {
2362 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2363 }
2364 
2365 static int
2366 pci_memen(device_t dev)
2367 {
2368 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2369 }
2370 
/*
 * Read a BAR's programmed value and its size-probe value.  On return,
 * *mapp holds the current (possibly 64-bit) BAR contents and
 * *testvalp holds the value read back after writing all 1's, from
 * which the BAR's size can be decoded (see pci_mapsize()).  The
 * original BAR value and the command register are restored before
 * returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	/* A 64-bit BAR spans two registers; merge in the upper half. */
	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2432 
2433 static void
2434 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2435 {
2436 	pci_addr_t map;
2437 	int ln2range;
2438 
2439 	map = pci_read_config(dev, reg, 4);
2440 
2441 	/* The device ROM BAR is always 32-bits. */
2442 	if (reg == PCIR_BIOS)
2443 		return;
2444 	ln2range = pci_maprange(map);
2445 	pci_write_config(dev, reg, base, 4);
2446 	if (ln2range == 64)
2447 		pci_write_config(dev, reg + 4, base >> 32, 4);
2448 }
2449 
2450 /*
2451  * Add a resource based on a pci map register. Return 1 if the map
2452  * register is a 32bit map register or 2 if it is a 64bit register.
2453  */
2454 static int
2455 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2456     int force, int prefetch)
2457 {
2458 	pci_addr_t base, map, testval;
2459 	pci_addr_t start, end, count;
2460 	int barlen, basezero, maprange, mapsize, type;
2461 	uint16_t cmd;
2462 	struct resource *res;
2463 
2464 	pci_read_bar(dev, reg, &map, &testval);
2465 	if (PCI_BAR_MEM(map)) {
2466 		type = SYS_RES_MEMORY;
2467 		if (map & PCIM_BAR_MEM_PREFETCH)
2468 			prefetch = 1;
2469 	} else
2470 		type = SYS_RES_IOPORT;
2471 	mapsize = pci_mapsize(testval);
2472 	base = pci_mapbase(map);
2473 #ifdef __PCI_BAR_ZERO_VALID
2474 	basezero = 0;
2475 #else
2476 	basezero = base == 0;
2477 #endif
2478 	maprange = pci_maprange(map);
2479 	barlen = maprange == 64 ? 2 : 1;
2480 
2481 	/*
2482 	 * For I/O registers, if bottom bit is set, and the next bit up
2483 	 * isn't clear, we know we have a BAR that doesn't conform to the
2484 	 * spec, so ignore it.  Also, sanity check the size of the data
2485 	 * areas to the type of memory involved.  Memory must be at least
2486 	 * 16 bytes in size, while I/O ranges must be at least 4.
2487 	 */
2488 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2489 		return (barlen);
2490 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2491 	    (type == SYS_RES_IOPORT && mapsize < 2))
2492 		return (barlen);
2493 
2494 	if (bootverbose) {
2495 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2496 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2497 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2498 			printf(", port disabled\n");
2499 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2500 			printf(", memory disabled\n");
2501 		else
2502 			printf(", enabled\n");
2503 	}
2504 
2505 	/*
2506 	 * If base is 0, then we have problems if this architecture does
2507 	 * not allow that.  It is best to ignore such entries for the
2508 	 * moment.  These will be allocated later if the driver specifically
2509 	 * requests them.  However, some removable busses look better when
2510 	 * all resources are allocated, so allow '0' to be overriden.
2511 	 *
2512 	 * Similarly treat maps whose values is the same as the test value
2513 	 * read back.  These maps have had all f's written to them by the
2514 	 * BIOS in an attempt to disable the resources.
2515 	 */
2516 	if (!force && (basezero || map == testval))
2517 		return (barlen);
2518 	if ((u_long)base != base) {
2519 		device_printf(bus,
2520 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2521 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2522 		    pci_get_function(dev), reg);
2523 		return (barlen);
2524 	}
2525 
2526 	/*
2527 	 * This code theoretically does the right thing, but has
2528 	 * undesirable side effects in some cases where peripherals
2529 	 * respond oddly to having these bits enabled.  Let the user
2530 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2531 	 * default).
2532 	 */
2533 	if (pci_enable_io_modes) {
2534 		/* Turn on resources that have been left off by a lazy BIOS */
2535 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2536 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2537 			cmd |= PCIM_CMD_PORTEN;
2538 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2539 		}
2540 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2541 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2542 			cmd |= PCIM_CMD_MEMEN;
2543 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2544 		}
2545 	} else {
2546 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2547 			return (barlen);
2548 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2549 			return (barlen);
2550 	}
2551 
2552 	count = (pci_addr_t)1 << mapsize;
2553 	if (basezero || base == pci_mapbase(testval)) {
2554 		start = 0;	/* Let the parent decide. */
2555 		end = ~0ULL;
2556 	} else {
2557 		start = base;
2558 		end = base + count - 1;
2559 	}
2560 	resource_list_add(rl, type, reg, start, end, count);
2561 
2562 	/*
2563 	 * Try to allocate the resource for this BAR from our parent
2564 	 * so that this resource range is already reserved.  The
2565 	 * driver for this device will later inherit this resource in
2566 	 * pci_alloc_resource().
2567 	 */
2568 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2569 	    prefetch ? RF_PREFETCHABLE : 0);
2570 	if (res == NULL) {
2571 		/*
2572 		 * If the allocation fails, clear the BAR and delete
2573 		 * the resource list entry to force
2574 		 * pci_alloc_resource() to allocate resources from the
2575 		 * parent.
2576 		 */
2577 		resource_list_delete(rl, type, reg);
2578 		start = 0;
2579 	} else
2580 		start = rman_get_start(res);
2581 	pci_write_bar(dev, reg, start);
2582 	return (barlen);
2583 }
2584 
2585 /*
2586  * For ATA devices we need to decide early what addressing mode to use.
2587  * Legacy demands that the primary and secondary ATA ports sits on the
2588  * same addresses that old ISA hardware did. This dictates that we use
2589  * those addresses and ignore the BAR's if we cannot set PCI native
2590  * addressing mode.
2591  */
2592 static void
2593 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2594     uint32_t prefetchmask)
2595 {
2596 	struct resource *r;
2597 	int rid, type, progif;
2598 #if 0
2599 	/* if this device supports PCI native addressing use it */
2600 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2601 	if ((progif & 0x8a) == 0x8a) {
2602 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2603 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2604 			printf("Trying ATA native PCI addressing mode\n");
2605 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2606 		}
2607 	}
2608 #endif
2609 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2610 	type = SYS_RES_IOPORT;
2611 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2612 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2613 		    prefetchmask & (1 << 0));
2614 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2615 		    prefetchmask & (1 << 1));
2616 	} else {
2617 		rid = PCIR_BAR(0);
2618 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2619 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2620 		    0x1f7, 8, 0);
2621 		rid = PCIR_BAR(1);
2622 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2623 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2624 		    0x3f6, 1, 0);
2625 	}
2626 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2627 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2628 		    prefetchmask & (1 << 2));
2629 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2630 		    prefetchmask & (1 << 3));
2631 	} else {
2632 		rid = PCIR_BAR(2);
2633 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2634 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2635 		    0x177, 8, 0);
2636 		rid = PCIR_BAR(3);
2637 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2638 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2639 		    0x376, 1, 0);
2640 	}
2641 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2642 	    prefetchmask & (1 << 4));
2643 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2644 	    prefetchmask & (1 << 5));
2645 }
2646 
/*
 * Determine the legacy INTx IRQ for a device and record it as the
 * rid 0 SYS_RES_IRQ resource.  The IRQ may come from a user tunable,
 * from bus routing (PCI_ASSIGN_INTERRUPT), or from the intline
 * register, in that order of preference.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq.  Values outside
	 * the range 1..254 are treated as invalid.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2694 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM owns the controller; request an ownership change. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for SMM to clear the IR bit. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM did not let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2731 
2732 /* Perform early UHCI takeover from SMM. */
2733 static void
2734 uhci_early_takeover(device_t self)
2735 {
2736 	struct resource *res;
2737 	int rid;
2738 
2739 	/*
2740 	 * Set the PIRQD enable bit and switch off all the others. We don't
2741 	 * want legacy support to interfere with us XXX Does this also mean
2742 	 * that the BIOS won't touch the keyboard anymore if it is connected
2743 	 * to the ports of the root hub?
2744 	 */
2745 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2746 
2747 	/* Disable interrupts */
2748 	rid = PCI_UHCI_BASE_REG;
2749 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2750 	if (res != NULL) {
2751 		bus_write_2(res, UHCI_INTR, 0);
2752 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2753 	}
2754 }
2755 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller: walk the
	 * extended capability list looking for the legacy-support
	 * capability and its BIOS ownership semaphore.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not claim ownership; nothing to do. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			/* Proceed anyway; takeover is best-effort. */
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2811 
/*
 * Populate a device's resource list from its BARs, quirks, and
 * interrupt routing, and perform early USB controller takeover.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 2 for 64-bit BARs, skipping the
		 * upper-half register. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2866 
/*
 * Scan every slot/function on the given bus and add a child device
 * for each function found.  dinfo_size lets subclassed busses pass a
 * larger devinfo structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* f must be 0 here: REG() below reads function 0's header. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots whose header type is invalid/unsupported. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2899 
2900 void
2901 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2902 {
2903 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2904 	device_set_ivars(dinfo->cfg.dev, dinfo);
2905 	resource_list_init(&dinfo->resources);
2906 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2907 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2908 	pci_print_verbose(dinfo);
2909 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2910 }
2911 
/* Probe method for the PCI bus driver itself; just sets a description. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2921 
/* Attach method: enumerate children and attach their drivers. */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2941 
2942 static void
2943 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
2944     int state)
2945 {
2946 	device_t child, pcib;
2947 	struct pci_devinfo *dinfo;
2948 	int dstate, i;
2949 
2950 	/*
2951 	 * Set the device to the given state.  If the firmware suggests
2952 	 * a different power state, use it instead.  If power management
2953 	 * is not present, the firmware is responsible for managing
2954 	 * device power.  Skip children who aren't attached since they
2955 	 * are handled separately.
2956 	 */
2957 	pcib = device_get_parent(dev);
2958 	for (i = 0; i < numdevs; i++) {
2959 		child = devlist[i];
2960 		dinfo = device_get_ivars(child);
2961 		dstate = state;
2962 		if (device_is_attached(child) &&
2963 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
2964 			pci_set_powerstate(child, dstate);
2965 	}
2966 }
2967 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then optionally power them down to D3.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	/* Powering down is optional, controlled by pci_do_power_suspend. */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
2999 
/*
 * Bus resume method: optionally power each child back up to D0,
 * restore its saved config space (unattached children are saved again,
 * which powers them back down), and then resume the children.
 * Critical device classes (display, memory, bridge, base peripheral)
 * are resumed before everything else.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3055 
/*
 * Locate a preloaded PCI vendor database (loader type
 * "pci_vendor_data") and record its address and size for later use by
 * pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this writes one byte at index
			 * pci_vendordata_size, i.e. just past the reported
			 * size -- presumably the preloaded image leaves
			 * slack for this; worth confirming.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3075 
/*
 * Bus driver_added method: let the new driver identify children, then
 * reprobe every child that is still unclaimed (DS_NOTPRESENT).  The
 * child's config space is restored before probing; if no driver
 * attaches, it is saved again (third argument 1 may power the device
 * back down).
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children no driver has claimed yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3104 
/*
 * Bus setup_intr method.  After hooking up the handler via
 * bus_generic_setup_intr(), program the interrupt source for a direct
 * child: for rid 0 re-enable INTx; for rid > 0 (MSI or MSI-X) ask the
 * parent bridge to map the message on first use, enable it, and count
 * the handlers sharing it.  If mapping fails, the generic handler set
 * up above is torn down again before returning the error.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map MSI on the first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3196 
3197 int
3198 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3199     void *cookie)
3200 {
3201 	struct msix_table_entry *mte;
3202 	struct resource_list_entry *rle;
3203 	struct pci_devinfo *dinfo;
3204 	int error, rid;
3205 
3206 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3207 		return (EINVAL);
3208 
3209 	/* If this isn't a direct child, just bail out */
3210 	if (device_get_parent(child) != dev)
3211 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3212 
3213 	rid = rman_get_rid(irq);
3214 	if (rid == 0) {
3215 		/* Mask INTx */
3216 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3217 	} else {
3218 		/*
3219 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3220 		 * decrement the appropriate handlers count and mask the
3221 		 * MSI-X message, or disable MSI messages if the count
3222 		 * drops to 0.
3223 		 */
3224 		dinfo = device_get_ivars(child);
3225 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3226 		if (rle->res != irq)
3227 			return (EINVAL);
3228 		if (dinfo->cfg.msi.msi_alloc > 0) {
3229 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3230 			    ("MSI-X index too high"));
3231 			if (dinfo->cfg.msi.msi_handlers == 0)
3232 				return (EINVAL);
3233 			dinfo->cfg.msi.msi_handlers--;
3234 			if (dinfo->cfg.msi.msi_handlers == 0)
3235 				pci_disable_msi(child);
3236 		} else {
3237 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3238 			    ("No MSI or MSI-X interrupts allocated"));
3239 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3240 			    ("MSI-X index too high"));
3241 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3242 			if (mte->mte_handlers == 0)
3243 				return (EINVAL);
3244 			mte->mte_handlers--;
3245 			if (mte->mte_handlers == 0)
3246 				pci_mask_msix(child, rid - 1);
3247 		}
3248 	}
3249 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3250 	if (rid > 0)
3251 		KASSERT(error == 0,
3252 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3253 	return (error);
3254 }
3255 
/*
 * Bus print_child method: emit a one-line summary of the child's
 * resources (ports, memory, IRQs), flags, and slot/function address.
 * Returns the number of characters printed, per newbus convention.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/* NOTE(review): queries the bus's flags, not the child's -- confirm. */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3281 
/*
 * Class/subclass description table used by pci_probe_nomatch() to give
 * unclaimed devices a generic name.  An entry with subclass -1 is the
 * fallback description for the whole class; the table is terminated by
 * a NULL desc pointer.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3373 
3374 void
3375 pci_probe_nomatch(device_t dev, device_t child)
3376 {
3377 	int	i;
3378 	char	*cp, *scp, *device;
3379 
3380 	/*
3381 	 * Look for a listing for this device in a loaded device database.
3382 	 */
3383 	if ((device = pci_describe_device(child)) != NULL) {
3384 		device_printf(dev, "<%s>", device);
3385 		free(device, M_DEVBUF);
3386 	} else {
3387 		/*
3388 		 * Scan the class/subclass descriptions for a general
3389 		 * description.
3390 		 */
3391 		cp = "unknown";
3392 		scp = NULL;
3393 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3394 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3395 				if (pci_nomatch_tab[i].subclass == -1) {
3396 					cp = pci_nomatch_tab[i].desc;
3397 				} else if (pci_nomatch_tab[i].subclass ==
3398 				    pci_get_subclass(child)) {
3399 					scp = pci_nomatch_tab[i].desc;
3400 				}
3401 			}
3402 		}
3403 		device_printf(dev, "<%s%s%s>",
3404 		    cp ? cp : "",
3405 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3406 		    scp ? scp : "");
3407 	}
3408 	printf(" at device %d.%d (no driver attached)\n",
3409 	    pci_get_slot(child), pci_get_function(child));
3410 	pci_cfg_save(child, device_get_ivars(child), 1);
3411 	return;
3412 }
3413 
3414 /*
3415  * Parse the PCI device database, if loaded, and return a pointer to a
3416  * description of the device.
3417  *
3418  * The database is flat text formatted as follows:
3419  *
3420  * Any line not in a valid format is ignored.
3421  * Lines are terminated with newline '\n' characters.
3422  *
3423  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3424  * the vendor name.
3425  *
3426  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3427  * - devices cannot be listed without a corresponding VENDOR line.
3428  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3429  * another TAB, then the device name.
3430  */
3431 
3432 /*
3433  * Assuming (ptr) points to the beginning of a line in the database,
3434  * return the vendor or device and description of the next entry.
3435  * The value of (vendor) or (device) inappropriate for the entry type
3436  * is set to -1.  Returns nonzero at the end of the database.
3437  *
3438  * Note that this is slightly unrobust in the face of corrupt data;
3439  * we attempt to safeguard against this by spamming the end of the
3440  * database with a newline when we initialise.
3441  */
3442 static int
3443 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3444 {
3445 	char	*cp = *ptr;
3446 	int	left;
3447 
3448 	*device = -1;
3449 	*vendor = -1;
3450 	**desc = '\0';
3451 	for (;;) {
3452 		left = pci_vendordata_size - (cp - pci_vendordata);
3453 		if (left <= 0) {
3454 			*ptr = cp;
3455 			return(1);
3456 		}
3457 
3458 		/* vendor entry? */
3459 		if (*cp != '\t' &&
3460 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3461 			break;
3462 		/* device entry? */
3463 		if (*cp == '\t' &&
3464 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3465 			break;
3466 
3467 		/* skip to next line */
3468 		while (*cp != '\n' && left > 0) {
3469 			cp++;
3470 			left--;
3471 		}
3472 		if (*cp == '\n') {
3473 			cp++;
3474 			left--;
3475 		}
3476 	}
3477 	/* skip to next line */
3478 	while (*cp != '\n' && left > 0) {
3479 		cp++;
3480 		left--;
3481 	}
3482 	if (*cp == '\n' && left > 0)
3483 		cp++;
3484 	*ptr = cp;
3485 	return(0);
3486 }
3487 
/*
 * Look up 'dev' in the preloaded vendor database and return a
 * malloc'ed "vendor, device" description string, or NULL if no
 * database is loaded, the vendor is not listed, or an allocation
 * fails.  Unknown devices under a known vendor are described by their
 * hex device ID.  The caller frees the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no matching device entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor entry ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device ID if no description was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3540 
/*
 * Bus read_ivar method: export the cached config-space fields to
 * children (backs the pci_get_*() accessor macros).  Returns ENOENT
 * for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3623 
3624 int
3625 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3626 {
3627 	struct pci_devinfo *dinfo;
3628 
3629 	dinfo = device_get_ivars(child);
3630 
3631 	switch (which) {
3632 	case PCI_IVAR_INTPIN:
3633 		dinfo->cfg.intpin = value;
3634 		return (0);
3635 	case PCI_IVAR_ETHADDR:
3636 	case PCI_IVAR_SUBVENDOR:
3637 	case PCI_IVAR_SUBDEVICE:
3638 	case PCI_IVAR_VENDOR:
3639 	case PCI_IVAR_DEVICE:
3640 	case PCI_IVAR_DEVID:
3641 	case PCI_IVAR_CLASS:
3642 	case PCI_IVAR_SUBCLASS:
3643 	case PCI_IVAR_PROGIF:
3644 	case PCI_IVAR_REVID:
3645 	case PCI_IVAR_IRQ:
3646 	case PCI_IVAR_DOMAIN:
3647 	case PCI_IVAR_BUS:
3648 	case PCI_IVAR_SLOT:
3649 	case PCI_IVAR_FUNCTION:
3650 		return (EINVAL);	/* disallow for now */
3651 
3652 	default:
3653 		return (ENOENT);
3654 	}
3655 }
3656 
3657 
3658 #include "opt_ddb.h"
3659 #ifdef DDB
3660 #include <ddb/ddb.h>
3661 #include <sys/cons.h>
3662 
3663 /*
3664  * List resources based on pci map registers, used for within ddb
3665  */
3666 
/*
 * "show pciregs" DDB command: walk the global pci_devq list and print
 * a pciconf-style summary line (selector, class, card/chip IDs,
 * revision, header type) for every device discovered at probe time.
 * Unnamed devices are printed as "noneN".
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3706 #endif /* DDB */
3707 
/*
 * Reserve the range backing a BAR the first time a child asks for it.
 * Probes the BAR to learn its size and type, validates the requested
 * resource type against the BAR type, allocates a suitably sized and
 * aligned range from the parent (unactivated), records it in the
 * child's resource list as RLE_RESERVED, and programs the BAR with the
 * chosen address.  Returns the reserved resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/*
	 * Determine the size of the BAR and ignore BARs with a size
	 * of 0.  Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	if (mapsize == 0)
		goto out;

	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	/* Program the BAR with the address we actually got. */
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3803 
3804 
/*
 * Bus alloc_resource method.  Requests from non-direct children are
 * passed straight up.  For direct children: an INTx interrupt (rid 0)
 * is lazily routed when the device needs one, BAR-backed memory and
 * I/O port ranges are lazily reserved via pci_reserve_map(), and the
 * actual allocation comes from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3855 
/*
 * Bus activate_resource method.  After generic activation succeeds,
 * enable the matching decode (I/O or memory) in a direct child's
 * command register; device ROM BARs additionally need their enable
 * bit set explicitly.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		if (rid == PCIR_BIOS)
			pci_write_config(child, rid, rman_get_start(r) |
			    PCIM_BIOS_ENABLE, 4);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
3881 
/*
 * Bus deactivate_resource method.  After generic deactivation, stop a
 * device ROM from decoding by rewriting the BIOS BAR without its
 * enable bit.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (rid == PCIR_BIOS)
		pci_write_config(child, rid, rman_get_start(r), 4);
	return (0);
}
3897 
/*
 * Detach and destroy a child device.  Disables the child's memory and
 * I/O port decode, releases every resource recorded in its resource
 * list (complaining about, then forcibly releasing, any that are still
 * active or busy), and finally deletes the device and frees its config
 * state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3937 
/*
 * Bus delete_resource method for direct children.  Refuses to delete a
 * resource that is still active or busy.  Otherwise, clears the
 * matching BAR so the device stops decoding (unless the platform
 * defines __PCI_BAR_ZERO_VALID), unreserves the range, and removes the
 * resource list entry.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3980 
3981 struct resource_list *
3982 pci_get_resource_list (device_t dev, device_t child)
3983 {
3984 	struct pci_devinfo *dinfo = device_get_ivars(child);
3985 
3986 	return (&dinfo->resources);
3987 }
3988 
3989 uint32_t
3990 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3991 {
3992 	struct pci_devinfo *dinfo = device_get_ivars(child);
3993 	pcicfgregs *cfg = &dinfo->cfg;
3994 
3995 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3996 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3997 }
3998 
3999 void
4000 pci_write_config_method(device_t dev, device_t child, int reg,
4001     uint32_t val, int width)
4002 {
4003 	struct pci_devinfo *dinfo = device_get_ivars(child);
4004 	pcicfgregs *cfg = &dinfo->cfg;
4005 
4006 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4007 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4008 }
4009 
4010 int
4011 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4012     size_t buflen)
4013 {
4014 
4015 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4016 	    pci_get_function(child));
4017 	return (0);
4018 }
4019 
4020 int
4021 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4022     size_t buflen)
4023 {
4024 	struct pci_devinfo *dinfo;
4025 	pcicfgregs *cfg;
4026 
4027 	dinfo = device_get_ivars(child);
4028 	cfg = &dinfo->cfg;
4029 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4030 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4031 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4032 	    cfg->progif);
4033 	return (0);
4034 }
4035 
4036 int
4037 pci_assign_interrupt_method(device_t dev, device_t child)
4038 {
4039 	struct pci_devinfo *dinfo = device_get_ivars(child);
4040 	pcicfgregs *cfg = &dinfo->cfg;
4041 
4042 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4043 	    cfg->intpin));
4044 }
4045 
4046 static int
4047 pci_modevent(module_t mod, int what, void *arg)
4048 {
4049 	static struct cdev *pci_cdev;
4050 
4051 	switch (what) {
4052 	case MOD_LOAD:
4053 		STAILQ_INIT(&pci_devq);
4054 		pci_generation = 0;
4055 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4056 		    "pci");
4057 		pci_load_vendor_data();
4058 		break;
4059 
4060 	case MOD_UNLOAD:
4061 		destroy_dev(pci_cdev);
4062 		break;
4063 	}
4064 
4065 	return (0);
4066 }
4067 
/*
 * Restore the PCI configuration registers previously saved in dinfo
 * by pci_cfg_save() (BARs, expansion ROM, command register, interrupt
 * routing, timing and identification bytes), raising the device to D0
 * first.  Counterpart of pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Restore the saved BARs and expansion ROM base address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the remaining writable type 0 header registers. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4112 
4113 void
4114 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4115 {
4116 	int i;
4117 	uint32_t cls;
4118 	int ps;
4119 
4120 	/*
4121 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4122 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4123 	 * which also require special treatment.  Other types are unknown, and
4124 	 * we err on the side of safety by ignoring them.  Powering down
4125 	 * bridges should not be undertaken lightly.
4126 	 */
4127 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4128 		return;
4129 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4130 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4131 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4132 
4133 	/*
4134 	 * Some drivers apparently write to these registers w/o updating our
4135 	 * cached copy.  No harm happens if we update the copy, so do so here
4136 	 * so we can restore them.  The COMMAND register is modified by the
4137 	 * bus w/o updating the cache.  This should represent the normally
4138 	 * writable portion of the 'defined' part of type 0 headers.  In
4139 	 * theory we also need to save/restore the PCI capability structures
4140 	 * we know about, but apart from power we don't know any that are
4141 	 * writable.
4142 	 */
4143 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4144 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4145 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4146 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4147 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4148 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4149 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4150 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4151 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4152 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4153 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4154 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4155 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4156 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4157 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4158 
4159 	/*
4160 	 * don't set the state for display devices, base peripherals and
4161 	 * memory devices since bad things happen when they are powered down.
4162 	 * We should (a) have drivers that can easily detach and (b) use
4163 	 * generic drivers for these devices so that some device actually
4164 	 * attaches.  We need to make sure that when we implement (a) we don't
4165 	 * power the device down on a reattach.
4166 	 */
4167 	cls = pci_get_class(dev);
4168 	if (!setstate)
4169 		return;
4170 	switch (pci_do_power_nodriver)
4171 	{
4172 		case 0:		/* NO powerdown at all */
4173 			return;
4174 		case 1:		/* Conservative about what to power down */
4175 			if (cls == PCIC_STORAGE)
4176 				return;
4177 			/*FALLTHROUGH*/
4178 		case 2:		/* Agressive about what to power down */
4179 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4180 			    cls == PCIC_BASEPERIPH)
4181 				return;
4182 			/*FALLTHROUGH*/
4183 		case 3:		/* Power down everything */
4184 			break;
4185 	}
4186 	/*
4187 	 * PCI spec says we can only go into D3 state from D0 state.
4188 	 * Transition from D[12] into D0 before going to D3 state.
4189 	 */
4190 	ps = pci_get_powerstate(dev);
4191 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4192 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4193 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4194 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4195 }
4196