xref: /freebsd/sys/dev/pci/pci.c (revision 7aa383846770374466b1dcb2cefd71bde9acf463)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 #ifdef __HAVE_ACPI
73 #include <contrib/dev/acpica/include/acpi.h>
74 #include "acpi_if.h"
75 #else
76 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
77 #endif
78 
79 static pci_addr_t	pci_mapbase(uint64_t mapreg);
80 static const char	*pci_maptype(uint64_t mapreg);
81 static int		pci_mapsize(uint64_t testval);
82 static int		pci_maprange(uint64_t mapreg);
83 static pci_addr_t	pci_rombase(uint64_t mapreg);
84 static int		pci_romsize(uint64_t testval);
85 static void		pci_fixancient(pcicfgregs *cfg);
86 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
87 
88 static int		pci_porten(device_t dev);
89 static int		pci_memen(device_t dev);
90 static void		pci_assign_interrupt(device_t bus, device_t dev,
91 			    int force_route);
92 static int		pci_add_map(device_t bus, device_t dev, int reg,
93 			    struct resource_list *rl, int force, int prefetch);
94 static int		pci_probe(device_t dev);
95 static int		pci_attach(device_t dev);
96 static void		pci_load_vendor_data(void);
97 static int		pci_describe_parse_line(char **ptr, int *vendor,
98 			    int *device, char **desc);
99 static char		*pci_describe_device(device_t dev);
100 static int		pci_modevent(module_t mod, int what, void *arg);
101 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
102 			    pcicfgregs *cfg);
103 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
104 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
105 			    int reg, uint32_t *data);
106 #if 0
107 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
108 			    int reg, uint32_t data);
109 #endif
110 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
111 static void		pci_disable_msi(device_t dev);
112 static void		pci_enable_msi(device_t dev, uint64_t address,
113 			    uint16_t data);
114 static void		pci_enable_msix(device_t dev, u_int index,
115 			    uint64_t address, uint32_t data);
116 static void		pci_mask_msix(device_t dev, u_int index);
117 static void		pci_unmask_msix(device_t dev, u_int index);
118 static int		pci_msi_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pci_remap_intr_method(device_t bus, device_t dev,
122 			    u_int irq);
123 
/*
 * Dispatch table for the PCI bus driver: device lifecycle, newbus
 * bus-interface, and PCI-specific kobj methods.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
/* Attach the "pci" driver to "pcib" (PCI bridge) parents. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
182 
/*
 * Vendor description database; presumably loaded from a vendor data
 * file by pci_load_vendor_data() (declared above) — see that function
 * for the source of this buffer.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/* Per-device quirk table entry, matched by vendor/device ID. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
195 
/*
 * Known-broken devices.  devid is (device << 16 | vendor), matching the
 * layout of the 32-bit PCIR_DEVVENDOR register.  Terminated by a zero
 * entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
230 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of enumerated PCI devices */
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set by pci_read_extcap() when a PCIe / PCI-X capability is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
243 
/*
 * Loader tunables and their mirroring sysctl knobs.  Each TUNABLE_INT
 * lets the value be set from loader.conf before the bus attaches.
 */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB takeover defaults on only on x86, where BIOS legacy USB exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
292 
/*
 * Find a device_t by bus/slot/function in domain 0.  Returns NULL if
 * no matching device has been enumerated.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
301 
302 /* Find a device_t by domain/bus/slot/function */
303 
304 device_t
305 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
306 {
307 	struct pci_devinfo *dinfo;
308 
309 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
310 		if ((dinfo->cfg.domain == domain) &&
311 		    (dinfo->cfg.bus == bus) &&
312 		    (dinfo->cfg.slot == slot) &&
313 		    (dinfo->cfg.func == func)) {
314 			return (dinfo->cfg.dev);
315 		}
316 	}
317 
318 	return (NULL);
319 }
320 
321 /* Find a device_t by vendor/device ID */
322 
323 device_t
324 pci_find_device(uint16_t vendor, uint16_t device)
325 {
326 	struct pci_devinfo *dinfo;
327 
328 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
329 		if ((dinfo->cfg.vendor == vendor) &&
330 		    (dinfo->cfg.device == device)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 static int
339 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
340 {
341 	va_list ap;
342 	int retval;
343 
344 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
345 	    cfg->func);
346 	va_start(ap, fmt);
347 	retval += vprintf(fmt, ap);
348 	va_end(ap);
349 	return (retval);
350 }
351 
352 /* return base address of memory or port map */
353 
354 static pci_addr_t
355 pci_mapbase(uint64_t mapreg)
356 {
357 
358 	if (PCI_BAR_MEM(mapreg))
359 		return (mapreg & PCIM_BAR_MEM_BASE);
360 	else
361 		return (mapreg & PCIM_BAR_IO_BASE);
362 }
363 
364 /* return map type of memory or port map */
365 
366 static const char *
367 pci_maptype(uint64_t mapreg)
368 {
369 
370 	if (PCI_BAR_IO(mapreg))
371 		return ("I/O Port");
372 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
373 		return ("Prefetchable Memory");
374 	return ("Memory");
375 }
376 
377 /* return log2 of map size decoded for memory or port map */
378 
379 static int
380 pci_mapsize(uint64_t testval)
381 {
382 	int ln2size;
383 
384 	testval = pci_mapbase(testval);
385 	ln2size = 0;
386 	if (testval != 0) {
387 		while ((testval & 1) == 0)
388 		{
389 			ln2size++;
390 			testval >>= 1;
391 		}
392 	}
393 	return (ln2size);
394 }
395 
396 /* return base address of device ROM */
397 
/* return base address of device ROM (masks off the enable/reserved bits) */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
404 
/* return log2 of map size decoded for device ROM */
406 
407 static int
408 pci_romsize(uint64_t testval)
409 {
410 	int ln2size;
411 
412 	testval = pci_rombase(testval);
413 	ln2size = 0;
414 	if (testval != 0) {
415 		while ((testval & 1) == 0)
416 		{
417 			ln2size++;
418 			testval >>= 1;
419 		}
420 	}
421 	return (ln2size);
422 }
423 
424 /* return log2 of address range supported by map register */
425 
426 static int
427 pci_maprange(uint64_t mapreg)
428 {
429 	int ln2range = 0;
430 
431 	if (PCI_BAR_IO(mapreg))
432 		ln2range = 32;
433 	else
434 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
435 		case PCIM_BAR_MEM_32:
436 			ln2range = 32;
437 			break;
438 		case PCIM_BAR_MEM_1MB:
439 			ln2range = 20;
440 			break;
441 		case PCIM_BAR_MEM_64:
442 			ln2range = 64;
443 			break;
444 		}
445 	return (ln2range);
446 }
447 
448 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
449 
450 static void
451 pci_fixancient(pcicfgregs *cfg)
452 {
453 	if (cfg->hdrtype != 0)
454 		return;
455 
456 	/* PCI to PCI bridges use header type 1 */
457 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
458 		cfg->hdrtype = 1;
459 }
460 
461 /* extract header type specific config data */
462 
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers here */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
484 
/*
 * Read the configuration header of the function at d:b:s:f into a
 * freshly allocated pci_devinfo, link it onto the global device list,
 * and mirror the identity fields into the pc_conf ioctl structure.
 * 'size' is the allocation size, allowing callers to embed the
 * pci_devinfo in a larger structure.  Returns NULL if no device
 * responds at this address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones vendor/device means nothing is present at this slot. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror identity into the pciio conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
559 
/*
 * Walk the device's PCI capability list and record the location and
 * key contents of the capabilities this driver uses: power management,
 * HyperTransport MSI mapping (x86/powerpc), MSI, MSI-X, VPD,
 * subvendor IDs on bridges, and PCI-X / PCI-express presence.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table/PBA registers encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Record location only; data is read lazily. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
693 
694 /*
695  * PCI Vital Product Data
696  */
697 
698 #define	PCI_VPD_TIMEOUT		1000000
699 
700 static int
701 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
702 {
703 	int count = PCI_VPD_TIMEOUT;
704 
705 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
706 
707 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
708 
709 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
710 		if (--count < 0)
711 			return (ENXIO);
712 		DELAY(1);	/* limit looping */
713 	}
714 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
715 
716 	return (0);
717 }
718 
#if 0
/*
 * Write one 32-bit word of VPD data: inverse of pci_read_vpd_reg().
 * Sets the flag bit (0x8000) with the address; hardware clears it when
 * the write completes.  Currently unused, hence compiled out.
 * NOTE(review): KASSERT message has the same "must by" typo as the
 * read path had — fix if this code is ever enabled.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
740 
/*
 * Cursor state for the VPD reader: fetches 32-bit words via
 * pci_read_vpd_reg() and hands them out one byte at a time while
 * maintaining a running checksum.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched word */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes seen */
};
749 
750 static int
751 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
752 {
753 	uint32_t reg;
754 	uint8_t byte;
755 
756 	if (vrs->bytesinval == 0) {
757 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
758 			return (ENXIO);
759 		vrs->val = le32toh(reg);
760 		vrs->off += 4;
761 		byte = vrs->val & 0xff;
762 		vrs->bytesinval = 3;
763 	} else {
764 		vrs->val = vrs->val >> 8;
765 		byte = vrs->val & 0xff;
766 		vrs->bytesinval--;
767 	}
768 
769 	vrs->cksum += byte;
770 	*data = byte;
771 	return (0);
772 }
773 
/*
 * Parse the device's Vital Product Data into cfg->vpd.
 *
 * Implemented as a state machine driven by vpd_nextbyte():
 *   state 0  - resource item header (small or large form)
 *   state 1  - Identifier String bytes -> vpd_ident
 *   state 2  - VPD-R keyword header    -> vpd_ros[] entry
 *   state 3  - VPD-R keyword value bytes (validates "RV" checksum)
 *   state 4  - skip bytes (unused transition target)
 *   state 5  - VPD-W keyword header    -> vpd_w[] entry
 *   state 6  - VPD-W keyword value bytes
 *   state -1 - normal termination; state -2 - I/O error
 * On checksum failure the read-only data is discarded; on I/O error
 * everything parsed so far is discarded.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian len. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD address space is at most 0x7f words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit len, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" field's first byte makes the running
			 * checksum of everything up to and including it
			 * come out to zero when the data is intact.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: shrink array to fit. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip remaining bytes of the current item. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset where this value starts. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: shrink array to fit. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we don't re-read on next query. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1053 
1054 int
1055 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1056 {
1057 	struct pci_devinfo *dinfo = device_get_ivars(child);
1058 	pcicfgregs *cfg = &dinfo->cfg;
1059 
1060 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1061 		pci_read_vpd(device_get_parent(dev), cfg);
1062 
1063 	*identptr = cfg->vpd.vpd_ident;
1064 
1065 	if (*identptr == NULL)
1066 		return (ENXIO);
1067 
1068 	return (0);
1069 }
1070 
1071 int
1072 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1073 	const char **vptr)
1074 {
1075 	struct pci_devinfo *dinfo = device_get_ivars(child);
1076 	pcicfgregs *cfg = &dinfo->cfg;
1077 	int i;
1078 
1079 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1080 		pci_read_vpd(device_get_parent(dev), cfg);
1081 
1082 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1083 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1084 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1085 			*vptr = cfg->vpd.vpd_ros[i].value;
1086 		}
1087 
1088 	if (i != cfg->vpd.vpd_rocnt)
1089 		return (0);
1090 
1091 	*vptr = NULL;
1092 	return (ENXIO);
1093 }
1094 
1095 /*
1096  * Find the requested extended capability and return the offset in
1097  * configuration space via the pointer provided. The function returns
1098  * 0 on success and error code otherwise.
1099  */
1100 int
1101 pci_find_extcap_method(device_t dev, device_t child, int capability,
1102     int *capreg)
1103 {
1104 	struct pci_devinfo *dinfo = device_get_ivars(child);
1105 	pcicfgregs *cfg = &dinfo->cfg;
1106 	u_int32_t status;
1107 	u_int8_t ptr;
1108 
1109 	/*
1110 	 * Check the CAP_LIST bit of the PCI status register first.
1111 	 */
1112 	status = pci_read_config(child, PCIR_STATUS, 2);
1113 	if (!(status & PCIM_STATUS_CAPPRESENT))
1114 		return (ENXIO);
1115 
1116 	/*
1117 	 * Determine the start pointer of the capabilities list.
1118 	 */
1119 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1120 	case 0:
1121 	case 1:
1122 		ptr = PCIR_CAP_PTR;
1123 		break;
1124 	case 2:
1125 		ptr = PCIR_CAP_PTR_2;
1126 		break;
1127 	default:
1128 		/* XXX: panic? */
1129 		return (ENXIO);		/* no extended capabilities support */
1130 	}
1131 	ptr = pci_read_config(child, ptr, 1);
1132 
1133 	/*
1134 	 * Traverse the capabilities list.
1135 	 */
1136 	while (ptr != 0) {
1137 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1138 			if (capreg != NULL)
1139 				*capreg = ptr;
1140 			return (0);
1141 		}
1142 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1143 	}
1144 
1145 	return (ENOENT);
1146 }
1147 
1148 /*
1149  * Support for MSI-X message interrupts.
1150  */
1151 void
1152 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1153 {
1154 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1155 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1156 	uint32_t offset;
1157 
1158 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1159 	offset = msix->msix_table_offset + index * 16;
1160 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1161 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1162 	bus_write_4(msix->msix_table_res, offset + 8, data);
1163 
1164 	/* Enable MSI -> HT mapping. */
1165 	pci_ht_map_msi(dev, address);
1166 }
1167 
1168 void
1169 pci_mask_msix(device_t dev, u_int index)
1170 {
1171 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1172 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1173 	uint32_t offset, val;
1174 
1175 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1176 	offset = msix->msix_table_offset + index * 16 + 12;
1177 	val = bus_read_4(msix->msix_table_res, offset);
1178 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1179 		val |= PCIM_MSIX_VCTRL_MASK;
1180 		bus_write_4(msix->msix_table_res, offset, val);
1181 	}
1182 }
1183 
1184 void
1185 pci_unmask_msix(device_t dev, u_int index)
1186 {
1187 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1188 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1189 	uint32_t offset, val;
1190 
1191 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1192 	offset = msix->msix_table_offset + index * 16 + 12;
1193 	val = bus_read_4(msix->msix_table_res, offset);
1194 	if (val & PCIM_MSIX_VCTRL_MASK) {
1195 		val &= ~PCIM_MSIX_VCTRL_MASK;
1196 		bus_write_4(msix->msix_table_res, offset, val);
1197 	}
1198 }
1199 
1200 int
1201 pci_pending_msix(device_t dev, u_int index)
1202 {
1203 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1204 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1205 	uint32_t offset, bit;
1206 
1207 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1208 	offset = msix->msix_pba_offset + (index / 32) * 4;
1209 	bit = 1 << index % 32;
1210 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1211 }
1212 
1213 /*
1214  * Restore MSI-X registers and table during resume.  If MSI-X is
1215  * enabled then walk the virtual table to restore the actual MSI-X
1216  * table.
1217  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register state. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1245 
1246 /*
1247  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1248  * returned in *count.  After this function returns, each message will be
1249  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1250  */
1251 int
1252 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1253 {
1254 	struct pci_devinfo *dinfo = device_get_ivars(child);
1255 	pcicfgregs *cfg = &dinfo->cfg;
1256 	struct resource_list_entry *rle;
1257 	int actual, error, i, irq, max;
1258 
1259 	/* Don't let count == 0 get us into trouble. */
1260 	if (*count == 0)
1261 		return (EINVAL);
1262 
1263 	/* If rid 0 is allocated, then fail. */
1264 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1265 	if (rle != NULL && rle->res != NULL)
1266 		return (ENXIO);
1267 
1268 	/* Already have allocated messages? */
1269 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1270 		return (ENXIO);
1271 
1272 	/* If MSI is blacklisted for this system, fail. */
1273 	if (pci_msi_blacklisted())
1274 		return (ENXIO);
1275 
1276 	/* MSI-X capability present? */
1277 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1278 		return (ENODEV);
1279 
1280 	/* Make sure the appropriate BARs are mapped. */
1281 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1282 	    cfg->msix.msix_table_bar);
1283 	if (rle == NULL || rle->res == NULL ||
1284 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1285 		return (ENXIO);
1286 	cfg->msix.msix_table_res = rle->res;
1287 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1288 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1289 		    cfg->msix.msix_pba_bar);
1290 		if (rle == NULL || rle->res == NULL ||
1291 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1292 			return (ENXIO);
1293 	}
1294 	cfg->msix.msix_pba_res = rle->res;
1295 
1296 	if (bootverbose)
1297 		device_printf(child,
1298 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1299 		    *count, cfg->msix.msix_msgnum);
1300 	max = min(*count, cfg->msix.msix_msgnum);
1301 	for (i = 0; i < max; i++) {
1302 		/* Allocate a message. */
1303 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1304 		if (error)
1305 			break;
1306 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1307 		    irq, 1);
1308 	}
1309 	actual = i;
1310 
1311 	if (bootverbose) {
1312 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1313 		if (actual == 1)
1314 			device_printf(child, "using IRQ %lu for MSI-X\n",
1315 			    rle->start);
1316 		else {
1317 			int run;
1318 
1319 			/*
1320 			 * Be fancy and try to print contiguous runs of
1321 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1322 			 * 'run' is true if we are in a range.
1323 			 */
1324 			device_printf(child, "using IRQs %lu", rle->start);
1325 			irq = rle->start;
1326 			run = 0;
1327 			for (i = 1; i < actual; i++) {
1328 				rle = resource_list_find(&dinfo->resources,
1329 				    SYS_RES_IRQ, i + 1);
1330 
1331 				/* Still in a run? */
1332 				if (rle->start == irq + 1) {
1333 					run = 1;
1334 					irq++;
1335 					continue;
1336 				}
1337 
1338 				/* Finish previous range. */
1339 				if (run) {
1340 					printf("-%d", irq);
1341 					run = 0;
1342 				}
1343 
1344 				/* Start new range. */
1345 				printf(",%lu", rle->start);
1346 				irq = rle->start;
1347 			}
1348 
1349 			/* Unfinished range? */
1350 			if (run)
1351 				printf("-%d", irq);
1352 			printf(" for MSI-X\n");
1353 		}
1354 	}
1355 
1356 	/* Mask all vectors. */
1357 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1358 		pci_mask_msix(child, i);
1359 
1360 	/* Allocate and initialize vector data and virtual table. */
1361 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1362 	    M_DEVBUF, M_WAITOK | M_ZERO);
1363 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1364 	    M_DEVBUF, M_WAITOK | M_ZERO);
1365 	for (i = 0; i < actual; i++) {
1366 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1367 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1368 		cfg->msix.msix_table[i].mte_vector = i + 1;
1369 	}
1370 
1371 	/* Update control register to enable MSI-X. */
1372 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1373 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1374 	    cfg->msix.msix_ctrl, 2);
1375 
1376 	/* Update counts of alloc'd messages. */
1377 	cfg->msix.msix_alloc = actual;
1378 	cfg->msix.msix_table_len = actual;
1379 	*count = actual;
1380 	return (0);
1381 }
1382 
1383 /*
1384  * By default, pci_alloc_msix() will assign the allocated IRQ
1385  * resources consecutively to the first N messages in the MSI-X table.
1386  * However, device drivers may want to use different layouts if they
1387  * either receive fewer messages than they asked for, or they wish to
1388  * populate the MSI-X table sparsely.  This method allows the driver
1389  * to specify what layout it wants.  It must be called after a
1390  * successful pci_alloc_msix() but before any of the associated
1391  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1392  *
1393  * The 'vectors' array contains 'count' message vectors.  The array
1394  * maps directly to the MSI-X table in that index 0 in the array
1395  * specifies the vector for the first message in the MSI-X table, etc.
1396  * The vector value in each array index can either be 0 to indicate
1397  * that no vector should be assigned to a message slot, or it can be a
1398  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1400  * vector (IRQ) to be used for the corresponding message.
1401  *
1402  * On successful return, each message with a non-zero vector will have
1403  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1404  * 1.  Additionally, if any of the IRQs allocated via the previous
1405  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1406  * will be freed back to the system automatically.
1407  *
1408  * For example, suppose a driver has a MSI-X table with 6 messages and
1409  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1410  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1411  * C.  After the call to pci_alloc_msix(), the device will be setup to
1412  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1414  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1415  * be freed back to the system.  This device will also have valid
1416  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1417  *
1418  * In any case, the SYS_RES_IRQ rid X will always map to the message
1419  * at MSI-X table index X - 1 and will only be valid if a vector is
1420  * assigned to that table entry.
1421  */
1422 int
1423 pci_remap_msix_method(device_t dev, device_t child, int count,
1424     const u_int *vectors)
1425 {
1426 	struct pci_devinfo *dinfo = device_get_ivars(child);
1427 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1428 	struct resource_list_entry *rle;
1429 	int i, irq, j, *used;
1430 
1431 	/*
1432 	 * Have to have at least one message in the table but the
1433 	 * table can't be bigger than the actual MSI-X table in the
1434 	 * device.
1435 	 */
1436 	if (count == 0 || count > msix->msix_msgnum)
1437 		return (EINVAL);
1438 
1439 	/* Sanity check the vectors. */
1440 	for (i = 0; i < count; i++)
1441 		if (vectors[i] > msix->msix_alloc)
1442 			return (EINVAL);
1443 
1444 	/*
1445 	 * Make sure there aren't any holes in the vectors to be used.
1446 	 * It's a big pain to support it, and it doesn't really make
1447 	 * sense anyway.  Also, at least one vector must be used.
1448 	 */
1449 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1450 	    M_ZERO);
1451 	for (i = 0; i < count; i++)
1452 		if (vectors[i] != 0)
1453 			used[vectors[i] - 1] = 1;
1454 	for (i = 0; i < msix->msix_alloc - 1; i++)
1455 		if (used[i] == 0 && used[i + 1] == 1) {
1456 			free(used, M_DEVBUF);
1457 			return (EINVAL);
1458 		}
1459 	if (used[0] != 1) {
1460 		free(used, M_DEVBUF);
1461 		return (EINVAL);
1462 	}
1463 
1464 	/* Make sure none of the resources are allocated. */
1465 	for (i = 0; i < msix->msix_table_len; i++) {
1466 		if (msix->msix_table[i].mte_vector == 0)
1467 			continue;
1468 		if (msix->msix_table[i].mte_handlers > 0)
1469 			return (EBUSY);
1470 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1471 		KASSERT(rle != NULL, ("missing resource"));
1472 		if (rle->res != NULL)
1473 			return (EBUSY);
1474 	}
1475 
1476 	/* Free the existing resource list entries. */
1477 	for (i = 0; i < msix->msix_table_len; i++) {
1478 		if (msix->msix_table[i].mte_vector == 0)
1479 			continue;
1480 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1481 	}
1482 
1483 	/*
1484 	 * Build the new virtual table keeping track of which vectors are
1485 	 * used.
1486 	 */
1487 	free(msix->msix_table, M_DEVBUF);
1488 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1489 	    M_DEVBUF, M_WAITOK | M_ZERO);
1490 	for (i = 0; i < count; i++)
1491 		msix->msix_table[i].mte_vector = vectors[i];
1492 	msix->msix_table_len = count;
1493 
1494 	/* Free any unused IRQs and resize the vectors array if necessary. */
1495 	j = msix->msix_alloc - 1;
1496 	if (used[j] == 0) {
1497 		struct msix_vector *vec;
1498 
1499 		while (used[j] == 0) {
1500 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1501 			    msix->msix_vectors[j].mv_irq);
1502 			j--;
1503 		}
1504 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1505 		    M_WAITOK);
1506 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1507 		    (j + 1));
1508 		free(msix->msix_vectors, M_DEVBUF);
1509 		msix->msix_vectors = vec;
1510 		msix->msix_alloc = j + 1;
1511 	}
1512 	free(used, M_DEVBUF);
1513 
1514 	/* Map the IRQs onto the rids. */
1515 	for (i = 0; i < count; i++) {
1516 		if (vectors[i] == 0)
1517 			continue;
1518 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1519 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1520 		    irq, 1);
1521 	}
1522 
1523 	if (bootverbose) {
1524 		device_printf(child, "Remapped MSI-X IRQs as: ");
1525 		for (i = 0; i < count; i++) {
1526 			if (i != 0)
1527 				printf(", ");
1528 			if (vectors[i] == 0)
1529 				printf("---");
1530 			else
1531 				printf("%d",
1532 				    msix->msix_vectors[vectors[i]].mv_irq);
1533 		}
1534 		printf("\n");
1535 	}
1536 
1537 	return (0);
1538 }
1539 
1540 static int
1541 pci_release_msix(device_t dev, device_t child)
1542 {
1543 	struct pci_devinfo *dinfo = device_get_ivars(child);
1544 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1545 	struct resource_list_entry *rle;
1546 	int i;
1547 
1548 	/* Do we have any messages to release? */
1549 	if (msix->msix_alloc == 0)
1550 		return (ENODEV);
1551 
1552 	/* Make sure none of the resources are allocated. */
1553 	for (i = 0; i < msix->msix_table_len; i++) {
1554 		if (msix->msix_table[i].mte_vector == 0)
1555 			continue;
1556 		if (msix->msix_table[i].mte_handlers > 0)
1557 			return (EBUSY);
1558 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1559 		KASSERT(rle != NULL, ("missing resource"));
1560 		if (rle->res != NULL)
1561 			return (EBUSY);
1562 	}
1563 
1564 	/* Update control register to disable MSI-X. */
1565 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1566 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1567 	    msix->msix_ctrl, 2);
1568 
1569 	/* Free the resource list entries. */
1570 	for (i = 0; i < msix->msix_table_len; i++) {
1571 		if (msix->msix_table[i].mte_vector == 0)
1572 			continue;
1573 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1574 	}
1575 	free(msix->msix_table, M_DEVBUF);
1576 	msix->msix_table_len = 0;
1577 
1578 	/* Release the IRQs. */
1579 	for (i = 0; i < msix->msix_alloc; i++)
1580 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1581 		    msix->msix_vectors[i].mv_irq);
1582 	free(msix->msix_vectors, M_DEVBUF);
1583 	msix->msix_alloc = 0;
1584 	return (0);
1585 }
1586 
1587 /*
1588  * Return the max supported MSI-X messages this device supports.
1589  * Basically, assuming the MD code can alloc messages, this function
1590  * should return the maximum value that pci_alloc_msix() can return.
1591  * Thus, it is subject to the tunables, etc.
1592  */
1593 int
1594 pci_msix_count_method(device_t dev, device_t child)
1595 {
1596 	struct pci_devinfo *dinfo = device_get_ivars(child);
1597 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1598 
1599 	if (pci_do_msix && msix->msix_location != 0)
1600 		return (msix->msix_msgnum);
1601 	return (0);
1602 }
1603 
1604 /*
1605  * HyperTransport MSI mapping control
1606  */
1607 void
1608 pci_ht_map_msi(device_t dev, uint64_t addr)
1609 {
1610 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1611 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1612 
1613 	if (!ht->ht_msimap)
1614 		return;
1615 
1616 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1617 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1618 		/* Enable MSI -> HT mapping. */
1619 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1620 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1621 		    ht->ht_msictrl, 2);
1622 	}
1623 
1624 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1625 		/* Disable MSI -> HT mapping. */
1626 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1627 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1628 		    ht->ht_msictrl, 2);
1629 	}
1630 }
1631 
1632 int
1633 pci_get_max_read_req(device_t dev)
1634 {
1635 	int cap;
1636 	uint16_t val;
1637 
1638 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1639 		return (0);
1640 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1641 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1642 	val >>= 12;
1643 	return (1 << (val + 7));
1644 }
1645 
1646 int
1647 pci_set_max_read_req(device_t dev, int size)
1648 {
1649 	int cap;
1650 	uint16_t val;
1651 
1652 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1653 		return (0);
1654 	if (size < 128)
1655 		size = 128;
1656 	if (size > 4096)
1657 		size = 4096;
1658 	size = (1 << (fls(size) - 1));
1659 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1660 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1661 	val |= (fls(size) - 8) << 12;
1662 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1663 	return (size);
1664 }
1665 
1666 /*
1667  * Support for MSI message signalled interrupts.
1668  */
1669 void
1670 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1671 {
1672 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1673 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1674 
1675 	/* Write data and address values. */
1676 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1677 	    address & 0xffffffff, 4);
1678 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1679 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1680 		    address >> 32, 4);
1681 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1682 		    data, 2);
1683 	} else
1684 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1685 		    2);
1686 
1687 	/* Enable MSI in the control register. */
1688 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1689 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1690 	    2);
1691 
1692 	/* Enable MSI -> HT mapping. */
1693 	pci_ht_map_msi(dev, address);
1694 }
1695 
1696 void
1697 pci_disable_msi(device_t dev)
1698 {
1699 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1700 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1701 
1702 	/* Disable MSI -> HT mapping. */
1703 	pci_ht_map_msi(dev, 0);
1704 
1705 	/* Disable MSI in the control register. */
1706 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1707 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1708 	    2);
1709 }
1710 
1711 /*
1712  * Restore MSI registers during resume.  If MSI is enabled then
1713  * restore the data and address registers in addition to the control
1714  * register.
1715  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* 64-bit capable devices keep the data register elsewhere. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register state. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1741 
1742 static int
1743 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1744 {
1745 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1746 	pcicfgregs *cfg = &dinfo->cfg;
1747 	struct resource_list_entry *rle;
1748 	struct msix_table_entry *mte;
1749 	struct msix_vector *mv;
1750 	uint64_t addr;
1751 	uint32_t data;
1752 	int error, i, j;
1753 
1754 	/*
1755 	 * Handle MSI first.  We try to find this IRQ among our list
1756 	 * of MSI IRQs.  If we find it, we request updated address and
1757 	 * data registers and apply the results.
1758 	 */
1759 	if (cfg->msi.msi_alloc > 0) {
1760 
1761 		/* If we don't have any active handlers, nothing to do. */
1762 		if (cfg->msi.msi_handlers == 0)
1763 			return (0);
1764 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1765 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1766 			    i + 1);
1767 			if (rle->start == irq) {
1768 				error = PCIB_MAP_MSI(device_get_parent(bus),
1769 				    dev, irq, &addr, &data);
1770 				if (error)
1771 					return (error);
1772 				pci_disable_msi(dev);
1773 				dinfo->cfg.msi.msi_addr = addr;
1774 				dinfo->cfg.msi.msi_data = data;
1775 				pci_enable_msi(dev, addr, data);
1776 				return (0);
1777 			}
1778 		}
1779 		return (ENOENT);
1780 	}
1781 
1782 	/*
1783 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1784 	 * we request the updated mapping info.  If that works, we go
1785 	 * through all the slots that use this IRQ and update them.
1786 	 */
1787 	if (cfg->msix.msix_alloc > 0) {
1788 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1789 			mv = &cfg->msix.msix_vectors[i];
1790 			if (mv->mv_irq == irq) {
1791 				error = PCIB_MAP_MSI(device_get_parent(bus),
1792 				    dev, irq, &addr, &data);
1793 				if (error)
1794 					return (error);
1795 				mv->mv_address = addr;
1796 				mv->mv_data = data;
1797 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1798 					mte = &cfg->msix.msix_table[j];
1799 					if (mte->mte_vector != i + 1)
1800 						continue;
1801 					if (mte->mte_handlers == 0)
1802 						continue;
1803 					pci_mask_msix(dev, j);
1804 					pci_enable_msix(dev, j, addr, data);
1805 					pci_unmask_msix(dev, j);
1806 				}
1807 			}
1808 		}
1809 		return (ENOENT);
1810 	}
1811 
1812 	return (ENOENT);
1813 }
1814 
1815 /*
1816  * Returns true if the specified device is blacklisted because MSI
1817  * doesn't work.
1818  */
1819 int
1820 pci_msi_device_blacklisted(device_t dev)
1821 {
1822 	struct pci_quirk *q;
1823 
1824 	if (!pci_honor_msi_blacklist)
1825 		return (0);
1826 
1827 	for (q = &pci_quirks[0]; q->devid; q++) {
1828 		if (q->devid == pci_get_devid(dev) &&
1829 		    q->type == PCI_QUIRK_DISABLE_MSI)
1830 			return (1);
1831 	}
1832 	return (0);
1833 }
1834 
1835 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1837  * we just check for blacklisted chipsets as represented by the
1838  * host-PCI bridge at device 0:0:0.  In the future, it may become
1839  * necessary to check other system attributes, such as the kenv values
1840  * that give the motherboard manufacturer and model number.
1841  */
1842 static int
1843 pci_msi_blacklisted(void)
1844 {
1845 	device_t dev;
1846 
1847 	if (!pci_honor_msi_blacklist)
1848 		return (0);
1849 
1850 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1851 	if (!(pcie_chipset || pcix_chipset))
1852 		return (1);
1853 
1854 	dev = pci_find_bsf(0, 0, 0);
1855 	if (dev != NULL)
1856 		return (pci_msi_device_blacklisted(dev));
1857 	return (0);
1858 }
1859 
1860 /*
1861  * Attempt to allocate *count MSI messages.  The actual number allocated is
1862  * returned in *count.  After this function returns, each message will be
1863  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1864  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	/* 32 is the architectural maximum number of MSI messages. */
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request on failure until a single message succeeds. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	/* The MME field (bits 6:4) holds log2 of the enabled messages. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1983 
1984 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Collect the IRQ numbers while verifying nothing is still in use. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2032 
2033 /*
2034  * Return the max supported MSI messages this device supports.
2035  * Basically, assuming the MD code can alloc messages, this function
2036  * should return the maximum value that pci_alloc_msi() can return.
2037  * Thus, it is subject to the tunables, etc.
2038  */
2039 int
2040 pci_msi_count_method(device_t dev, device_t child)
2041 {
2042 	struct pci_devinfo *dinfo = device_get_ivars(child);
2043 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2044 
2045 	if (pci_do_msi && msi->msi_location != 0)
2046 		return (msi->msi_msgnum);
2047 	return (0);
2048 }
2049 
2050 /* free pcicfgregs structure and all depending data structures */
2051 
2052 int
2053 pci_freecfg(struct pci_devinfo *dinfo)
2054 {
2055 	struct devlist *devlist_head;
2056 	int i;
2057 
2058 	devlist_head = &pci_devq;
2059 
2060 	if (dinfo->cfg.vpd.vpd_reg) {
2061 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2062 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2063 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2064 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2065 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2066 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2067 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2068 	}
2069 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2070 	free(dinfo, M_DEVBUF);
2071 
2072 	/* increment the generation count */
2073 	pci_generation++;
2074 
2075 	/* we're losing one device */
2076 	pci_numdevs--;
2077 	return (0);
2078 }
2079 
2080 /*
2081  * PCI power manangement
2082  */
2083 int
2084 pci_set_powerstate_method(device_t dev, device_t child, int state)
2085 {
2086 	struct pci_devinfo *dinfo = device_get_ivars(child);
2087 	pcicfgregs *cfg = &dinfo->cfg;
2088 	uint16_t status;
2089 	int result, oldstate, highest, delay;
2090 
2091 	if (cfg->pp.pp_cap == 0)
2092 		return (EOPNOTSUPP);
2093 
2094 	/*
2095 	 * Optimize a no state change request away.  While it would be OK to
2096 	 * write to the hardware in theory, some devices have shown odd
2097 	 * behavior when going from D3 -> D3.
2098 	 */
2099 	oldstate = pci_get_powerstate(child);
2100 	if (oldstate == state)
2101 		return (0);
2102 
2103 	/*
2104 	 * The PCI power management specification states that after a state
2105 	 * transition between PCI power states, system software must
2106 	 * guarantee a minimal delay before the function accesses the device.
2107 	 * Compute the worst case delay that we need to guarantee before we
2108 	 * access the device.  Many devices will be responsive much more
2109 	 * quickly than this delay, but there are some that don't respond
2110 	 * instantly to state changes.  Transitions to/from D3 state require
2111 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2112 	 * is done below with DELAY rather than a sleeper function because
2113 	 * this function can be called from contexts where we cannot sleep.
2114 	 */
2115 	highest = (oldstate > state) ? oldstate : state;
2116 	if (highest == PCI_POWERSTATE_D3)
2117 	    delay = 10000;
2118 	else if (highest == PCI_POWERSTATE_D2)
2119 	    delay = 200;
2120 	else
2121 	    delay = 0;
2122 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2123 	    & ~PCIM_PSTAT_DMASK;
2124 	result = 0;
2125 	switch (state) {
2126 	case PCI_POWERSTATE_D0:
2127 		status |= PCIM_PSTAT_D0;
2128 		break;
2129 	case PCI_POWERSTATE_D1:
2130 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2131 			return (EOPNOTSUPP);
2132 		status |= PCIM_PSTAT_D1;
2133 		break;
2134 	case PCI_POWERSTATE_D2:
2135 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2136 			return (EOPNOTSUPP);
2137 		status |= PCIM_PSTAT_D2;
2138 		break;
2139 	case PCI_POWERSTATE_D3:
2140 		status |= PCIM_PSTAT_D3;
2141 		break;
2142 	default:
2143 		return (EINVAL);
2144 	}
2145 
2146 	if (bootverbose)
2147 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2148 		    state);
2149 
2150 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2151 	if (delay)
2152 		DELAY(delay);
2153 	return (0);
2154 }
2155 
2156 int
2157 pci_get_powerstate_method(device_t dev, device_t child)
2158 {
2159 	struct pci_devinfo *dinfo = device_get_ivars(child);
2160 	pcicfgregs *cfg = &dinfo->cfg;
2161 	uint16_t status;
2162 	int result;
2163 
2164 	if (cfg->pp.pp_cap != 0) {
2165 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2166 		switch (status & PCIM_PSTAT_DMASK) {
2167 		case PCIM_PSTAT_D0:
2168 			result = PCI_POWERSTATE_D0;
2169 			break;
2170 		case PCIM_PSTAT_D1:
2171 			result = PCI_POWERSTATE_D1;
2172 			break;
2173 		case PCIM_PSTAT_D2:
2174 			result = PCI_POWERSTATE_D2;
2175 			break;
2176 		case PCIM_PSTAT_D3:
2177 			result = PCI_POWERSTATE_D3;
2178 			break;
2179 		default:
2180 			result = PCI_POWERSTATE_UNKNOWN;
2181 			break;
2182 		}
2183 	} else {
2184 		/* No support, device is always at D0 */
2185 		result = PCI_POWERSTATE_D0;
2186 	}
2187 	return (result);
2188 }
2189 
2190 /*
2191  * Some convenience functions for PCI device drivers.
2192  */
2193 
2194 static __inline void
2195 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2196 {
2197 	uint16_t	command;
2198 
2199 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2200 	command |= bit;
2201 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2202 }
2203 
2204 static __inline void
2205 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2206 {
2207 	uint16_t	command;
2208 
2209 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2210 	command &= ~bit;
2211 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2212 }
2213 
/*
 * Set the bus-master enable bit in the child's command register,
 * allowing the device to initiate DMA.  Always succeeds.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2220 
/*
 * Clear the bus-master enable bit in the child's command register,
 * preventing the device from initiating DMA.  Always succeeds.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2227 
2228 int
2229 pci_enable_io_method(device_t dev, device_t child, int space)
2230 {
2231 	uint16_t bit;
2232 
2233 	switch(space) {
2234 	case SYS_RES_IOPORT:
2235 		bit = PCIM_CMD_PORTEN;
2236 		break;
2237 	case SYS_RES_MEMORY:
2238 		bit = PCIM_CMD_MEMEN;
2239 		break;
2240 	default:
2241 		return (EINVAL);
2242 	}
2243 	pci_set_command_bit(dev, child, bit);
2244 	return (0);
2245 }
2246 
2247 int
2248 pci_disable_io_method(device_t dev, device_t child, int space)
2249 {
2250 	uint16_t bit;
2251 
2252 	switch(space) {
2253 	case SYS_RES_IOPORT:
2254 		bit = PCIM_CMD_PORTEN;
2255 		break;
2256 	case SYS_RES_MEMORY:
2257 		bit = PCIM_CMD_MEMEN;
2258 		break;
2259 	default:
2260 		return (EINVAL);
2261 	}
2262 	pci_clear_command_bit(dev, child, bit);
2263 	return (0);
2264 }
2265 
2266 /*
2267  * New style pci driver.  Parent device is either a pci-host-bridge or a
2268  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2269  */
2270 
2271 void
2272 pci_print_verbose(struct pci_devinfo *dinfo)
2273 {
2274 
2275 	if (bootverbose) {
2276 		pcicfgregs *cfg = &dinfo->cfg;
2277 
2278 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2279 		    cfg->vendor, cfg->device, cfg->revid);
2280 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2281 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2282 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2283 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2284 		    cfg->mfdev);
2285 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2286 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2287 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2288 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2289 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2290 		if (cfg->intpin > 0)
2291 			printf("\tintpin=%c, irq=%d\n",
2292 			    cfg->intpin +'a' -1, cfg->intline);
2293 		if (cfg->pp.pp_cap) {
2294 			uint16_t status;
2295 
2296 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2297 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2298 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2299 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2300 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2301 			    status & PCIM_PSTAT_DMASK);
2302 		}
2303 		if (cfg->msi.msi_location) {
2304 			int ctrl;
2305 
2306 			ctrl = cfg->msi.msi_ctrl;
2307 			printf("\tMSI supports %d message%s%s%s\n",
2308 			    cfg->msi.msi_msgnum,
2309 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2310 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2311 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2312 		}
2313 		if (cfg->msix.msix_location) {
2314 			printf("\tMSI-X supports %d message%s ",
2315 			    cfg->msix.msix_msgnum,
2316 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2317 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2318 				printf("in map 0x%x\n",
2319 				    cfg->msix.msix_table_bar);
2320 			else
2321 				printf("in maps 0x%x and 0x%x\n",
2322 				    cfg->msix.msix_table_bar,
2323 				    cfg->msix.msix_pba_bar);
2324 		}
2325 	}
2326 }
2327 
2328 static int
2329 pci_porten(device_t dev)
2330 {
2331 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2332 }
2333 
2334 static int
2335 pci_memen(device_t dev)
2336 {
2337 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2338 }
2339 
/*
 * Size a BAR: return the register's original value in *mapp and the
 * value read back after writing all 1's in *testvalp.  The bottom
 * log2(size) bits of *testvalp read back as 0, which pci_mapsize()
 * later uses to compute the BAR's length.  The original value is
 * restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2401 
2402 static void
2403 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2404 {
2405 	pci_addr_t map;
2406 	int ln2range;
2407 
2408 	map = pci_read_config(dev, reg, 4);
2409 
2410 	/* The device ROM BAR is always 32-bits. */
2411 	if (reg == PCIR_BIOS)
2412 		return;
2413 	ln2range = pci_maprange(map);
2414 	pci_write_config(dev, reg, base, 4);
2415 	if (ln2range == 64)
2416 		pci_write_config(dev, reg + 4, base >> 32, 4);
2417 }
2418 
2419 /*
2420  * Add a resource based on a pci map register. Return 1 if the map
2421  * register is a 32bit map register or 2 if it is a 64bit register.
2422  */
2423 static int
2424 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2425     int force, int prefetch)
2426 {
2427 	pci_addr_t base, map, testval;
2428 	pci_addr_t start, end, count;
2429 	int barlen, basezero, maprange, mapsize, type;
2430 	uint16_t cmd;
2431 	struct resource *res;
2432 
2433 	pci_read_bar(dev, reg, &map, &testval);
2434 	if (PCI_BAR_MEM(map)) {
2435 		type = SYS_RES_MEMORY;
2436 		if (map & PCIM_BAR_MEM_PREFETCH)
2437 			prefetch = 1;
2438 	} else
2439 		type = SYS_RES_IOPORT;
2440 	mapsize = pci_mapsize(testval);
2441 	base = pci_mapbase(map);
2442 #ifdef __PCI_BAR_ZERO_VALID
2443 	basezero = 0;
2444 #else
2445 	basezero = base == 0;
2446 #endif
2447 	maprange = pci_maprange(map);
2448 	barlen = maprange == 64 ? 2 : 1;
2449 
2450 	/*
2451 	 * For I/O registers, if bottom bit is set, and the next bit up
2452 	 * isn't clear, we know we have a BAR that doesn't conform to the
2453 	 * spec, so ignore it.  Also, sanity check the size of the data
2454 	 * areas to the type of memory involved.  Memory must be at least
2455 	 * 16 bytes in size, while I/O ranges must be at least 4.
2456 	 */
2457 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2458 		return (barlen);
2459 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2460 	    (type == SYS_RES_IOPORT && mapsize < 2))
2461 		return (barlen);
2462 
2463 	if (bootverbose) {
2464 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2465 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2466 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2467 			printf(", port disabled\n");
2468 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2469 			printf(", memory disabled\n");
2470 		else
2471 			printf(", enabled\n");
2472 	}
2473 
2474 	/*
2475 	 * If base is 0, then we have problems if this architecture does
2476 	 * not allow that.  It is best to ignore such entries for the
2477 	 * moment.  These will be allocated later if the driver specifically
2478 	 * requests them.  However, some removable busses look better when
2479 	 * all resources are allocated, so allow '0' to be overriden.
2480 	 *
2481 	 * Similarly treat maps whose values is the same as the test value
2482 	 * read back.  These maps have had all f's written to them by the
2483 	 * BIOS in an attempt to disable the resources.
2484 	 */
2485 	if (!force && (basezero || map == testval))
2486 		return (barlen);
2487 	if ((u_long)base != base) {
2488 		device_printf(bus,
2489 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2490 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2491 		    pci_get_function(dev), reg);
2492 		return (barlen);
2493 	}
2494 
2495 	/*
2496 	 * This code theoretically does the right thing, but has
2497 	 * undesirable side effects in some cases where peripherals
2498 	 * respond oddly to having these bits enabled.  Let the user
2499 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2500 	 * default).
2501 	 */
2502 	if (pci_enable_io_modes) {
2503 		/* Turn on resources that have been left off by a lazy BIOS */
2504 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2505 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2506 			cmd |= PCIM_CMD_PORTEN;
2507 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2508 		}
2509 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2510 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2511 			cmd |= PCIM_CMD_MEMEN;
2512 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2513 		}
2514 	} else {
2515 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2516 			return (barlen);
2517 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2518 			return (barlen);
2519 	}
2520 
2521 	count = 1 << mapsize;
2522 	if (basezero || base == pci_mapbase(testval)) {
2523 		start = 0;	/* Let the parent decide. */
2524 		end = ~0ULL;
2525 	} else {
2526 		start = base;
2527 		end = base + (1 << mapsize) - 1;
2528 	}
2529 	resource_list_add(rl, type, reg, start, end, count);
2530 
2531 	/*
2532 	 * Try to allocate the resource for this BAR from our parent
2533 	 * so that this resource range is already reserved.  The
2534 	 * driver for this device will later inherit this resource in
2535 	 * pci_alloc_resource().
2536 	 */
2537 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2538 	    prefetch ? RF_PREFETCHABLE : 0);
2539 	if (res == NULL) {
2540 		/*
2541 		 * If the allocation fails, clear the BAR and delete
2542 		 * the resource list entry to force
2543 		 * pci_alloc_resource() to allocate resources from the
2544 		 * parent.
2545 		 */
2546 		resource_list_delete(rl, type, reg);
2547 		start = 0;
2548 	} else
2549 		start = rman_get_start(res);
2550 	pci_write_bar(dev, reg, start);
2551 	return (barlen);
2552 }
2553 
2554 /*
2555  * For ATA devices we need to decide early what addressing mode to use.
2556  * Legacy demands that the primary and secondary ATA ports sits on the
2557  * same addresses that old ISA hardware did. This dictates that we use
2558  * those addresses and ignore the BAR's if we cannot set PCI native
2559  * addressing mode.
2560  */
2561 static void
2562 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2563     uint32_t prefetchmask)
2564 {
2565 	struct resource *r;
2566 	int rid, type, progif;
2567 #if 0
2568 	/* if this device supports PCI native addressing use it */
2569 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2570 	if ((progif & 0x8a) == 0x8a) {
2571 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2572 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2573 			printf("Trying ATA native PCI addressing mode\n");
2574 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2575 		}
2576 	}
2577 #endif
2578 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2579 	type = SYS_RES_IOPORT;
2580 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2581 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2582 		    prefetchmask & (1 << 0));
2583 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2584 		    prefetchmask & (1 << 1));
2585 	} else {
2586 		rid = PCIR_BAR(0);
2587 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2588 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2589 		    0x1f7, 8, 0);
2590 		rid = PCIR_BAR(1);
2591 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2592 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2593 		    0x3f6, 1, 0);
2594 	}
2595 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2596 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2597 		    prefetchmask & (1 << 2));
2598 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2599 		    prefetchmask & (1 << 3));
2600 	} else {
2601 		rid = PCIR_BAR(2);
2602 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2603 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2604 		    0x177, 8, 0);
2605 		rid = PCIR_BAR(3);
2606 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2607 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2608 		    0x376, 1, 0);
2609 	}
2610 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2611 	    prefetchmask & (1 << 4));
2612 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2613 	    prefetchmask & (1 << 5));
2614 }
2615 
/*
 * Work out the legacy INTx IRQ for a device and add it to the resource
 * list as rid 0.  Sources, in priority order: a user tunable
 * (hw.pci<domain>.<bus>.<slot>.INT<pin>.irq), bus routing via
 * PCI_ASSIGN_INTERRUPT, and the intline config register.  If
 * force_route is set, routing is attempted even when intline already
 * holds a valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2663 
/*
 * Perform early OHCI takeover from SMM.  If the controller's
 * InterruptRouting bit indicates BIOS/SMM ownership, request an
 * ownership change and poll (up to ~100ms) for the handoff; if SMM
 * never responds, force a host controller reset.  Interrupts are then
 * disabled so the firmware cannot interfere before the real driver
 * attaches.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* OHCI operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for SMM to drop ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2700 
/*
 * Perform early UHCI takeover from SMM.  UHCI legacy support is
 * controlled through PCI config space rather than MMIO: keep only the
 * PIRQD enable bit set, then zero the controller's interrupt enable
 * register.
 */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2724 
/*
 * Perform early EHCI takeover from SMM.  Walk the Extended Capability
 * list (located via HCCPARAMS/EECP) looking for the Legacy Support
 * capability; when the BIOS semaphore is set, claim the OS semaphore
 * and poll (up to ~100ms) for the BIOS to release ownership, then
 * disable controller interrupts.
 *
 * NOTE(review): the walk trusts the hardware's EECP next-pointers to
 * terminate; a malformed loop in the chain would spin here -- confirm
 * this is acceptable for early boot.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* EHCI capability/operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS semaphore to clear. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2780 
/*
 * Populate the child's resource list: BARs (with special handling for
 * legacy ATA controllers), quirk-mandated extra map registers, the
 * legacy INTx interrupt, and early USB controller takeover from SMM.
 * 'force' and 'prefetchmask' are passed through to pci_add_map().
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/*
	 * ATA devices needs special map treatment: controllers in (or
	 * forced into) compatibility mode use fixed legacy ISA ports
	 * instead of their BARs.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 to skip 64-bit BAR halves. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM before a driver attaches. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2835 
/*
 * Scan every slot/function on the given bus and add a child device for
 * each function that responds.  Multi-function scanning is gated on the
 * MFDEV bit of function 0's header type.  'dinfo_size' lets subclassed
 * busses allocate a larger devinfo structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots whose header type is not a known layout. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			/* pci_read_device() returns NULL for absent funcs. */
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2868 
/*
 * Attach a devinfo as a new child of the bus: create the device_t,
 * snapshot its config space, re-apply that snapshot (which also powers
 * the device to D0 if needed), and add its BAR/IRQ resources.  The
 * save-then-restore order is deliberate: restore works from the state
 * just captured.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2880 
/* Generic probe: every pcib child is a PCI bus. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2890 
/*
 * Attach the bus: query the parent bridge for our domain and bus
 * number, enumerate all children, then probe/attach them generically.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2910 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (when ACPI-driven power management is enabled) move
 * attached type-0 devices into D3 or whatever state ACPI suggests.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2959 
/*
 * Bus resume method: power each child back to D0 (via ACPI when
 * enabled), restore its saved config space, and then resume the
 * children generically.  Unattached children get their config
 * re-saved so a later driver attach starts from the powered-up state.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2997 
2998 static void
2999 pci_load_vendor_data(void)
3000 {
3001 	caddr_t vendordata, info;
3002 
3003 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3004 		info = preload_search_info(vendordata, MODINFO_ADDR);
3005 		pci_vendordata = *(char **)info;
3006 		info = preload_search_info(vendordata, MODINFO_SIZE);
3007 		pci_vendordata_size = *(size_t *)info;
3008 		/* terminate the database */
3009 		pci_vendordata[pci_vendordata_size] = '\n';
3010 	}
3011 }
3012 
3013 void
3014 pci_driver_added(device_t dev, driver_t *driver)
3015 {
3016 	int numdevs;
3017 	device_t *devlist;
3018 	device_t child;
3019 	struct pci_devinfo *dinfo;
3020 	int i;
3021 
3022 	if (bootverbose)
3023 		device_printf(dev, "driver added\n");
3024 	DEVICE_IDENTIFY(driver, dev);
3025 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3026 		return;
3027 	for (i = 0; i < numdevs; i++) {
3028 		child = devlist[i];
3029 		if (device_get_state(child) != DS_NOTPRESENT)
3030 			continue;
3031 		dinfo = device_get_ivars(child);
3032 		pci_print_verbose(dinfo);
3033 		if (bootverbose)
3034 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3035 		pci_cfg_restore(child, dinfo);
3036 		if (device_probe_and_attach(child) != 0)
3037 			pci_cfg_save(child, dinfo, 1);
3038 	}
3039 	free(devlist, M_TEMP);
3040 }
3041 
/*
 * Bus method: install an interrupt handler for a child device.
 *
 * After the generic setup succeeds, direct children get PCI-specific
 * bookkeeping: for the legacy INTx interrupt (rid 0) the INTx-disable
 * command bit is cleared; for an MSI/MSI-X rid the message is lazily
 * mapped through the parent bridge on first use, enabled when its
 * handler count goes from 0 to 1, and INTx is masked.  On a mapping
 * failure the generic handler is torn down again.  Returns 0 or errno.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the message on first use, then enable. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3133 
/*
 * Bus method: remove an interrupt handler installed by pci_setup_intr().
 *
 * For direct children the PCI-side state is unwound as well: rid 0
 * (legacy INTx) is masked via the command register; for an MSI/MSI-X
 * rid the per-message handler count is decremented and the message is
 * disabled (MSI) or masked (MSI-X) when the count reaches zero.
 * Returns 0 or an errno value.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/* The caller must pass the same resource it set up. */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N maps to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3192 
3193 int
3194 pci_print_child(device_t dev, device_t child)
3195 {
3196 	struct pci_devinfo *dinfo;
3197 	struct resource_list *rl;
3198 	int retval = 0;
3199 
3200 	dinfo = device_get_ivars(child);
3201 	rl = &dinfo->resources;
3202 
3203 	retval += bus_print_child_header(dev, child);
3204 
3205 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3206 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3207 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3208 	if (device_get_flags(dev))
3209 		retval += printf(" flags %#x", device_get_flags(dev));
3210 
3211 	retval += printf(" at device %d.%d", pci_get_slot(child),
3212 	    pci_get_function(child));
3213 
3214 	retval += bus_print_child_footer(dev, child);
3215 
3216 	return (retval);
3217 }
3218 
/*
 * Class/subclass description table used by pci_probe_nomatch() when a
 * device has no entry in the loaded vendor database.  A subclass of -1
 * supplies the generic description for the whole class; the table is
 * terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3310 
3311 void
3312 pci_probe_nomatch(device_t dev, device_t child)
3313 {
3314 	int	i;
3315 	char	*cp, *scp, *device;
3316 
3317 	/*
3318 	 * Look for a listing for this device in a loaded device database.
3319 	 */
3320 	if ((device = pci_describe_device(child)) != NULL) {
3321 		device_printf(dev, "<%s>", device);
3322 		free(device, M_DEVBUF);
3323 	} else {
3324 		/*
3325 		 * Scan the class/subclass descriptions for a general
3326 		 * description.
3327 		 */
3328 		cp = "unknown";
3329 		scp = NULL;
3330 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3331 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3332 				if (pci_nomatch_tab[i].subclass == -1) {
3333 					cp = pci_nomatch_tab[i].desc;
3334 				} else if (pci_nomatch_tab[i].subclass ==
3335 				    pci_get_subclass(child)) {
3336 					scp = pci_nomatch_tab[i].desc;
3337 				}
3338 			}
3339 		}
3340 		device_printf(dev, "<%s%s%s>",
3341 		    cp ? cp : "",
3342 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3343 		    scp ? scp : "");
3344 	}
3345 	printf(" at device %d.%d (no driver attached)\n",
3346 	    pci_get_slot(child), pci_get_function(child));
3347 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3348 	return;
3349 }
3350 
3351 /*
3352  * Parse the PCI device database, if loaded, and return a pointer to a
3353  * description of the device.
3354  *
3355  * The database is flat text formatted as follows:
3356  *
3357  * Any line not in a valid format is ignored.
3358  * Lines are terminated with newline '\n' characters.
3359  *
3360  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3361  * the vendor name.
3362  *
3363  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3364  * - devices cannot be listed without a corresponding VENDOR line.
3365  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3366  * another TAB, then the device name.
3367  */
3368 
3369 /*
3370  * Assuming (ptr) points to the beginning of a line in the database,
3371  * return the vendor or device and description of the next entry.
3372  * The value of (vendor) or (device) inappropriate for the entry type
3373  * is set to -1.  Returns nonzero at the end of the database.
3374  *
3375  * Note that this is slightly unrobust in the face of corrupt data;
3376  * we attempt to safeguard against this by spamming the end of the
3377  * database with a newline when we initialise.
3378  */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Until a line matches, report "no entry of this kind". */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the database from the cursor. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3424 
/*
 * Build a malloc'ed "vendor, device" description string for (dev) from
 * the preloaded vendor database, or return NULL if the database is not
 * loaded, an allocation fails, or the vendor is not listed.  If the
 * vendor is known but the device is not, the device part falls back to
 * its hex ID.  The caller frees the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* Stop at end of database without a vendor match. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor section ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device under a known vendor: show the raw device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3477 
/*
 * Bus method: read a PCI instance variable for (child) into *result.
 * Each ivar simply reports a field cached in the child's pcicfgregs.
 * Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR (unsupported here),
 * or ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3560 
/*
 * Bus method: write a PCI instance variable of (child).  Only the
 * interrupt pin may be modified; identification and location ivars are
 * read-only and yield EINVAL, and unknown ivars yield ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3593 
3594 
3595 #include "opt_ddb.h"
3596 #ifdef DDB
3597 #include <ddb/ddb.h>
3598 #include <sys/cons.h>
3599 
3600 /*
3601  * List resources based on pci map registers, used for within ddb
3602  */
3603 
/*
 * 'show pciregs' DDB command: walk the global pci_devq list and print
 * one summary line per device (selector, class code, subsystem and
 * chip IDs, revision, header type).  Devices with no attached driver
 * are labelled "none" with a running counter as the unit number.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3643 #endif /* DDB */
3644 
/*
 * Lazily reserve the resource backing a BAR that has no entry yet in
 * the child's resource list.  The BAR is probed to learn its true size
 * and type (overriding the caller's count, since allocating less than
 * the BAR decodes would let the excess be handed to another device),
 * a suitable range is allocated from the parent, recorded in the
 * resource list as RLE_RESERVED, and programmed into the BAR.
 * Returns the reserved resource, or NULL if the BAR is unimplemented,
 * of the wrong type for the request, or the allocation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* The request's resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3740 
3741 
/*
 * Bus method: allocate a resource for a child device.
 *
 * Requests from indirect descendants are passed straight up.  For
 * direct children, lazy allocation is performed first: a legacy IRQ is
 * routed on demand (unless MSI/MSI-X is already in use, which makes
 * rid 0 invalid), and an unreserved BAR is sized and reserved via
 * pci_reserve_map().  The actual allocation then goes through the
 * child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3792 
3793 int
3794 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3795     struct resource *r)
3796 {
3797 	int error;
3798 
3799 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3800 	if (error)
3801 		return (error);
3802 
3803 	/* Enable decoding in the command register when activating BARs. */
3804 	if (device_get_parent(child) == dev) {
3805 		/* Device ROMs need their decoding explicitly enabled. */
3806 		if (rid == PCIR_BIOS)
3807 			pci_write_config(child, rid, rman_get_start(r) |
3808 			    PCIM_BIOS_ENABLE, 4);
3809 		switch (type) {
3810 		case SYS_RES_IOPORT:
3811 		case SYS_RES_MEMORY:
3812 			error = PCI_ENABLE_IO(dev, child, type);
3813 			break;
3814 		}
3815 	}
3816 	return (error);
3817 }
3818 
3819 int
3820 pci_deactivate_resource(device_t dev, device_t child, int type,
3821     int rid, struct resource *r)
3822 {
3823 	int error;
3824 
3825 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3826 	if (error)
3827 		return (error);
3828 
3829 	/* Disable decoding for device ROMs. */
3830 	if (rid == PCIR_BIOS)
3831 		pci_write_config(child, rid, rman_get_start(r), 4);
3832 	return (0);
3833 }
3834 
/*
 * Destroy a PCI child device: detach its driver, turn off its memory
 * and I/O port decoding, release and unreserve every resource on its
 * resource list, then delete the device and free its config data.
 * Resources still held by the (now detached) child are forcibly
 * released with a complaint rather than leaked.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3874 
/*
 * Bus method: delete a resource list entry of a direct child.  The
 * request is refused (with a complaint) while the resource is active
 * or busy.  For BAR-backed entries the BAR is first cleared (unless
 * the platform defines __PCI_BAR_ZERO_VALID) so the device stops
 * decoding before the reservation is dropped.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3917 
3918 struct resource_list *
3919 pci_get_resource_list (device_t dev, device_t child)
3920 {
3921 	struct pci_devinfo *dinfo = device_get_ivars(child);
3922 
3923 	return (&dinfo->resources);
3924 }
3925 
3926 uint32_t
3927 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3928 {
3929 	struct pci_devinfo *dinfo = device_get_ivars(child);
3930 	pcicfgregs *cfg = &dinfo->cfg;
3931 
3932 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3933 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3934 }
3935 
3936 void
3937 pci_write_config_method(device_t dev, device_t child, int reg,
3938     uint32_t val, int width)
3939 {
3940 	struct pci_devinfo *dinfo = device_get_ivars(child);
3941 	pcicfgregs *cfg = &dinfo->cfg;
3942 
3943 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3944 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3945 }
3946 
3947 int
3948 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3949     size_t buflen)
3950 {
3951 
3952 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3953 	    pci_get_function(child));
3954 	return (0);
3955 }
3956 
3957 int
3958 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3959     size_t buflen)
3960 {
3961 	struct pci_devinfo *dinfo;
3962 	pcicfgregs *cfg;
3963 
3964 	dinfo = device_get_ivars(child);
3965 	cfg = &dinfo->cfg;
3966 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3967 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3968 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3969 	    cfg->progif);
3970 	return (0);
3971 }
3972 
3973 int
3974 pci_assign_interrupt_method(device_t dev, device_t child)
3975 {
3976 	struct pci_devinfo *dinfo = device_get_ivars(child);
3977 	pcicfgregs *cfg = &dinfo->cfg;
3978 
3979 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3980 	    cfg->intpin));
3981 }
3982 
3983 static int
3984 pci_modevent(module_t mod, int what, void *arg)
3985 {
3986 	static struct cdev *pci_cdev;
3987 
3988 	switch (what) {
3989 	case MOD_LOAD:
3990 		STAILQ_INIT(&pci_devq);
3991 		pci_generation = 0;
3992 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3993 		    "pci");
3994 		pci_load_vendor_data();
3995 		break;
3996 
3997 	case MOD_UNLOAD:
3998 		destroy_dev(pci_cdev);
3999 		break;
4000 	}
4001 
4002 	return (0);
4003 }
4004 
/*
 * Restore the configuration registers previously saved in dinfo by
 * pci_cfg_save(), typically on resume.  Power is raised to D0 first so
 * the register writes stick.  No-op for non-type-0 headers.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the BARs and the remaining writable type-0 header fields. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4050 
/*
 * Save the writable configuration registers of a type-0 device into
 * dinfo (for later pci_cfg_restore()) and, if 'setstate' is non-zero
 * and the pci_do_power_nodriver policy allows it for this device
 * class, place the device in the D3 power state.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/*
	 * Each policy level includes everything permitted by the levels
	 * above it; the fallthroughs below are intentional.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4134