xref: /freebsd/sys/dev/pci/pci.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 #ifdef __HAVE_ACPI
73 #include <contrib/dev/acpica/include/acpi.h>
74 #include "acpi_if.h"
75 #else
76 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
77 #endif
78 
/*
 * Forward declarations for file-local helpers.  Grouped by purpose:
 * BAR/ROM decoding, device enumeration, description/vendor-data
 * handling, capability parsing, VPD access, and MSI/MSI-X management.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
121 
/*
 * Method dispatch table for the pci bus driver: newbus device and bus
 * interface implementations plus the PCI-specific kobj interface
 * (declared in pci_if.m / "pci_if.h").
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
173 
/* Driver class and module registration; attaches under pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * In-memory copy of the vendor/device description database, presumably
 * loaded by pci_load_vendor_data() (declared above) — consumed by
 * pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
182 
183 
/*
 * Description of one device-specific workaround, keyed by the combined
 * vendor/device ID word.  The meaning of arg1/arg2 depends on the quirk
 * type (e.g. for PCI_QUIRK_MAP_REG, arg1 appears to hold the register
 * offset — see the 0x90 entries in pci_quirks[] below).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
192 
/* Quirk table; scanned linearly, terminated by a zero devid entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
227 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices (appended in pci_read_device). */
struct devlist pci_devq;
/* Bumped on every list change; lets readers detect a stale snapshot. */
uint32_t pci_generation;
/* Count of devices currently on pci_devq. */
uint32_t pci_numdevs = 0;
/* Set by pci_read_extcap() when a PCIe / PCI-X capability is seen. */
static int pcie_chipset, pcix_chipset;
237 
238 /* sysctl vars */
239 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
240 
241 static int pci_enable_io_modes = 1;
242 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
243 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
244     &pci_enable_io_modes, 1,
245     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
246 enable these bits correctly.  We'd like to do this all the time, but there\n\
247 are some peripherals that this causes problems with.");
248 
249 static int pci_do_power_nodriver = 0;
250 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
251 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
252     &pci_do_power_nodriver, 0,
253   "Place a function into D3 state when no driver attaches to it.  0 means\n\
254 disable.  1 means conservatively place devices into D3 state.  2 means\n\
255 agressively place devices into D3 state.  3 means put absolutely everything\n\
256 in D3 state.");
257 
258 int pci_do_power_resume = 1;
259 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
260 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
261     &pci_do_power_resume, 1,
262   "Transition from D3 -> D0 on resume.");
263 
264 static int pci_do_msi = 1;
265 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
266 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
267     "Enable support for MSI interrupts");
268 
269 static int pci_do_msix = 1;
270 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
271 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
272     "Enable support for MSI-X interrupts");
273 
274 static int pci_honor_msi_blacklist = 1;
275 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
276 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
277     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
278 
279 #if defined(__i386__) || defined(__amd64__)
280 static int pci_usb_takeover = 1;
281 #else
282 static int pci_usb_takeover = 0;
283 #endif
284 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
285 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
286     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
287 Disable this if you depend on BIOS emulation of USB devices, that is\n\
288 you use USB devices (like keyboard or mouse) but do not load USB drivers");
289 
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: legacy single-domain lookup. */
	return (pci_find_dbsf(0, bus, slot, func));
}
298 
299 /* Find a device_t by domain/bus/slot/function */
300 
301 device_t
302 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
303 {
304 	struct pci_devinfo *dinfo;
305 
306 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
307 		if ((dinfo->cfg.domain == domain) &&
308 		    (dinfo->cfg.bus == bus) &&
309 		    (dinfo->cfg.slot == slot) &&
310 		    (dinfo->cfg.func == func)) {
311 			return (dinfo->cfg.dev);
312 		}
313 	}
314 
315 	return (NULL);
316 }
317 
318 /* Find a device_t by vendor/device ID */
319 
320 device_t
321 pci_find_device(uint16_t vendor, uint16_t device)
322 {
323 	struct pci_devinfo *dinfo;
324 
325 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
326 		if ((dinfo->cfg.vendor == vendor) &&
327 		    (dinfo->cfg.device == device)) {
328 			return (dinfo->cfg.dev);
329 		}
330 	}
331 
332 	return (NULL);
333 }
334 
335 static int
336 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
337 {
338 	va_list ap;
339 	int retval;
340 
341 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
342 	    cfg->func);
343 	va_start(ap, fmt);
344 	retval += vprintf(fmt, ap);
345 	va_end(ap);
346 	return (retval);
347 }
348 
349 /* return base address of memory or port map */
350 
351 static pci_addr_t
352 pci_mapbase(uint64_t mapreg)
353 {
354 
355 	if (PCI_BAR_MEM(mapreg))
356 		return (mapreg & PCIM_BAR_MEM_BASE);
357 	else
358 		return (mapreg & PCIM_BAR_IO_BASE);
359 }
360 
361 /* return map type of memory or port map */
362 
363 static const char *
364 pci_maptype(uint64_t mapreg)
365 {
366 
367 	if (PCI_BAR_IO(mapreg))
368 		return ("I/O Port");
369 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
370 		return ("Prefetchable Memory");
371 	return ("Memory");
372 }
373 
374 /* return log2 of map size decoded for memory or port map */
375 
376 static int
377 pci_mapsize(uint64_t testval)
378 {
379 	int ln2size;
380 
381 	testval = pci_mapbase(testval);
382 	ln2size = 0;
383 	if (testval != 0) {
384 		while ((testval & 1) == 0)
385 		{
386 			ln2size++;
387 			testval >>= 1;
388 		}
389 	}
390 	return (ln2size);
391 }
392 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable and reserved bits of the expansion ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
401 
402 /* return log2 of map size decided for device ROM */
403 
404 static int
405 pci_romsize(uint64_t testval)
406 {
407 	int ln2size;
408 
409 	testval = pci_rombase(testval);
410 	ln2size = 0;
411 	if (testval != 0) {
412 		while ((testval & 1) == 0)
413 		{
414 			ln2size++;
415 			testval >>= 1;
416 		}
417 	}
418 	return (ln2size);
419 }
420 
421 /* return log2 of address range supported by map register */
422 
423 static int
424 pci_maprange(uint64_t mapreg)
425 {
426 	int ln2range = 0;
427 
428 	if (PCI_BAR_IO(mapreg))
429 		ln2range = 32;
430 	else
431 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
432 		case PCIM_BAR_MEM_32:
433 			ln2range = 32;
434 			break;
435 		case PCIM_BAR_MEM_1MB:
436 			ln2range = 20;
437 			break;
438 		case PCIM_BAR_MEM_64:
439 			ln2range = 64;
440 			break;
441 		}
442 	return (ln2range);
443 }
444 
445 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
446 
447 static void
448 pci_fixancient(pcicfgregs *cfg)
449 {
450 	if (cfg->hdrtype != 0)
451 		return;
452 
453 	/* PCI to PCI bridges use header type 1 */
454 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
455 		cfg->hdrtype = 1;
456 }
457 
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge; no subvendor registers here */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
	/* Unknown header types leave cfg untouched. */
#undef REG
}
481 
482 /* read configuration header into pcicfgregs structure */
483 struct pci_devinfo *
484 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
485 {
486 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
487 	pcicfgregs *cfg = NULL;
488 	struct pci_devinfo *devlist_entry;
489 	struct devlist *devlist_head;
490 
491 	devlist_head = &pci_devq;
492 
493 	devlist_entry = NULL;
494 
495 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
496 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
497 		if (devlist_entry == NULL)
498 			return (NULL);
499 
500 		cfg = &devlist_entry->cfg;
501 
502 		cfg->domain		= d;
503 		cfg->bus		= b;
504 		cfg->slot		= s;
505 		cfg->func		= f;
506 		cfg->vendor		= REG(PCIR_VENDOR, 2);
507 		cfg->device		= REG(PCIR_DEVICE, 2);
508 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
509 		cfg->statreg		= REG(PCIR_STATUS, 2);
510 		cfg->baseclass		= REG(PCIR_CLASS, 1);
511 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
512 		cfg->progif		= REG(PCIR_PROGIF, 1);
513 		cfg->revid		= REG(PCIR_REVID, 1);
514 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
515 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
516 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
517 		cfg->intpin		= REG(PCIR_INTPIN, 1);
518 		cfg->intline		= REG(PCIR_INTLINE, 1);
519 
520 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
521 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
522 
523 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
524 		cfg->hdrtype		&= ~PCIM_MFDEV;
525 
526 		pci_fixancient(cfg);
527 		pci_hdrtypedata(pcib, b, s, f, cfg);
528 
529 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
530 			pci_read_extcap(pcib, cfg);
531 
532 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
533 
534 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
535 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
536 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
537 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
538 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
539 
540 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
541 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
542 		devlist_entry->conf.pc_vendor = cfg->vendor;
543 		devlist_entry->conf.pc_device = cfg->device;
544 
545 		devlist_entry->conf.pc_class = cfg->baseclass;
546 		devlist_entry->conf.pc_subclass = cfg->subclass;
547 		devlist_entry->conf.pc_progif = cfg->progif;
548 		devlist_entry->conf.pc_revid = cfg->revid;
549 
550 		pci_numdevs++;
551 		pci_generation++;
552 	}
553 	return (devlist_entry);
554 #undef REG
555 }
556 
/*
 * Walk the device's PCI capability list and cache the capabilities this
 * driver understands (power management, HyperTransport MSI mapping,
 * MSI, MSI-X, VPD, subvendor IDs, PCI-X, PCI-express) into *cfg.
 *
 * NOTE: the REG()/WREG() macros defined here intentionally stay
 * defined for the VPD helpers below; they are #undef'd at the end of
 * pci_read_vpd().
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is used. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Remember the location; parsing is done lazily. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
690 
691 /*
692  * PCI Vital Product Data
693  */
694 
695 #define	PCI_VPD_TIMEOUT		1000000
696 
697 static int
698 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
699 {
700 	int count = PCI_VPD_TIMEOUT;
701 
702 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
703 
704 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
705 
706 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
707 		if (--count < 0)
708 			return (ENXIO);
709 		DELAY(1);	/* limit looping */
710 	}
711 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
712 
713 	return (0);
714 }
715 
#if 0
/*
 * Write one 32-bit word of VPD at the given (4-byte aligned) offset.
 * Mirror image of pci_read_vpd_reg(): the flag bit is set along with
 * the address and the device clears it when the write completes.
 * Currently compiled out — nothing in this file writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
737 
/* Cursor state for the byte-at-a-time VPD reader (see vpd_nextbyte). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit read-ahead window */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
746 
747 static int
748 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
749 {
750 	uint32_t reg;
751 	uint8_t byte;
752 
753 	if (vrs->bytesinval == 0) {
754 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
755 			return (ENXIO);
756 		vrs->val = le32toh(reg);
757 		vrs->off += 4;
758 		byte = vrs->val & 0xff;
759 		vrs->bytesinval = 3;
760 	} else {
761 		vrs->val = vrs->val >> 8;
762 		byte = vrs->val & 0xff;
763 		vrs->bytesinval--;
764 	}
765 
766 	vrs->cksum += byte;
767 	*data = byte;
768 	return (0);
769 }
770 
/*
 * Parse the device's VPD (Vital Product Data) into cfg->vpd.
 *
 * Implemented as a byte-at-a-time state machine over the VPD resource
 * stream:
 *   state 0  - resource item header (small or large form)
 *   state 1  - identifier string bytes
 *   state 2  - VPD-R keyword header
 *   state 3  - VPD-R keyword value bytes
 *   state 4  - byte skipper (no visible transition enters it — appears
 *              to be dead; TODO confirm)
 *   state 5  - VPD-W keyword header
 *   state 6  - VPD-W keyword value bytes
 *   state -1 - normal termination, state -2 - read error
 *
 * On checksum failure the read-only data is discarded; on I/O error
 * everything parsed so far is discarded.  vpd_cached is set either way
 * so the parse is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD space is at most 0x7f 4-byte words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in header. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* The RV keyword's first byte completes the sum. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this writable field lives in VPD. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1050 
1051 int
1052 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1053 {
1054 	struct pci_devinfo *dinfo = device_get_ivars(child);
1055 	pcicfgregs *cfg = &dinfo->cfg;
1056 
1057 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1058 		pci_read_vpd(device_get_parent(dev), cfg);
1059 
1060 	*identptr = cfg->vpd.vpd_ident;
1061 
1062 	if (*identptr == NULL)
1063 		return (ENXIO);
1064 
1065 	return (0);
1066 }
1067 
1068 int
1069 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1070 	const char **vptr)
1071 {
1072 	struct pci_devinfo *dinfo = device_get_ivars(child);
1073 	pcicfgregs *cfg = &dinfo->cfg;
1074 	int i;
1075 
1076 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1077 		pci_read_vpd(device_get_parent(dev), cfg);
1078 
1079 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1080 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1081 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1082 			*vptr = cfg->vpd.vpd_ros[i].value;
1083 		}
1084 
1085 	if (i != cfg->vpd.vpd_rocnt)
1086 		return (0);
1087 
1088 	*vptr = NULL;
1089 	return (ENXIO);
1090 }
1091 
1092 /*
1093  * Find the requested extended capability and return the offset in
1094  * configuration space via the pointer provided. The function returns
1095  * 0 on success and error code otherwise.
1096  */
1097 int
1098 pci_find_extcap_method(device_t dev, device_t child, int capability,
1099     int *capreg)
1100 {
1101 	struct pci_devinfo *dinfo = device_get_ivars(child);
1102 	pcicfgregs *cfg = &dinfo->cfg;
1103 	u_int32_t status;
1104 	u_int8_t ptr;
1105 
1106 	/*
1107 	 * Check the CAP_LIST bit of the PCI status register first.
1108 	 */
1109 	status = pci_read_config(child, PCIR_STATUS, 2);
1110 	if (!(status & PCIM_STATUS_CAPPRESENT))
1111 		return (ENXIO);
1112 
1113 	/*
1114 	 * Determine the start pointer of the capabilities list.
1115 	 */
1116 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1117 	case 0:
1118 	case 1:
1119 		ptr = PCIR_CAP_PTR;
1120 		break;
1121 	case 2:
1122 		ptr = PCIR_CAP_PTR_2;
1123 		break;
1124 	default:
1125 		/* XXX: panic? */
1126 		return (ENXIO);		/* no extended capabilities support */
1127 	}
1128 	ptr = pci_read_config(child, ptr, 1);
1129 
1130 	/*
1131 	 * Traverse the capabilities list.
1132 	 */
1133 	while (ptr != 0) {
1134 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1135 			if (capreg != NULL)
1136 				*capreg = ptr;
1137 			return (0);
1138 		}
1139 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1140 	}
1141 
1142 	return (ENOENT);
1143 }
1144 
1145 /*
1146  * Support for MSI-X message interrupts.
1147  */
1148 void
1149 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1150 {
1151 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1152 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1153 	uint32_t offset;
1154 
1155 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1156 	offset = msix->msix_table_offset + index * 16;
1157 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1158 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1159 	bus_write_4(msix->msix_table_res, offset + 8, data);
1160 
1161 	/* Enable MSI -> HT mapping. */
1162 	pci_ht_map_msi(dev, address);
1163 }
1164 
1165 void
1166 pci_mask_msix(device_t dev, u_int index)
1167 {
1168 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1169 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1170 	uint32_t offset, val;
1171 
1172 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1173 	offset = msix->msix_table_offset + index * 16 + 12;
1174 	val = bus_read_4(msix->msix_table_res, offset);
1175 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1176 		val |= PCIM_MSIX_VCTRL_MASK;
1177 		bus_write_4(msix->msix_table_res, offset, val);
1178 	}
1179 }
1180 
1181 void
1182 pci_unmask_msix(device_t dev, u_int index)
1183 {
1184 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1185 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1186 	uint32_t offset, val;
1187 
1188 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1189 	offset = msix->msix_table_offset + index * 16 + 12;
1190 	val = bus_read_4(msix->msix_table_res, offset);
1191 	if (val & PCIM_MSIX_VCTRL_MASK) {
1192 		val &= ~PCIM_MSIX_VCTRL_MASK;
1193 		bus_write_4(msix->msix_table_res, offset, val);
1194 	}
1195 }
1196 
1197 int
1198 pci_pending_msix(device_t dev, u_int index)
1199 {
1200 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1201 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1202 	uint32_t offset, bit;
1203 
1204 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1205 	offset = msix->msix_pba_offset + (index / 32) * 4;
1206 	bit = 1 << index % 32;
1207 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1208 }
1209 
1210 /*
1211  * Restore MSI-X registers and table during resume.  If MSI-X is
1212  * enabled then walk the virtual table to restore the actual MSI-X
1213  * table.
1214  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			/* Unused slots or slots without handlers stay masked. */
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; msix_vectors[] is 0-based. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the shadowed control register even if nothing was alloc'd. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1242 
1243 /*
1244  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1245  * returned in *count.  After this function returns, each message will be
1246  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1247  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BAR(s) that
	 * back the MSI-X table and PBA before asking for vectors.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' is still the table BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' may be fewer than requested if the bridge ran out. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Initially vectors map 1:1 onto table entries (mte_vector is
	 * 1-based); pci_remap_msix_method() may rearrange this later.
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1379 
1380 /*
1381  * By default, pci_alloc_msix() will assign the allocated IRQ
1382  * resources consecutively to the first N messages in the MSI-X table.
1383  * However, device drivers may want to use different layouts if they
1384  * either receive fewer messages than they asked for, or they wish to
1385  * populate the MSI-X table sparsely.  This method allows the driver
1386  * to specify what layout it wants.  It must be called after a
1387  * successful pci_alloc_msix() but before any of the associated
1388  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1389  *
1390  * The 'vectors' array contains 'count' message vectors.  The array
1391  * maps directly to the MSI-X table in that index 0 in the array
1392  * specifies the vector for the first message in the MSI-X table, etc.
1393  * The vector value in each array index can either be 0 to indicate
1394  * that no vector should be assigned to a message slot, or it can be a
1395  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1397  * vector (IRQ) to be used for the corresponding message.
1398  *
1399  * On successful return, each message with a non-zero vector will have
1400  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1401  * 1.  Additionally, if any of the IRQs allocated via the previous
1402  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1403  * will be freed back to the system automatically.
1404  *
1405  * For example, suppose a driver has a MSI-X table with 6 messages and
1406  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1407  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1408  * C.  After the call to pci_alloc_msix(), the device will be setup to
1409  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1411  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1412  * be freed back to the system.  This device will also have valid
1413  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1414  *
1415  * In any case, the SYS_RES_IRQ rid X will always map to the message
1416  * at MSI-X table index X - 1 and will only be valid if a vector is
1417  * assigned to that table entry.
1418  */
1419 int
1420 pci_remap_msix_method(device_t dev, device_t child, int count,
1421     const u_int *vectors)
1422 {
1423 	struct pci_devinfo *dinfo = device_get_ivars(child);
1424 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1425 	struct resource_list_entry *rle;
1426 	int i, irq, j, *used;
1427 
1428 	/*
1429 	 * Have to have at least one message in the table but the
1430 	 * table can't be bigger than the actual MSI-X table in the
1431 	 * device.
1432 	 */
1433 	if (count == 0 || count > msix->msix_msgnum)
1434 		return (EINVAL);
1435 
1436 	/* Sanity check the vectors. */
1437 	for (i = 0; i < count; i++)
1438 		if (vectors[i] > msix->msix_alloc)
1439 			return (EINVAL);
1440 
1441 	/*
1442 	 * Make sure there aren't any holes in the vectors to be used.
1443 	 * It's a big pain to support it, and it doesn't really make
1444 	 * sense anyway.  Also, at least one vector must be used.
1445 	 */
1446 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1447 	    M_ZERO);
1448 	for (i = 0; i < count; i++)
1449 		if (vectors[i] != 0)
1450 			used[vectors[i] - 1] = 1;
1451 	for (i = 0; i < msix->msix_alloc - 1; i++)
1452 		if (used[i] == 0 && used[i + 1] == 1) {
1453 			free(used, M_DEVBUF);
1454 			return (EINVAL);
1455 		}
1456 	if (used[0] != 1) {
1457 		free(used, M_DEVBUF);
1458 		return (EINVAL);
1459 	}
1460 
1461 	/* Make sure none of the resources are allocated. */
1462 	for (i = 0; i < msix->msix_table_len; i++) {
1463 		if (msix->msix_table[i].mte_vector == 0)
1464 			continue;
1465 		if (msix->msix_table[i].mte_handlers > 0)
1466 			return (EBUSY);
1467 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1468 		KASSERT(rle != NULL, ("missing resource"));
1469 		if (rle->res != NULL)
1470 			return (EBUSY);
1471 	}
1472 
1473 	/* Free the existing resource list entries. */
1474 	for (i = 0; i < msix->msix_table_len; i++) {
1475 		if (msix->msix_table[i].mte_vector == 0)
1476 			continue;
1477 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1478 	}
1479 
1480 	/*
1481 	 * Build the new virtual table keeping track of which vectors are
1482 	 * used.
1483 	 */
1484 	free(msix->msix_table, M_DEVBUF);
1485 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1486 	    M_DEVBUF, M_WAITOK | M_ZERO);
1487 	for (i = 0; i < count; i++)
1488 		msix->msix_table[i].mte_vector = vectors[i];
1489 	msix->msix_table_len = count;
1490 
1491 	/* Free any unused IRQs and resize the vectors array if necessary. */
1492 	j = msix->msix_alloc - 1;
1493 	if (used[j] == 0) {
1494 		struct msix_vector *vec;
1495 
1496 		while (used[j] == 0) {
1497 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1498 			    msix->msix_vectors[j].mv_irq);
1499 			j--;
1500 		}
1501 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1502 		    M_WAITOK);
1503 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1504 		    (j + 1));
1505 		free(msix->msix_vectors, M_DEVBUF);
1506 		msix->msix_vectors = vec;
1507 		msix->msix_alloc = j + 1;
1508 	}
1509 	free(used, M_DEVBUF);
1510 
1511 	/* Map the IRQs onto the rids. */
1512 	for (i = 0; i < count; i++) {
1513 		if (vectors[i] == 0)
1514 			continue;
1515 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1516 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1517 		    irq, 1);
1518 	}
1519 
1520 	if (bootverbose) {
1521 		device_printf(child, "Remapped MSI-X IRQs as: ");
1522 		for (i = 0; i < count; i++) {
1523 			if (i != 0)
1524 				printf(", ");
1525 			if (vectors[i] == 0)
1526 				printf("---");
1527 			else
1528 				printf("%d",
1529 				    msix->msix_vectors[vectors[i]].mv_irq);
1530 		}
1531 		printf("\n");
1532 	}
1533 
1534 	return (0);
1535 }
1536 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, tear down the SYS_RES_IRQ resource list
 * entries, and hand the IRQs back to the parent bridge.  Returns
 * ENODEV if no messages are allocated and EBUSY if any message still
 * has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		/* Skip table slots with no vector assigned. */
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1583 
1584 /*
1585  * Return the max supported MSI-X messages this device supports.
1586  * Basically, assuming the MD code can alloc messages, this function
1587  * should return the maximum value that pci_alloc_msix() can return.
1588  * Thus, it is subject to the tunables, etc.
1589  */
1590 int
1591 pci_msix_count_method(device_t dev, device_t child)
1592 {
1593 	struct pci_devinfo *dinfo = device_get_ivars(child);
1594 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1595 
1596 	if (pci_do_msix && msix->msix_location != 0)
1597 		return (msix->msix_msgnum);
1598 	return (0);
1599 }
1600 
1601 /*
1602  * HyperTransport MSI mapping control
1603  */
1604 void
1605 pci_ht_map_msi(device_t dev, uint64_t addr)
1606 {
1607 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1608 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1609 
1610 	if (!ht->ht_msimap)
1611 		return;
1612 
1613 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1614 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1615 		/* Enable MSI -> HT mapping. */
1616 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1617 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1618 		    ht->ht_msictrl, 2);
1619 	}
1620 
1621 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1622 		/* Disable MSI -> HT mapping. */
1623 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1624 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1625 		    ht->ht_msictrl, 2);
1626 	}
1627 }
1628 
1629 int
1630 pci_get_max_read_req(device_t dev)
1631 {
1632 	int cap;
1633 	uint16_t val;
1634 
1635 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1636 		return (0);
1637 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1638 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1639 	val >>= 12;
1640 	return (1 << (val + 7));
1641 }
1642 
1643 int
1644 pci_set_max_read_req(device_t dev, int size)
1645 {
1646 	int cap;
1647 	uint16_t val;
1648 
1649 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1650 		return (0);
1651 	if (size < 128)
1652 		size = 128;
1653 	if (size > 4096)
1654 		size = 4096;
1655 	size = (1 << (fls(size) - 1));
1656 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1657 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1658 	val |= (fls(size) - 8) << 12;
1659 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1660 	return (size);
1661 }
1662 
1663 /*
1664  * Support for MSI message signalled interrupts.
1665  */
/*
 * Program the MSI address and data registers of 'dev' and set the MSI
 * enable bit.  With a 64-bit capability the high address dword is also
 * written and the data register sits at a different offset.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values before setting the enable bit. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1692 
1693 void
1694 pci_disable_msi(device_t dev)
1695 {
1696 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1697 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1698 
1699 	/* Disable MSI -> HT mapping. */
1700 	pci_ht_map_msi(dev, 0);
1701 
1702 	/* Disable MSI in the control register. */
1703 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1704 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1705 	    2);
1706 }
1707 
1708 /*
1709  * Restore MSI registers during resume.  If MSI is enabled then
1710  * restore the data and address registers in addition to the control
1711  * register.
1712  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only rewrite address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* 64-bit capability moves the data register. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the shadowed control register unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1738 
1739 int
1740 pci_remap_msi_irq(device_t dev, u_int irq)
1741 {
1742 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1743 	pcicfgregs *cfg = &dinfo->cfg;
1744 	struct resource_list_entry *rle;
1745 	struct msix_table_entry *mte;
1746 	struct msix_vector *mv;
1747 	device_t bus;
1748 	uint64_t addr;
1749 	uint32_t data;
1750 	int error, i, j;
1751 
1752 	bus = device_get_parent(dev);
1753 
1754 	/*
1755 	 * Handle MSI first.  We try to find this IRQ among our list
1756 	 * of MSI IRQs.  If we find it, we request updated address and
1757 	 * data registers and apply the results.
1758 	 */
1759 	if (cfg->msi.msi_alloc > 0) {
1760 
1761 		/* If we don't have any active handlers, nothing to do. */
1762 		if (cfg->msi.msi_handlers == 0)
1763 			return (0);
1764 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1765 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1766 			    i + 1);
1767 			if (rle->start == irq) {
1768 				error = PCIB_MAP_MSI(device_get_parent(bus),
1769 				    dev, irq, &addr, &data);
1770 				if (error)
1771 					return (error);
1772 				pci_disable_msi(dev);
1773 				dinfo->cfg.msi.msi_addr = addr;
1774 				dinfo->cfg.msi.msi_data = data;
1775 				pci_enable_msi(dev, addr, data);
1776 				return (0);
1777 			}
1778 		}
1779 		return (ENOENT);
1780 	}
1781 
1782 	/*
1783 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1784 	 * we request the updated mapping info.  If that works, we go
1785 	 * through all the slots that use this IRQ and update them.
1786 	 */
1787 	if (cfg->msix.msix_alloc > 0) {
1788 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1789 			mv = &cfg->msix.msix_vectors[i];
1790 			if (mv->mv_irq == irq) {
1791 				error = PCIB_MAP_MSI(device_get_parent(bus),
1792 				    dev, irq, &addr, &data);
1793 				if (error)
1794 					return (error);
1795 				mv->mv_address = addr;
1796 				mv->mv_data = data;
1797 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1798 					mte = &cfg->msix.msix_table[j];
1799 					if (mte->mte_vector != i + 1)
1800 						continue;
1801 					if (mte->mte_handlers == 0)
1802 						continue;
1803 					pci_mask_msix(dev, j);
1804 					pci_enable_msix(dev, j, addr, data);
1805 					pci_unmask_msix(dev, j);
1806 				}
1807 			}
1808 		}
1809 		return (ENOENT);
1810 	}
1811 
1812 	return (ENOENT);
1813 }
1814 
1815 /*
1816  * Returns true if the specified device is blacklisted because MSI
1817  * doesn't work.
1818  */
1819 int
1820 pci_msi_device_blacklisted(device_t dev)
1821 {
1822 	struct pci_quirk *q;
1823 
1824 	if (!pci_honor_msi_blacklist)
1825 		return (0);
1826 
1827 	for (q = &pci_quirks[0]; q->devid; q++) {
1828 		if (q->devid == pci_get_devid(dev) &&
1829 		    q->type == PCI_QUIRK_DISABLE_MSI)
1830 			return (1);
1831 	}
1832 	return (0);
1833 }
1834 
1835 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1837  * we just check for blacklisted chipsets as represented by the
1838  * host-PCI bridge at device 0:0:0.  In the future, it may become
1839  * necessary to check other system attributes, such as the kenv values
1840  * that give the motherboard manufacturer and model number.
1841  */
1842 static int
1843 pci_msi_blacklisted(void)
1844 {
1845 	device_t dev;
1846 
1847 	if (!pci_honor_msi_blacklist)
1848 		return (0);
1849 
1850 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1851 	if (!(pcie_chipset || pcix_chipset))
1852 		return (1);
1853 
1854 	dev = pci_find_bsf(0, 0, 0);
1855 	if (dev != NULL)
1856 		return (pci_msi_device_blacklisted(dev));
1857 	return (0);
1858 }
1859 
1860 /*
1861  * Attempt to allocate *count MSI messages.  The actual number allocated is
1862  * returned in *count.  After this function returns, each message will be
1863  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1864  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request on failure until a single message succeeds. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1983 
1984 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] below only has room for 32 entries. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the release call below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2032 
2033 /*
2034  * Return the max supported MSI messages this device supports.
2035  * Basically, assuming the MD code can alloc messages, this function
2036  * should return the maximum value that pci_alloc_msi() can return.
2037  * Thus, it is subject to the tunables, etc.
2038  */
2039 int
2040 pci_msi_count_method(device_t dev, device_t child)
2041 {
2042 	struct pci_devinfo *dinfo = device_get_ivars(child);
2043 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2044 
2045 	if (pci_do_msi && msi->msi_location != 0)
2046 		return (msi->msi_msgnum);
2047 	return (0);
2048 }
2049 
2050 /* free pcicfgregs structure and all depending data structures */
2051 
2052 int
2053 pci_freecfg(struct pci_devinfo *dinfo)
2054 {
2055 	struct devlist *devlist_head;
2056 	int i;
2057 
2058 	devlist_head = &pci_devq;
2059 
2060 	if (dinfo->cfg.vpd.vpd_reg) {
2061 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2062 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2063 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2064 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2065 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2066 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2067 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2068 	}
2069 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2070 	free(dinfo, M_DEVBUF);
2071 
2072 	/* increment the generation count */
2073 	pci_generation++;
2074 
2075 	/* we're losing one device */
2076 	pci_numdevs--;
2077 	return (0);
2078 }
2079 
2080 /*
2081  * PCI power manangement
2082  */
2083 int
2084 pci_set_powerstate_method(device_t dev, device_t child, int state)
2085 {
2086 	struct pci_devinfo *dinfo = device_get_ivars(child);
2087 	pcicfgregs *cfg = &dinfo->cfg;
2088 	uint16_t status;
2089 	int result, oldstate, highest, delay;
2090 
2091 	if (cfg->pp.pp_cap == 0)
2092 		return (EOPNOTSUPP);
2093 
2094 	/*
2095 	 * Optimize a no state change request away.  While it would be OK to
2096 	 * write to the hardware in theory, some devices have shown odd
2097 	 * behavior when going from D3 -> D3.
2098 	 */
2099 	oldstate = pci_get_powerstate(child);
2100 	if (oldstate == state)
2101 		return (0);
2102 
2103 	/*
2104 	 * The PCI power management specification states that after a state
2105 	 * transition between PCI power states, system software must
2106 	 * guarantee a minimal delay before the function accesses the device.
2107 	 * Compute the worst case delay that we need to guarantee before we
2108 	 * access the device.  Many devices will be responsive much more
2109 	 * quickly than this delay, but there are some that don't respond
2110 	 * instantly to state changes.  Transitions to/from D3 state require
2111 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2112 	 * is done below with DELAY rather than a sleeper function because
2113 	 * this function can be called from contexts where we cannot sleep.
2114 	 */
2115 	highest = (oldstate > state) ? oldstate : state;
2116 	if (highest == PCI_POWERSTATE_D3)
2117 	    delay = 10000;
2118 	else if (highest == PCI_POWERSTATE_D2)
2119 	    delay = 200;
2120 	else
2121 	    delay = 0;
2122 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2123 	    & ~PCIM_PSTAT_DMASK;
2124 	result = 0;
2125 	switch (state) {
2126 	case PCI_POWERSTATE_D0:
2127 		status |= PCIM_PSTAT_D0;
2128 		break;
2129 	case PCI_POWERSTATE_D1:
2130 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2131 			return (EOPNOTSUPP);
2132 		status |= PCIM_PSTAT_D1;
2133 		break;
2134 	case PCI_POWERSTATE_D2:
2135 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2136 			return (EOPNOTSUPP);
2137 		status |= PCIM_PSTAT_D2;
2138 		break;
2139 	case PCI_POWERSTATE_D3:
2140 		status |= PCIM_PSTAT_D3;
2141 		break;
2142 	default:
2143 		return (EINVAL);
2144 	}
2145 
2146 	if (bootverbose)
2147 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2148 		    state);
2149 
2150 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2151 	if (delay)
2152 		DELAY(delay);
2153 	return (0);
2154 }
2155 
2156 int
2157 pci_get_powerstate_method(device_t dev, device_t child)
2158 {
2159 	struct pci_devinfo *dinfo = device_get_ivars(child);
2160 	pcicfgregs *cfg = &dinfo->cfg;
2161 	uint16_t status;
2162 	int result;
2163 
2164 	if (cfg->pp.pp_cap != 0) {
2165 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2166 		switch (status & PCIM_PSTAT_DMASK) {
2167 		case PCIM_PSTAT_D0:
2168 			result = PCI_POWERSTATE_D0;
2169 			break;
2170 		case PCIM_PSTAT_D1:
2171 			result = PCI_POWERSTATE_D1;
2172 			break;
2173 		case PCIM_PSTAT_D2:
2174 			result = PCI_POWERSTATE_D2;
2175 			break;
2176 		case PCIM_PSTAT_D3:
2177 			result = PCI_POWERSTATE_D3;
2178 			break;
2179 		default:
2180 			result = PCI_POWERSTATE_UNKNOWN;
2181 			break;
2182 		}
2183 	} else {
2184 		/* No support, device is always at D0 */
2185 		result = PCI_POWERSTATE_D0;
2186 	}
2187 	return (result);
2188 }
2189 
2190 /*
2191  * Some convenience functions for PCI device drivers.
2192  */
2193 
2194 static __inline void
2195 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2196 {
2197 	uint16_t	command;
2198 
2199 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2200 	command |= bit;
2201 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2202 }
2203 
2204 static __inline void
2205 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2206 {
2207 	uint16_t	command;
2208 
2209 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2210 	command &= ~bit;
2211 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2212 }
2213 
2214 int
2215 pci_enable_busmaster_method(device_t dev, device_t child)
2216 {
2217 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2218 	return (0);
2219 }
2220 
2221 int
2222 pci_disable_busmaster_method(device_t dev, device_t child)
2223 {
2224 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2225 	return (0);
2226 }
2227 
2228 int
2229 pci_enable_io_method(device_t dev, device_t child, int space)
2230 {
2231 	uint16_t bit;
2232 
2233 	switch(space) {
2234 	case SYS_RES_IOPORT:
2235 		bit = PCIM_CMD_PORTEN;
2236 		break;
2237 	case SYS_RES_MEMORY:
2238 		bit = PCIM_CMD_MEMEN;
2239 		break;
2240 	default:
2241 		return (EINVAL);
2242 	}
2243 	pci_set_command_bit(dev, child, bit);
2244 	return (0);
2245 }
2246 
2247 int
2248 pci_disable_io_method(device_t dev, device_t child, int space)
2249 {
2250 	uint16_t bit;
2251 
2252 	switch(space) {
2253 	case SYS_RES_IOPORT:
2254 		bit = PCIM_CMD_PORTEN;
2255 		break;
2256 	case SYS_RES_MEMORY:
2257 		bit = PCIM_CMD_MEMEN;
2258 		break;
2259 	default:
2260 		return (EINVAL);
2261 	}
2262 	pci_clear_command_bit(dev, child, bit);
2263 	return (0);
2264 }
2265 
2266 /*
2267  * New style pci driver.  Parent device is either a pci-host-bridge or a
2268  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2269  */
2270 
/*
 * Dump the interesting parts of a device's config space to the
 * console.  Effective only when booting verbose; prints identity,
 * location, header/class info, timing parameters, and the power
 * management, MSI, and MSI-X capabilities when present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity and bus location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* lattimer ticks are 30 ns; mingnt/maxlat units are 250 ns. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2327 
2328 static int
2329 pci_porten(device_t dev)
2330 {
2331 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2332 }
2333 
2334 static int
2335 pci_memen(device_t dev)
2336 {
2337 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2338 }
2339 
/*
 * Read the current value and probe the size of a BAR.
 *
 * On return, *mapp holds the (possibly 64-bit) value currently
 * programmed into the BAR and *testvalp holds the value read back
 * after writing all 1's to it; pci_mapsize() can derive the BAR's
 * length from the latter.  The BAR is restored to its original value
 * before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2401 
2402 static void
2403 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2404 {
2405 	pci_addr_t map;
2406 	int ln2range;
2407 
2408 	map = pci_read_config(dev, reg, 4);
2409 
2410 	/* The device ROM BAR is always 32-bits. */
2411 	if (reg == PCIR_BIOS)
2412 		return;
2413 	ln2range = pci_maprange(map);
2414 	pci_write_config(dev, reg, base, 4);
2415 	if (ln2range == 64)
2416 		pci_write_config(dev, reg + 4, base >> 32, 4);
2417 }
2418 
2419 /*
2420  * Add a resource based on a pci map register. Return 1 if the map
2421  * register is a 32bit map register or 2 if it is a 64bit register.
2422  */
2423 static int
2424 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2425     int force, int prefetch)
2426 {
2427 	pci_addr_t base, map, testval;
2428 	pci_addr_t start, end, count;
2429 	int barlen, basezero, maprange, mapsize, type;
2430 	uint16_t cmd;
2431 	struct resource *res;
2432 
2433 	pci_read_bar(dev, reg, &map, &testval);
2434 	if (PCI_BAR_MEM(map)) {
2435 		type = SYS_RES_MEMORY;
2436 		if (map & PCIM_BAR_MEM_PREFETCH)
2437 			prefetch = 1;
2438 	} else
2439 		type = SYS_RES_IOPORT;
2440 	mapsize = pci_mapsize(testval);
2441 	base = pci_mapbase(map);
2442 #ifdef __PCI_BAR_ZERO_VALID
2443 	basezero = 0;
2444 #else
2445 	basezero = base == 0;
2446 #endif
2447 	maprange = pci_maprange(map);
2448 	barlen = maprange == 64 ? 2 : 1;
2449 
2450 	/*
2451 	 * For I/O registers, if bottom bit is set, and the next bit up
2452 	 * isn't clear, we know we have a BAR that doesn't conform to the
2453 	 * spec, so ignore it.  Also, sanity check the size of the data
2454 	 * areas to the type of memory involved.  Memory must be at least
2455 	 * 16 bytes in size, while I/O ranges must be at least 4.
2456 	 */
2457 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2458 		return (barlen);
2459 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2460 	    (type == SYS_RES_IOPORT && mapsize < 2))
2461 		return (barlen);
2462 
2463 	if (bootverbose) {
2464 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2465 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2466 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2467 			printf(", port disabled\n");
2468 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2469 			printf(", memory disabled\n");
2470 		else
2471 			printf(", enabled\n");
2472 	}
2473 
2474 	/*
2475 	 * If base is 0, then we have problems if this architecture does
2476 	 * not allow that.  It is best to ignore such entries for the
2477 	 * moment.  These will be allocated later if the driver specifically
2478 	 * requests them.  However, some removable busses look better when
2479 	 * all resources are allocated, so allow '0' to be overriden.
2480 	 *
2481 	 * Similarly treat maps whose values is the same as the test value
2482 	 * read back.  These maps have had all f's written to them by the
2483 	 * BIOS in an attempt to disable the resources.
2484 	 */
2485 	if (!force && (basezero || map == testval))
2486 		return (barlen);
2487 	if ((u_long)base != base) {
2488 		device_printf(bus,
2489 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2490 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2491 		    pci_get_function(dev), reg);
2492 		return (barlen);
2493 	}
2494 
2495 	/*
2496 	 * This code theoretically does the right thing, but has
2497 	 * undesirable side effects in some cases where peripherals
2498 	 * respond oddly to having these bits enabled.  Let the user
2499 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2500 	 * default).
2501 	 */
2502 	if (pci_enable_io_modes) {
2503 		/* Turn on resources that have been left off by a lazy BIOS */
2504 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2505 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2506 			cmd |= PCIM_CMD_PORTEN;
2507 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2508 		}
2509 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2510 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2511 			cmd |= PCIM_CMD_MEMEN;
2512 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2513 		}
2514 	} else {
2515 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2516 			return (barlen);
2517 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2518 			return (barlen);
2519 	}
2520 
2521 	count = 1 << mapsize;
2522 	if (basezero || base == pci_mapbase(testval)) {
2523 		start = 0;	/* Let the parent decide. */
2524 		end = ~0ULL;
2525 	} else {
2526 		start = base;
2527 		end = base + (1 << mapsize) - 1;
2528 	}
2529 	resource_list_add(rl, type, reg, start, end, count);
2530 
2531 	/*
2532 	 * Try to allocate the resource for this BAR from our parent
2533 	 * so that this resource range is already reserved.  The
2534 	 * driver for this device will later inherit this resource in
2535 	 * pci_alloc_resource().
2536 	 */
2537 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2538 	    prefetch ? RF_PREFETCHABLE : 0);
2539 	if (res == NULL) {
2540 		/*
2541 		 * If the allocation fails, clear the BAR and delete
2542 		 * the resource list entry to force
2543 		 * pci_alloc_resource() to allocate resources from the
2544 		 * parent.
2545 		 */
2546 		resource_list_delete(rl, type, reg);
2547 		start = 0;
2548 	} else
2549 		start = rman_get_start(res);
2550 	pci_write_bar(dev, reg, start);
2551 	return (barlen);
2552 }
2553 
2554 /*
2555  * For ATA devices we need to decide early what addressing mode to use.
2556  * Legacy demands that the primary and secondary ATA ports sits on the
2557  * same addresses that old ISA hardware did. This dictates that we use
2558  * those addresses and ignore the BAR's if we cannot set PCI native
2559  * addressing mode.
2560  */
2561 static void
2562 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2563     uint32_t prefetchmask)
2564 {
2565 	struct resource *r;
2566 	int rid, type, progif;
2567 #if 0
2568 	/* if this device supports PCI native addressing use it */
2569 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2570 	if ((progif & 0x8a) == 0x8a) {
2571 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2572 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2573 			printf("Trying ATA native PCI addressing mode\n");
2574 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2575 		}
2576 	}
2577 #endif
2578 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2579 	type = SYS_RES_IOPORT;
2580 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2581 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2582 		    prefetchmask & (1 << 0));
2583 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2584 		    prefetchmask & (1 << 1));
2585 	} else {
2586 		rid = PCIR_BAR(0);
2587 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2588 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2589 		    0x1f7, 8, 0);
2590 		rid = PCIR_BAR(1);
2591 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2592 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2593 		    0x3f6, 1, 0);
2594 	}
2595 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2596 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2597 		    prefetchmask & (1 << 2));
2598 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2599 		    prefetchmask & (1 << 3));
2600 	} else {
2601 		rid = PCIR_BAR(2);
2602 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2603 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2604 		    0x177, 8, 0);
2605 		rid = PCIR_BAR(3);
2606 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2607 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2608 		    0x376, 1, 0);
2609 	}
2610 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2611 	    prefetchmask & (1 << 4));
2612 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2613 	    prefetchmask & (1 << 5));
2614 }
2615 
/*
 * Determine the IRQ to use for a device's INTx pin and record it as
 * the rid 0 SYS_RES_IRQ resource.  The IRQ comes, in order of
 * preference, from a user tunable, from the bus's interrupt router,
 * or from the intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq.  Values outside
	 * 1-254 are rejected as invalid.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2663 
/*
 * Perform early OHCI takeover from SMM.  If the firmware owns the
 * host controller (OHCI_IR set), request an ownership change and wait
 * for it to complete; reset the controller if the firmware does not
 * respond, then mask all controller interrupts.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM currently owns the controller; ask for it back. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for the SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2700 
2701 /* Perform early UHCI takeover from SMM. */
2702 static void
2703 uhci_early_takeover(device_t self)
2704 {
2705 	struct resource *res;
2706 	int rid;
2707 
2708 	/*
2709 	 * Set the PIRQD enable bit and switch off all the others. We don't
2710 	 * want legacy support to interfere with us XXX Does this also mean
2711 	 * that the BIOS won't touch the keyboard anymore if it is connected
2712 	 * to the ports of the root hub?
2713 	 */
2714 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2715 
2716 	/* Disable interrupts */
2717 	rid = PCI_UHCI_BASE_REG;
2718 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2719 	if (res != NULL) {
2720 		bus_write_2(res, UHCI_INTR, 0);
2721 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2722 	}
2723 }
2724 
/*
 * Perform early EHCI takeover from SMM.  Walk the extended capability
 * list looking for the legacy-support capability; if the BIOS holds
 * its semaphore, claim the OS semaphore and wait for the BIOS to let
 * go, then mask the controller's interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the capability/operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Each extended capability lives in PCI config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS doesn't own it; nothing to hand over. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2780 
/*
 * Scan a device's BARs (plus any quirked map registers) and its INTx
 * pin, adding the corresponding entries to its resource list.  Also
 * performs early USB controller takeover from SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Reclaim USB host controllers from SMM firmware if requested. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2835 
/*
 * Enumerate all slot/function combinations on the given bus and add a
 * child device for each function that responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		/* f must be 0 here: the REG() read below targets function 0. */
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			/* pci_read_device() returns NULL for absent functions. */
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2868 
/*
 * Attach a newly-probed device to the bus: create the newbus child,
 * save then restore its config state (restore re-applies the saved
 * registers), print it when verbose, and enumerate its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2880 
2881 static int
2882 pci_probe(device_t dev)
2883 {
2884 
2885 	device_set_desc(dev, "PCI bus");
2886 
2887 	/* Allow other subclasses to override this driver. */
2888 	return (BUS_PROBE_GENERIC);
2889 }
2890 
2891 static int
2892 pci_attach(device_t dev)
2893 {
2894 	int busno, domain;
2895 
2896 	/*
2897 	 * Since there can be multiple independantly numbered PCI
2898 	 * busses on systems with multiple PCI domains, we can't use
2899 	 * the unit number to decide which bus we are probing. We ask
2900 	 * the parent pcib what our domain and bus numbers are.
2901 	 */
2902 	domain = pcib_get_domain(dev);
2903 	busno = pcib_get_bus(dev);
2904 	if (bootverbose)
2905 		device_printf(dev, "domain=%d, physical bus=%d\n",
2906 		    domain, busno);
2907 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2908 	return (bus_generic_attach(dev));
2909 }
2910 
/*
 * Suspend the PCI bus: save each child's config space, suspend the
 * children, and then (when power management on resume is enabled and
 * ACPI is present) drop attached type 0 devices into a low power
 * state chosen in consultation with ACPI.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2959 
/*
 * Resume the PCI bus: return each child to D0, restore its saved
 * config space, and then resume the children generically.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
		/* Unattached devices keep a fresh snapshot for later probes. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2997 
2998 static void
2999 pci_load_vendor_data(void)
3000 {
3001 	caddr_t vendordata, info;
3002 
3003 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3004 		info = preload_search_info(vendordata, MODINFO_ADDR);
3005 		pci_vendordata = *(char **)info;
3006 		info = preload_search_info(vendordata, MODINFO_SIZE);
3007 		pci_vendordata_size = *(size_t *)info;
3008 		/* terminate the database */
3009 		pci_vendordata[pci_vendordata_size] = '\n';
3010 	}
3011 }
3012 
3013 void
3014 pci_driver_added(device_t dev, driver_t *driver)
3015 {
3016 	int numdevs;
3017 	device_t *devlist;
3018 	device_t child;
3019 	struct pci_devinfo *dinfo;
3020 	int i;
3021 
3022 	if (bootverbose)
3023 		device_printf(dev, "driver added\n");
3024 	DEVICE_IDENTIFY(driver, dev);
3025 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3026 		return;
3027 	for (i = 0; i < numdevs; i++) {
3028 		child = devlist[i];
3029 		if (device_get_state(child) != DS_NOTPRESENT)
3030 			continue;
3031 		dinfo = device_get_ivars(child);
3032 		pci_print_verbose(dinfo);
3033 		if (bootverbose)
3034 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3035 		pci_cfg_restore(child, dinfo);
3036 		if (device_probe_and_attach(child) != 0)
3037 			pci_cfg_save(child, dinfo, 1);
3038 	}
3039 	free(devlist, M_TEMP);
3040 }
3041 
/*
 * Bus method to set up an interrupt handler for a child.  For direct
 * children, additionally programs the device for INTx, MSI, or MSI-X
 * delivery based on the rid of the IRQ resource: rid 0 is the legacy
 * INTx line; rids >= 1 index into the device's MSI/MSI-X allocation.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI message on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in config space on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N maps to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the vector's message on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * On the success path error is 0 here, so this only fires
		 * when PCIB_MAP_MSI() above failed and jumped to "bad".
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3133 
3134 int
3135 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3136     void *cookie)
3137 {
3138 	struct msix_table_entry *mte;
3139 	struct resource_list_entry *rle;
3140 	struct pci_devinfo *dinfo;
3141 	int error, rid;
3142 
3143 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3144 		return (EINVAL);
3145 
3146 	/* If this isn't a direct child, just bail out */
3147 	if (device_get_parent(child) != dev)
3148 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3149 
3150 	rid = rman_get_rid(irq);
3151 	if (rid == 0) {
3152 		/* Mask INTx */
3153 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3154 	} else {
3155 		/*
3156 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3157 		 * decrement the appropriate handlers count and mask the
3158 		 * MSI-X message, or disable MSI messages if the count
3159 		 * drops to 0.
3160 		 */
3161 		dinfo = device_get_ivars(child);
3162 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3163 		if (rle->res != irq)
3164 			return (EINVAL);
3165 		if (dinfo->cfg.msi.msi_alloc > 0) {
3166 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3167 			    ("MSI-X index too high"));
3168 			if (dinfo->cfg.msi.msi_handlers == 0)
3169 				return (EINVAL);
3170 			dinfo->cfg.msi.msi_handlers--;
3171 			if (dinfo->cfg.msi.msi_handlers == 0)
3172 				pci_disable_msi(child);
3173 		} else {
3174 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3175 			    ("No MSI or MSI-X interrupts allocated"));
3176 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3177 			    ("MSI-X index too high"));
3178 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3179 			if (mte->mte_handlers == 0)
3180 				return (EINVAL);
3181 			mte->mte_handlers--;
3182 			if (mte->mte_handlers == 0)
3183 				pci_mask_msix(child, rid - 1);
3184 		}
3185 	}
3186 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3187 	if (rid > 0)
3188 		KASSERT(error == 0,
3189 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3190 	return (error);
3191 }
3192 
3193 int
3194 pci_print_child(device_t dev, device_t child)
3195 {
3196 	struct pci_devinfo *dinfo;
3197 	struct resource_list *rl;
3198 	int retval = 0;
3199 
3200 	dinfo = device_get_ivars(child);
3201 	rl = &dinfo->resources;
3202 
3203 	retval += bus_print_child_header(dev, child);
3204 
3205 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3206 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3207 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3208 	if (device_get_flags(dev))
3209 		retval += printf(" flags %#x", device_get_flags(dev));
3210 
3211 	retval += printf(" at device %d.%d", pci_get_slot(child),
3212 	    pci_get_function(child));
3213 
3214 	retval += bus_print_child_footer(dev, child);
3215 
3216 	return (retval);
3217 }
3218 
/*
 * Table of human-readable class/subclass descriptions used by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry.  A subclass of -1 gives the generic description for the
 * whole class; more specific subclass rows follow it.  Terminated by a
 * NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3310 
3311 void
3312 pci_probe_nomatch(device_t dev, device_t child)
3313 {
3314 	int	i;
3315 	char	*cp, *scp, *device;
3316 
3317 	/*
3318 	 * Look for a listing for this device in a loaded device database.
3319 	 */
3320 	if ((device = pci_describe_device(child)) != NULL) {
3321 		device_printf(dev, "<%s>", device);
3322 		free(device, M_DEVBUF);
3323 	} else {
3324 		/*
3325 		 * Scan the class/subclass descriptions for a general
3326 		 * description.
3327 		 */
3328 		cp = "unknown";
3329 		scp = NULL;
3330 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3331 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3332 				if (pci_nomatch_tab[i].subclass == -1) {
3333 					cp = pci_nomatch_tab[i].desc;
3334 				} else if (pci_nomatch_tab[i].subclass ==
3335 				    pci_get_subclass(child)) {
3336 					scp = pci_nomatch_tab[i].desc;
3337 				}
3338 			}
3339 		}
3340 		device_printf(dev, "<%s%s%s>",
3341 		    cp ? cp : "",
3342 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3343 		    scp ? scp : "");
3344 	}
3345 	printf(" at device %d.%d (no driver attached)\n",
3346 	    pci_get_slot(child), pci_get_function(child));
3347 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3348 	return;
3349 }
3350 
3351 /*
3352  * Parse the PCI device database, if loaded, and return a pointer to a
3353  * description of the device.
3354  *
3355  * The database is flat text formatted as follows:
3356  *
3357  * Any line not in a valid format is ignored.
3358  * Lines are terminated with newline '\n' characters.
3359  *
3360  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3361  * the vendor name.
3362  *
3363  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3364  * - devices cannot be listed without a corresponding VENDOR line.
3365  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3366  * another TAB, then the device name.
3367  */
3368 
3369 /*
3370  * Assuming (ptr) points to the beginning of a line in the database,
3371  * return the vendor or device and description of the next entry.
3372  * The value of (vendor) or (device) inappropriate for the entry type
3373  * is set to -1.  Returns nonzero at the end of the database.
3374  *
3375  * Note that this is slightly unrobust in the face of corrupt data;
3376  * we attempt to safeguard against this by spamming the end of the
3377  * database with a newline when we initialise.
3378  */
3379 static int
3380 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3381 {
3382 	char	*cp = *ptr;
3383 	int	left;
3384 
3385 	*device = -1;
3386 	*vendor = -1;
3387 	**desc = '\0';
3388 	for (;;) {
3389 		left = pci_vendordata_size - (cp - pci_vendordata);
3390 		if (left <= 0) {
3391 			*ptr = cp;
3392 			return(1);
3393 		}
3394 
3395 		/* vendor entry? */
3396 		if (*cp != '\t' &&
3397 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3398 			break;
3399 		/* device entry? */
3400 		if (*cp == '\t' &&
3401 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3402 			break;
3403 
3404 		/* skip to next line */
3405 		while (*cp != '\n' && left > 0) {
3406 			cp++;
3407 			left--;
3408 		}
3409 		if (*cp == '\n') {
3410 			cp++;
3411 			left--;
3412 		}
3413 	}
3414 	/* skip to next line */
3415 	while (*cp != '\n' && left > 0) {
3416 		cp++;
3417 		left--;
3418 	}
3419 	if (*cp == '\n' && left > 0)
3420 		cp++;
3421 	*ptr = cp;
3422 	return(0);
3423 }
3424 
3425 static char *
3426 pci_describe_device(device_t dev)
3427 {
3428 	int	vendor, device;
3429 	char	*desc, *vp, *dp, *line;
3430 
3431 	desc = vp = dp = NULL;
3432 
3433 	/*
3434 	 * If we have no vendor data, we can't do anything.
3435 	 */
3436 	if (pci_vendordata == NULL)
3437 		goto out;
3438 
3439 	/*
3440 	 * Scan the vendor data looking for this device
3441 	 */
3442 	line = pci_vendordata;
3443 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3444 		goto out;
3445 	for (;;) {
3446 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3447 			goto out;
3448 		if (vendor == pci_get_vendor(dev))
3449 			break;
3450 	}
3451 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3452 		goto out;
3453 	for (;;) {
3454 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3455 			*dp = 0;
3456 			break;
3457 		}
3458 		if (vendor != -1) {
3459 			*dp = 0;
3460 			break;
3461 		}
3462 		if (device == pci_get_device(dev))
3463 			break;
3464 	}
3465 	if (dp[0] == '\0')
3466 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3467 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3468 	    NULL)
3469 		sprintf(desc, "%s, %s", vp, dp);
3470  out:
3471 	if (vp != NULL)
3472 		free(vp, M_DEVBUF);
3473 	if (dp != NULL)
3474 		free(dp, M_DEVBUF);
3475 	return(desc);
3476 }
3477 
/*
 * Bus method to read a PCI instance variable for a child.  Each case
 * simply exposes a field of the cached config registers.  Returns 0 on
 * success, EINVAL for PCI_IVAR_ETHADDR (not supported here), or ENOENT
 * for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3560 
/*
 * Bus method to write a PCI instance variable for a child.  Only the
 * interrupt pin is writable; the identification registers are
 * read-only from the bus's point of view and return EINVAL.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3593 
3594 
3595 #include "opt_ddb.h"
3596 #ifdef DDB
3597 #include <ddb/ddb.h>
3598 #include <sys/cons.h>
3599 
3600 /*
3601  * List resources based on pci map registers, used for within ddb
3602  */
3603 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one line per device with its selector, class, subsystem,
 * vendor/device IDs, revision, and header type.  Devices with no
 * attached driver are labelled "none<N>" using a running counter.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3643 #endif /* DDB */
3644 
/*
 * Probe a BAR and reserve a resource for it on first allocation.  The
 * BAR is sized by the read/write-ones probe done in pci_read_bar(),
 * the requested count/alignment are overridden to match the BAR's real
 * size, and the allocated base address is written back to the BAR.
 * Returns the reserved (inactive) resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3740 
3741 
/*
 * Bus method to allocate a resource for a child.  Direct children get
 * lazy allocation: an interrupt is routed on first request, and BAR
 * resources are probed and reserved via pci_reserve_map() the first
 * time they are asked for.  The actual allocation is always completed
 * out of the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for grandchildren are passed straight up. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Other types fall through to the generic list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3792 
3793 int
3794 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3795     struct resource *r)
3796 {
3797 	int error;
3798 
3799 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3800 	if (error)
3801 		return (error);
3802 
3803 	/* Enable decoding in the command register when activating BARs. */
3804 	if (device_get_parent(child) == dev) {
3805 		/* Device ROMs need their decoding explicitly enabled. */
3806 		if (rid == PCIR_BIOS)
3807 			pci_write_config(child, rid, rman_get_start(r) |
3808 			    PCIM_BIOS_ENABLE, 4);
3809 		switch (type) {
3810 		case SYS_RES_IOPORT:
3811 		case SYS_RES_MEMORY:
3812 			error = PCI_ENABLE_IO(dev, child, type);
3813 			break;
3814 		}
3815 	}
3816 	return (error);
3817 }
3818 
3819 int
3820 pci_deactivate_resource(device_t dev, device_t child, int type,
3821     int rid, struct resource *r)
3822 {
3823 	int error;
3824 
3825 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3826 	if (error)
3827 		return (error);
3828 
3829 	/* Disable decoding for device ROMs. */
3830 	if (rid == PCIR_BIOS)
3831 		pci_write_config(child, rid, rman_get_start(r), 4);
3832 	return (0);
3833 }
3834 
/*
 * Detach and destroy a PCI child device, releasing every resource it
 * still holds.  The command register's memory/port decode bits are
 * cleared first so the device stops claiming the ranges being freed.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A still-active or still-busy entry means the
			 * (detached) child never released it; complain and
			 * release on its behalf before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3874 
/*
 * Bus method to delete a resource from a direct child's resource list.
 * Refuses (with a warning) if the child still has the resource active
 * or busy.  When the resource backs a BAR, the BAR is cleared first so
 * the device stops decoding the range before it is unreserved.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3917 
3918 struct resource_list *
3919 pci_get_resource_list (device_t dev, device_t child)
3920 {
3921 	struct pci_devinfo *dinfo = device_get_ivars(child);
3922 
3923 	return (&dinfo->resources);
3924 }
3925 
3926 uint32_t
3927 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3928 {
3929 	struct pci_devinfo *dinfo = device_get_ivars(child);
3930 	pcicfgregs *cfg = &dinfo->cfg;
3931 
3932 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3933 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3934 }
3935 
3936 void
3937 pci_write_config_method(device_t dev, device_t child, int reg,
3938     uint32_t val, int width)
3939 {
3940 	struct pci_devinfo *dinfo = device_get_ivars(child);
3941 	pcicfgregs *cfg = &dinfo->cfg;
3942 
3943 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3944 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3945 }
3946 
3947 int
3948 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3949     size_t buflen)
3950 {
3951 
3952 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3953 	    pci_get_function(child));
3954 	return (0);
3955 }
3956 
3957 int
3958 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3959     size_t buflen)
3960 {
3961 	struct pci_devinfo *dinfo;
3962 	pcicfgregs *cfg;
3963 
3964 	dinfo = device_get_ivars(child);
3965 	cfg = &dinfo->cfg;
3966 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3967 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3968 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3969 	    cfg->progif);
3970 	return (0);
3971 }
3972 
3973 int
3974 pci_assign_interrupt_method(device_t dev, device_t child)
3975 {
3976 	struct pci_devinfo *dinfo = device_get_ivars(child);
3977 	pcicfgregs *cfg = &dinfo->cfg;
3978 
3979 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3980 	    cfg->intpin));
3981 }
3982 
3983 static int
3984 pci_modevent(module_t mod, int what, void *arg)
3985 {
3986 	static struct cdev *pci_cdev;
3987 
3988 	switch (what) {
3989 	case MOD_LOAD:
3990 		STAILQ_INIT(&pci_devq);
3991 		pci_generation = 0;
3992 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3993 		    "pci");
3994 		pci_load_vendor_data();
3995 		break;
3996 
3997 	case MOD_UNLOAD:
3998 		destroy_dev(pci_cdev);
3999 		break;
4000 	}
4001 
4002 	return (0);
4003 }
4004 
/*
 * Restore the device's cached type 0 config header (saved by
 * pci_cfg_save()) after a power-state change or resume.  The device
 * is forced to D0 first, since registers are only writable/meaningful
 * at full power.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Re-write the BARs and the rest of the cached header fields. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4050 
/*
 * Snapshot the device's type 0 config header into the softc so it can
 * be restored later by pci_cfg_restore().  If 'setstate' is non-zero,
 * also power the device down to D3 subject to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each case deliberately falls into the next, stricter one. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4134