xref: /freebsd/sys/dev/pci/pci.c (revision dadef94c7a762d05890e2891bc4a7d1dfe0cf758)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 static pci_addr_t	pci_mapbase(uint64_t mapreg);
73 static const char	*pci_maptype(uint64_t mapreg);
74 static int		pci_mapsize(uint64_t testval);
75 static int		pci_maprange(uint64_t mapreg);
76 static pci_addr_t	pci_rombase(uint64_t mapreg);
77 static int		pci_romsize(uint64_t testval);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80 
81 static int		pci_porten(device_t dev);
82 static int		pci_memen(device_t dev);
83 static void		pci_assign_interrupt(device_t bus, device_t dev,
84 			    int force_route);
85 static int		pci_add_map(device_t bus, device_t dev, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 static int		pci_remap_intr_method(device_t bus, device_t dev,
115 			    u_int irq);
116 
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminate the method list. */
	{ 0, 0 }
};
169 
170 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
171 
172 static devclass_t pci_devclass;
173 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
174 MODULE_VERSION(pci, 1);
175 
176 static char	*pci_vendordata;
177 static size_t	pci_vendordata_size;
178 
179 
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;		/* meaning depends on type (e.g. register offset) */
	int	arg2;		/* meaning depends on type */
};
188 
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
223 
224 /* map register information */
225 #define	PCI_MAPMEM	0x01	/* memory map */
226 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
227 #define	PCI_MAPPORT	0x04	/* port map */
228 
229 struct devlist pci_devq;
230 uint32_t pci_generation;
231 uint32_t pci_numdevs = 0;
232 static int pcie_chipset, pcix_chipset;
233 
234 /* sysctl vars */
235 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
236 
237 static int pci_enable_io_modes = 1;
238 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
239 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
240     &pci_enable_io_modes, 1,
241     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
242 enable these bits correctly.  We'd like to do this all the time, but there\n\
243 are some peripherals that this causes problems with.");
244 
245 static int pci_do_power_nodriver = 0;
246 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
247 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
248     &pci_do_power_nodriver, 0,
249   "Place a function into D3 state when no driver attaches to it.  0 means\n\
250 disable.  1 means conservatively place devices into D3 state.  2 means\n\
251 agressively place devices into D3 state.  3 means put absolutely everything\n\
252 in D3 state.");
253 
254 int pci_do_power_resume = 1;
255 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
256 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
257     &pci_do_power_resume, 1,
258   "Transition from D3 -> D0 on resume.");
259 
260 static int pci_do_msi = 1;
261 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
262 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
263     "Enable support for MSI interrupts");
264 
265 static int pci_do_msix = 1;
266 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
267 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
268     "Enable support for MSI-X interrupts");
269 
270 static int pci_honor_msi_blacklist = 1;
271 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
272 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
273     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
274 
275 #if defined(__i386__) || defined(__amd64__)
276 static int pci_usb_takeover = 1;
277 #else
278 static int pci_usb_takeover = 0;
279 #endif
280 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
281 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
282     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
283 Disable this if you depend on BIOS emulation of USB devices, that is\n\
284 you use USB devices (like keyboard or mouse) but do not load USB drivers");
285 
286 /* Find a device_t by bus/slot/function in domain 0 */
287 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Legacy single-domain lookup: delegate with domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
294 
295 /* Find a device_t by domain/bus/slot/function */
296 
297 device_t
298 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
299 {
300 	struct pci_devinfo *dinfo;
301 
302 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
303 		if ((dinfo->cfg.domain == domain) &&
304 		    (dinfo->cfg.bus == bus) &&
305 		    (dinfo->cfg.slot == slot) &&
306 		    (dinfo->cfg.func == func)) {
307 			return (dinfo->cfg.dev);
308 		}
309 	}
310 
311 	return (NULL);
312 }
313 
314 /* Find a device_t by vendor/device ID */
315 
316 device_t
317 pci_find_device(uint16_t vendor, uint16_t device)
318 {
319 	struct pci_devinfo *dinfo;
320 
321 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322 		if ((dinfo->cfg.vendor == vendor) &&
323 		    (dinfo->cfg.device == device)) {
324 			return (dinfo->cfg.dev);
325 		}
326 	}
327 
328 	return (NULL);
329 }
330 
/*
 * printf() wrapper that prefixes the message with the device's
 * "pci<domain>:<bus>:<slot>:<func>: " selector.  Returns the total
 * number of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
344 
345 /* return base address of memory or port map */
346 
347 static pci_addr_t
348 pci_mapbase(uint64_t mapreg)
349 {
350 
351 	if (PCI_BAR_MEM(mapreg))
352 		return (mapreg & PCIM_BAR_MEM_BASE);
353 	else
354 		return (mapreg & PCIM_BAR_IO_BASE);
355 }
356 
357 /* return map type of memory or port map */
358 
359 static const char *
360 pci_maptype(uint64_t mapreg)
361 {
362 
363 	if (PCI_BAR_IO(mapreg))
364 		return ("I/O Port");
365 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
366 		return ("Prefetchable Memory");
367 	return ("Memory");
368 }
369 
370 /* return log2 of map size decoded for memory or port map */
371 
372 static int
373 pci_mapsize(uint64_t testval)
374 {
375 	int ln2size;
376 
377 	testval = pci_mapbase(testval);
378 	ln2size = 0;
379 	if (testval != 0) {
380 		while ((testval & 1) == 0)
381 		{
382 			ln2size++;
383 			testval >>= 1;
384 		}
385 	}
386 	return (ln2size);
387 }
388 
389 /* return base address of device ROM */
390 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Only the address bits of the BIOS ROM register are returned. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
397 
/* return log2 of map size decoded for device ROM */
399 
400 static int
401 pci_romsize(uint64_t testval)
402 {
403 	int ln2size;
404 
405 	testval = pci_rombase(testval);
406 	ln2size = 0;
407 	if (testval != 0) {
408 		while ((testval & 1) == 0)
409 		{
410 			ln2size++;
411 			testval >>= 1;
412 		}
413 	}
414 	return (ln2size);
415 }
416 
417 /* return log2 of address range supported by map register */
418 
419 static int
420 pci_maprange(uint64_t mapreg)
421 {
422 	int ln2range = 0;
423 
424 	if (PCI_BAR_IO(mapreg))
425 		ln2range = 32;
426 	else
427 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
428 		case PCIM_BAR_MEM_32:
429 			ln2range = 32;
430 			break;
431 		case PCIM_BAR_MEM_1MB:
432 			ln2range = 20;
433 			break;
434 		case PCIM_BAR_MEM_64:
435 			ln2range = 64;
436 			break;
437 		}
438 	return (ln2range);
439 }
440 
441 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
442 
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only normal (type 0) headers are candidates for fixing up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
453 
454 /* extract header type specific config data */
455 
/*
 * Read the header-type-specific fields (subsystem IDs where present,
 * and the number of BARs) into *cfg.  Unknown header types are left
 * untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges have no subsystem ID registers at these offsets. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* CardBus keeps its subsystem IDs at different offsets. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
477 
478 /* read configuration header into pcicfgregs structure */
/*
 * Allocate a pci_devinfo of the given size (callers may embed it in a
 * larger structure), fill it from the function's config-space header,
 * and link it onto the global device list.  Returns NULL when no device
 * responds at the given address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones vendor/device means nothing decodes this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record multi-function bit separately, then strip it. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the header into the pciconf(8)-visible record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
552 
/*
 * Walk the device's capability list and cache the capabilities this
 * driver cares about (power management, HyperTransport MSI mapping,
 * MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express) into *cfg.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/*
 * REG and WREG are deliberately left defined here: the VPD helper
 * functions below reuse them.
 */
}
688 
689 /*
690  * PCI Vital Product Data
691  */
692 
693 #define	PCI_VPD_TIMEOUT		1000000
694 
695 static int
696 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
697 {
698 	int count = PCI_VPD_TIMEOUT;
699 
700 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
701 
702 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
703 
704 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
705 		if (--count < 0)
706 			return (ENXIO);
707 		DELAY(1);	/* limit looping */
708 	}
709 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
710 
711 	return (0);
712 }
713 
#if 0
/*
 * Write one 4-byte-aligned dword of VPD data: load the data register,
 * then write the offset with the flag bit (0x8000) set and busy-wait
 * for hardware to clear it, indicating completion.  Currently unused,
 * hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
733 
734 #undef PCI_VPD_TIMEOUT
735 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent dword read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
744 
745 static int
746 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
747 {
748 	uint32_t reg;
749 	uint8_t byte;
750 
751 	if (vrs->bytesinval == 0) {
752 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
753 			return (ENXIO);
754 		vrs->val = le32toh(reg);
755 		vrs->off += 4;
756 		byte = vrs->val & 0xff;
757 		vrs->bytesinval = 3;
758 	} else {
759 		vrs->val = vrs->val >> 8;
760 		byte = vrs->val & 0xff;
761 		vrs->bytesinval--;
762 	}
763 
764 	vrs->cksum += byte;
765 	*data = byte;
766 	return (0);
767 }
768 
/*
 * Parse the device's VPD into cfg->vpd with a small state machine fed
 * one byte at a time by vpd_nextbyte().  States: 0 = resource tag,
 * 1 = identifier string, 2/3 = VPD-R keyword header/value, 5/6 = VPD-W
 * keyword header/value, 4 = skip.  Negative states stop the parse:
 * -1 is a normal or forced stop, -2 an I/O error.  On failure the
 * partially built arrays are freed; vpd_cached is set regardless so
 * the (possibly failed) read is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				/* Grow the read-only keyword array. */
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* Validate the checksum when the RV keyword ends. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				/* Grow the read/write keyword array. */
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1048 
1049 int
1050 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1051 {
1052 	struct pci_devinfo *dinfo = device_get_ivars(child);
1053 	pcicfgregs *cfg = &dinfo->cfg;
1054 
1055 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1056 		pci_read_vpd(device_get_parent(dev), cfg);
1057 
1058 	*identptr = cfg->vpd.vpd_ident;
1059 
1060 	if (*identptr == NULL)
1061 		return (ENXIO);
1062 
1063 	return (0);
1064 }
1065 
1066 int
1067 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1068 	const char **vptr)
1069 {
1070 	struct pci_devinfo *dinfo = device_get_ivars(child);
1071 	pcicfgregs *cfg = &dinfo->cfg;
1072 	int i;
1073 
1074 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1075 		pci_read_vpd(device_get_parent(dev), cfg);
1076 
1077 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1078 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1079 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1080 			*vptr = cfg->vpd.vpd_ros[i].value;
1081 		}
1082 
1083 	if (i != cfg->vpd.vpd_rocnt)
1084 		return (0);
1085 
1086 	*vptr = NULL;
1087 	return (ENXIO);
1088 }
1089 
1090 /*
1091  * Find the requested extended capability and return the offset in
1092  * configuration space via the pointer provided. The function returns
1093  * 0 on success and error code otherwise.
1094  */
1095 int
1096 pci_find_extcap_method(device_t dev, device_t child, int capability,
1097     int *capreg)
1098 {
1099 	struct pci_devinfo *dinfo = device_get_ivars(child);
1100 	pcicfgregs *cfg = &dinfo->cfg;
1101 	u_int32_t status;
1102 	u_int8_t ptr;
1103 
1104 	/*
1105 	 * Check the CAP_LIST bit of the PCI status register first.
1106 	 */
1107 	status = pci_read_config(child, PCIR_STATUS, 2);
1108 	if (!(status & PCIM_STATUS_CAPPRESENT))
1109 		return (ENXIO);
1110 
1111 	/*
1112 	 * Determine the start pointer of the capabilities list.
1113 	 */
1114 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1115 	case PCIM_HDRTYPE_NORMAL:
1116 	case PCIM_HDRTYPE_BRIDGE:
1117 		ptr = PCIR_CAP_PTR;
1118 		break;
1119 	case PCIM_HDRTYPE_CARDBUS:
1120 		ptr = PCIR_CAP_PTR_2;
1121 		break;
1122 	default:
1123 		/* XXX: panic? */
1124 		return (ENXIO);		/* no extended capabilities support */
1125 	}
1126 	ptr = pci_read_config(child, ptr, 1);
1127 
1128 	/*
1129 	 * Traverse the capabilities list.
1130 	 */
1131 	while (ptr != 0) {
1132 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1133 			if (capreg != NULL)
1134 				*capreg = ptr;
1135 			return (0);
1136 		}
1137 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1138 	}
1139 
1140 	return (ENOENT);
1141 }
1142 
1143 /*
1144  * Support for MSI-X message interrupts.
1145  */
1146 void
1147 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1148 {
1149 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1150 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1151 	uint32_t offset;
1152 
1153 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1154 	offset = msix->msix_table_offset + index * 16;
1155 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1156 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1157 	bus_write_4(msix->msix_table_res, offset + 8, data);
1158 
1159 	/* Enable MSI -> HT mapping. */
1160 	pci_ht_map_msi(dev, address);
1161 }
1162 
1163 void
1164 pci_mask_msix(device_t dev, u_int index)
1165 {
1166 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1167 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1168 	uint32_t offset, val;
1169 
1170 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1171 	offset = msix->msix_table_offset + index * 16 + 12;
1172 	val = bus_read_4(msix->msix_table_res, offset);
1173 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1174 		val |= PCIM_MSIX_VCTRL_MASK;
1175 		bus_write_4(msix->msix_table_res, offset, val);
1176 	}
1177 }
1178 
1179 void
1180 pci_unmask_msix(device_t dev, u_int index)
1181 {
1182 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1183 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1184 	uint32_t offset, val;
1185 
1186 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1187 	offset = msix->msix_table_offset + index * 16 + 12;
1188 	val = bus_read_4(msix->msix_table_res, offset);
1189 	if (val & PCIM_MSIX_VCTRL_MASK) {
1190 		val &= ~PCIM_MSIX_VCTRL_MASK;
1191 		bus_write_4(msix->msix_table_res, offset, val);
1192 	}
1193 }
1194 
1195 int
1196 pci_pending_msix(device_t dev, u_int index)
1197 {
1198 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1199 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1200 	uint32_t offset, bit;
1201 
1202 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1203 	offset = msix->msix_pba_offset + (index / 32) * 4;
1204 	bit = 1 << index % 32;
1205 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1206 }
1207 
1208 /*
1209  * Restore MSI-X registers and table during resume.  If MSI-X is
1210  * enabled then walk the virtual table to restore the actual MSI-X
1211  * table.
1212  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1240 
1241 /*
1242  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1243  * returned in *count.  After this function returns, each message will be
1244  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1245  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, 'rle' is still the table BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' may be fewer than requested if allocation failed partway. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: message i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1377 
1378 /*
1379  * By default, pci_alloc_msix() will assign the allocated IRQ
1380  * resources consecutively to the first N messages in the MSI-X table.
1381  * However, device drivers may want to use different layouts if they
1382  * either receive fewer messages than they asked for, or they wish to
1383  * populate the MSI-X table sparsely.  This method allows the driver
1384  * to specify what layout it wants.  It must be called after a
1385  * successful pci_alloc_msix() but before any of the associated
1386  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1387  *
1388  * The 'vectors' array contains 'count' message vectors.  The array
1389  * maps directly to the MSI-X table in that index 0 in the array
1390  * specifies the vector for the first message in the MSI-X table, etc.
1391  * The vector value in each array index can either be 0 to indicate
1392  * that no vector should be assigned to a message slot, or it can be a
1393  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1395  * vector (IRQ) to be used for the corresponding message.
1396  *
1397  * On successful return, each message with a non-zero vector will have
1398  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1399  * 1.  Additionally, if any of the IRQs allocated via the previous
1400  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1401  * will be freed back to the system automatically.
1402  *
1403  * For example, suppose a driver has a MSI-X table with 6 messages and
1404  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1405  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1406  * C.  After the call to pci_alloc_msix(), the device will be setup to
1407  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1409  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1410  * be freed back to the system.  This device will also have valid
1411  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1412  *
1413  * In any case, the SYS_RES_IRQ rid X will always map to the message
1414  * at MSI-X table index X - 1 and will only be valid if a vector is
1415  * assigned to that table entry.
1416  */
1417 int
1418 pci_remap_msix_method(device_t dev, device_t child, int count,
1419     const u_int *vectors)
1420 {
1421 	struct pci_devinfo *dinfo = device_get_ivars(child);
1422 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1423 	struct resource_list_entry *rle;
1424 	int i, irq, j, *used;
1425 
1426 	/*
1427 	 * Have to have at least one message in the table but the
1428 	 * table can't be bigger than the actual MSI-X table in the
1429 	 * device.
1430 	 */
1431 	if (count == 0 || count > msix->msix_msgnum)
1432 		return (EINVAL);
1433 
1434 	/* Sanity check the vectors. */
1435 	for (i = 0; i < count; i++)
1436 		if (vectors[i] > msix->msix_alloc)
1437 			return (EINVAL);
1438 
1439 	/*
1440 	 * Make sure there aren't any holes in the vectors to be used.
1441 	 * It's a big pain to support it, and it doesn't really make
1442 	 * sense anyway.  Also, at least one vector must be used.
1443 	 */
1444 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1445 	    M_ZERO);
1446 	for (i = 0; i < count; i++)
1447 		if (vectors[i] != 0)
1448 			used[vectors[i] - 1] = 1;
1449 	for (i = 0; i < msix->msix_alloc - 1; i++)
1450 		if (used[i] == 0 && used[i + 1] == 1) {
1451 			free(used, M_DEVBUF);
1452 			return (EINVAL);
1453 		}
1454 	if (used[0] != 1) {
1455 		free(used, M_DEVBUF);
1456 		return (EINVAL);
1457 	}
1458 
1459 	/* Make sure none of the resources are allocated. */
1460 	for (i = 0; i < msix->msix_table_len; i++) {
1461 		if (msix->msix_table[i].mte_vector == 0)
1462 			continue;
1463 		if (msix->msix_table[i].mte_handlers > 0)
1464 			return (EBUSY);
1465 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1466 		KASSERT(rle != NULL, ("missing resource"));
1467 		if (rle->res != NULL)
1468 			return (EBUSY);
1469 	}
1470 
1471 	/* Free the existing resource list entries. */
1472 	for (i = 0; i < msix->msix_table_len; i++) {
1473 		if (msix->msix_table[i].mte_vector == 0)
1474 			continue;
1475 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1476 	}
1477 
1478 	/*
1479 	 * Build the new virtual table keeping track of which vectors are
1480 	 * used.
1481 	 */
1482 	free(msix->msix_table, M_DEVBUF);
1483 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1484 	    M_DEVBUF, M_WAITOK | M_ZERO);
1485 	for (i = 0; i < count; i++)
1486 		msix->msix_table[i].mte_vector = vectors[i];
1487 	msix->msix_table_len = count;
1488 
1489 	/* Free any unused IRQs and resize the vectors array if necessary. */
1490 	j = msix->msix_alloc - 1;
1491 	if (used[j] == 0) {
1492 		struct msix_vector *vec;
1493 
1494 		while (used[j] == 0) {
1495 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1496 			    msix->msix_vectors[j].mv_irq);
1497 			j--;
1498 		}
1499 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1500 		    M_WAITOK);
1501 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1502 		    (j + 1));
1503 		free(msix->msix_vectors, M_DEVBUF);
1504 		msix->msix_vectors = vec;
1505 		msix->msix_alloc = j + 1;
1506 	}
1507 	free(used, M_DEVBUF);
1508 
1509 	/* Map the IRQs onto the rids. */
1510 	for (i = 0; i < count; i++) {
1511 		if (vectors[i] == 0)
1512 			continue;
1513 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1514 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1515 		    irq, 1);
1516 	}
1517 
1518 	if (bootverbose) {
1519 		device_printf(child, "Remapped MSI-X IRQs as: ");
1520 		for (i = 0; i < count; i++) {
1521 			if (i != 0)
1522 				printf(", ");
1523 			if (vectors[i] == 0)
1524 				printf("---");
1525 			else
1526 				printf("%d",
1527 				    msix->msix_vectors[vectors[i]].mv_irq);
1528 		}
1529 		printf("\n");
1530 	}
1531 
1532 	return (0);
1533 }
1534 
/*
 * Release all allocated MSI-X messages for 'child'.  Fails with EBUSY
 * if any message still has a registered handler or an allocated
 * SYS_RES_IRQ resource, and with ENODEV if nothing is allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1581 
1582 /*
1583  * Return the max supported MSI-X messages this device supports.
1584  * Basically, assuming the MD code can alloc messages, this function
1585  * should return the maximum value that pci_alloc_msix() can return.
1586  * Thus, it is subject to the tunables, etc.
1587  */
1588 int
1589 pci_msix_count_method(device_t dev, device_t child)
1590 {
1591 	struct pci_devinfo *dinfo = device_get_ivars(child);
1592 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1593 
1594 	if (pci_do_msix && msix->msix_location != 0)
1595 		return (msix->msix_msgnum);
1596 	return (0);
1597 }
1598 
1599 /*
1600  * HyperTransport MSI mapping control
1601  */
1602 void
1603 pci_ht_map_msi(device_t dev, uint64_t addr)
1604 {
1605 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1606 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1607 
1608 	if (!ht->ht_msimap)
1609 		return;
1610 
1611 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1612 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1613 		/* Enable MSI -> HT mapping. */
1614 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1615 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1616 		    ht->ht_msictrl, 2);
1617 	}
1618 
1619 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1620 		/* Disable MSI -> HT mapping. */
1621 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1622 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1623 		    ht->ht_msictrl, 2);
1624 	}
1625 }
1626 
1627 int
1628 pci_get_max_read_req(device_t dev)
1629 {
1630 	int cap;
1631 	uint16_t val;
1632 
1633 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1634 		return (0);
1635 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1636 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1637 	val >>= 12;
1638 	return (1 << (val + 7));
1639 }
1640 
1641 int
1642 pci_set_max_read_req(device_t dev, int size)
1643 {
1644 	int cap;
1645 	uint16_t val;
1646 
1647 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1648 		return (0);
1649 	if (size < 128)
1650 		size = 128;
1651 	if (size > 4096)
1652 		size = 4096;
1653 	size = (1 << (fls(size) - 1));
1654 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1655 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1656 	val |= (fls(size) - 8) << 12;
1657 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1658 	return (size);
1659 }
1660 
1661 /*
1662  * Support for MSI message signalled interrupts.
1663  */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values before setting the enable bit. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable: the data register follows the high dword. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1690 
1691 void
1692 pci_disable_msi(device_t dev)
1693 {
1694 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1695 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1696 
1697 	/* Disable MSI -> HT mapping. */
1698 	pci_ht_map_msi(dev, 0);
1699 
1700 	/* Disable MSI in the control register. */
1701 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1702 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1703 	    2);
1704 }
1705 
1706 /*
1707  * Restore MSI registers during resume.  If MSI is enabled then
1708  * restore the data and address registers in addition to the control
1709  * register.
1710  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data registers first. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the control register (including the enable bit). */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1736 
/*
 * Reprogram the address/data registers for the MSI or MSI-X message
 * currently routed to 'irq' after the backend changed its mapping.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, cache new values, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Reprogram every table slot on this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): this returns ENOENT even when a matching
		 * MSI-X vector was found and updated above — confirm this
		 * is the intended return value for callers.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1809 
1810 /*
1811  * Returns true if the specified device is blacklisted because MSI
1812  * doesn't work.
1813  */
1814 int
1815 pci_msi_device_blacklisted(device_t dev)
1816 {
1817 	struct pci_quirk *q;
1818 
1819 	if (!pci_honor_msi_blacklist)
1820 		return (0);
1821 
1822 	for (q = &pci_quirks[0]; q->devid; q++) {
1823 		if (q->devid == pci_get_devid(dev) &&
1824 		    q->type == PCI_QUIRK_DISABLE_MSI)
1825 			return (1);
1826 	}
1827 	return (0);
1828 }
1829 
1830 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1832  * we just check for blacklisted chipsets as represented by the
1833  * host-PCI bridge at device 0:0:0.  In the future, it may become
1834  * necessary to check other system attributes, such as the kenv values
1835  * that give the motherboard manufacturer and model number.
1836  */
1837 static int
1838 pci_msi_blacklisted(void)
1839 {
1840 	device_t dev;
1841 
1842 	if (!pci_honor_msi_blacklist)
1843 		return (0);
1844 
1845 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1846 	if (!(pcie_chipset || pcix_chipset))
1847 		return (1);
1848 
1849 	dev = pci_find_bsf(0, 0, 0);
1850 	if (dev != NULL)
1851 		return (pci_msi_device_blacklisted(dev));
1852 	return (0);
1853 }
1854 
1855 /*
1856  * Attempt to allocate *count MSI messages.  The actual number allocated is
1857  * returned in *count.  After this function returns, each message will be
1858  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1859  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Retry with successively halved counts until one succeeds. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* Multiple Message Enable field encodes log2(actual) in bits 6:4. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1978 
1979 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] below holds at most 32 entries; alloc must not exceed it. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the bulk release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2027 
2028 /*
2029  * Return the max supported MSI messages this device supports.
2030  * Basically, assuming the MD code can alloc messages, this function
2031  * should return the maximum value that pci_alloc_msi() can return.
2032  * Thus, it is subject to the tunables, etc.
2033  */
2034 int
2035 pci_msi_count_method(device_t dev, device_t child)
2036 {
2037 	struct pci_devinfo *dinfo = device_get_ivars(child);
2038 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2039 
2040 	if (pci_do_msi && msi->msi_location != 0)
2041 		return (msi->msi_msgnum);
2042 	return (0);
2043 }
2044 
2045 /* free pcicfgregs structure and all depending data structures */
2046 
2047 int
2048 pci_freecfg(struct pci_devinfo *dinfo)
2049 {
2050 	struct devlist *devlist_head;
2051 	int i;
2052 
2053 	devlist_head = &pci_devq;
2054 
2055 	if (dinfo->cfg.vpd.vpd_reg) {
2056 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2057 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2058 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2059 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2060 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2061 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2062 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2063 	}
2064 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2065 	free(dinfo, M_DEVBUF);
2066 
2067 	/* increment the generation count */
2068 	pci_generation++;
2069 
2070 	/* we're losing one device */
2071 	pci_numdevs--;
2072 	return (0);
2073 }
2074 
2075 /*
2076  * PCI power manangement
2077  */
2078 int
2079 pci_set_powerstate_method(device_t dev, device_t child, int state)
2080 {
2081 	struct pci_devinfo *dinfo = device_get_ivars(child);
2082 	pcicfgregs *cfg = &dinfo->cfg;
2083 	uint16_t status;
2084 	int result, oldstate, highest, delay;
2085 
2086 	if (cfg->pp.pp_cap == 0)
2087 		return (EOPNOTSUPP);
2088 
2089 	/*
2090 	 * Optimize a no state change request away.  While it would be OK to
2091 	 * write to the hardware in theory, some devices have shown odd
2092 	 * behavior when going from D3 -> D3.
2093 	 */
2094 	oldstate = pci_get_powerstate(child);
2095 	if (oldstate == state)
2096 		return (0);
2097 
2098 	/*
2099 	 * The PCI power management specification states that after a state
2100 	 * transition between PCI power states, system software must
2101 	 * guarantee a minimal delay before the function accesses the device.
2102 	 * Compute the worst case delay that we need to guarantee before we
2103 	 * access the device.  Many devices will be responsive much more
2104 	 * quickly than this delay, but there are some that don't respond
2105 	 * instantly to state changes.  Transitions to/from D3 state require
2106 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2107 	 * is done below with DELAY rather than a sleeper function because
2108 	 * this function can be called from contexts where we cannot sleep.
2109 	 */
2110 	highest = (oldstate > state) ? oldstate : state;
2111 	if (highest == PCI_POWERSTATE_D3)
2112 	    delay = 10000;
2113 	else if (highest == PCI_POWERSTATE_D2)
2114 	    delay = 200;
2115 	else
2116 	    delay = 0;
2117 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2118 	    & ~PCIM_PSTAT_DMASK;
2119 	result = 0;
2120 	switch (state) {
2121 	case PCI_POWERSTATE_D0:
2122 		status |= PCIM_PSTAT_D0;
2123 		break;
2124 	case PCI_POWERSTATE_D1:
2125 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2126 			return (EOPNOTSUPP);
2127 		status |= PCIM_PSTAT_D1;
2128 		break;
2129 	case PCI_POWERSTATE_D2:
2130 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2131 			return (EOPNOTSUPP);
2132 		status |= PCIM_PSTAT_D2;
2133 		break;
2134 	case PCI_POWERSTATE_D3:
2135 		status |= PCIM_PSTAT_D3;
2136 		break;
2137 	default:
2138 		return (EINVAL);
2139 	}
2140 
2141 	if (bootverbose)
2142 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2143 		    state);
2144 
2145 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2146 	if (delay)
2147 		DELAY(delay);
2148 	return (0);
2149 }
2150 
2151 int
2152 pci_get_powerstate_method(device_t dev, device_t child)
2153 {
2154 	struct pci_devinfo *dinfo = device_get_ivars(child);
2155 	pcicfgregs *cfg = &dinfo->cfg;
2156 	uint16_t status;
2157 	int result;
2158 
2159 	if (cfg->pp.pp_cap != 0) {
2160 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2161 		switch (status & PCIM_PSTAT_DMASK) {
2162 		case PCIM_PSTAT_D0:
2163 			result = PCI_POWERSTATE_D0;
2164 			break;
2165 		case PCIM_PSTAT_D1:
2166 			result = PCI_POWERSTATE_D1;
2167 			break;
2168 		case PCIM_PSTAT_D2:
2169 			result = PCI_POWERSTATE_D2;
2170 			break;
2171 		case PCIM_PSTAT_D3:
2172 			result = PCI_POWERSTATE_D3;
2173 			break;
2174 		default:
2175 			result = PCI_POWERSTATE_UNKNOWN;
2176 			break;
2177 		}
2178 	} else {
2179 		/* No support, device is always at D0 */
2180 		result = PCI_POWERSTATE_D0;
2181 	}
2182 	return (result);
2183 }
2184 
2185 /*
2186  * Some convenience functions for PCI device drivers.
2187  */
2188 
2189 static __inline void
2190 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2191 {
2192 	uint16_t	command;
2193 
2194 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2195 	command |= bit;
2196 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2197 }
2198 
2199 static __inline void
2200 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2201 {
2202 	uint16_t	command;
2203 
2204 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2205 	command &= ~bit;
2206 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2207 }
2208 
2209 int
2210 pci_enable_busmaster_method(device_t dev, device_t child)
2211 {
2212 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2213 	return (0);
2214 }
2215 
2216 int
2217 pci_disable_busmaster_method(device_t dev, device_t child)
2218 {
2219 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2220 	return (0);
2221 }
2222 
2223 int
2224 pci_enable_io_method(device_t dev, device_t child, int space)
2225 {
2226 	uint16_t bit;
2227 
2228 	switch(space) {
2229 	case SYS_RES_IOPORT:
2230 		bit = PCIM_CMD_PORTEN;
2231 		break;
2232 	case SYS_RES_MEMORY:
2233 		bit = PCIM_CMD_MEMEN;
2234 		break;
2235 	default:
2236 		return (EINVAL);
2237 	}
2238 	pci_set_command_bit(dev, child, bit);
2239 	return (0);
2240 }
2241 
2242 int
2243 pci_disable_io_method(device_t dev, device_t child, int space)
2244 {
2245 	uint16_t bit;
2246 
2247 	switch(space) {
2248 	case SYS_RES_IOPORT:
2249 		bit = PCIM_CMD_PORTEN;
2250 		break;
2251 	case SYS_RES_MEMORY:
2252 		bit = PCIM_CMD_MEMEN;
2253 		break;
2254 	default:
2255 		return (EINVAL);
2256 	}
2257 	pci_clear_command_bit(dev, child, bit);
2258 	return (0);
2259 }
2260 
2261 /*
2262  * New style pci driver.  Parent device is either a pci-host-bridge or a
2263  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2264  */
2265 
/*
 * Dump the cached configuration header of a device to the console.
 * Only emits output when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity, location and header basics. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management capability and current state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			/* MSI capability summary. */
			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/* MSI-X capability summary, including which BARs hold
			 * the vector table and the pending-bit array. */
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2322 
2323 static int
2324 pci_porten(device_t dev)
2325 {
2326 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2327 }
2328 
2329 static int
2330 pci_memen(device_t dev)
2331 {
2332 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2333 }
2334 
/*
 * Read the current value of the BAR at 'reg' into *mapp and its sizing
 * probe value (the read-back after writing all 1's) into *testvalp.
 * The BAR is restored to its original contents before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding now that the BAR holds its original value. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2396 
2397 static void
2398 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2399 {
2400 	pci_addr_t map;
2401 	int ln2range;
2402 
2403 	map = pci_read_config(dev, reg, 4);
2404 
2405 	/* The device ROM BAR is always 32-bits. */
2406 	if (reg == PCIR_BIOS)
2407 		return;
2408 	ln2range = pci_maprange(map);
2409 	pci_write_config(dev, reg, base, 4);
2410 	if (ln2range == 64)
2411 		pci_write_config(dev, reg + 4, base >> 32, 4);
2412 }
2413 
2414 /*
2415  * Add a resource based on a pci map register. Return 1 if the map
2416  * register is a 32bit map register or 2 if it is a 64bit register.
2417  */
2418 static int
2419 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2420     int force, int prefetch)
2421 {
2422 	pci_addr_t base, map, testval;
2423 	pci_addr_t start, end, count;
2424 	int barlen, basezero, maprange, mapsize, type;
2425 	uint16_t cmd;
2426 	struct resource *res;
2427 
2428 	pci_read_bar(dev, reg, &map, &testval);
2429 	if (PCI_BAR_MEM(map)) {
2430 		type = SYS_RES_MEMORY;
2431 		if (map & PCIM_BAR_MEM_PREFETCH)
2432 			prefetch = 1;
2433 	} else
2434 		type = SYS_RES_IOPORT;
2435 	mapsize = pci_mapsize(testval);
2436 	base = pci_mapbase(map);
2437 #ifdef __PCI_BAR_ZERO_VALID
2438 	basezero = 0;
2439 #else
2440 	basezero = base == 0;
2441 #endif
2442 	maprange = pci_maprange(map);
2443 	barlen = maprange == 64 ? 2 : 1;
2444 
2445 	/*
2446 	 * For I/O registers, if bottom bit is set, and the next bit up
2447 	 * isn't clear, we know we have a BAR that doesn't conform to the
2448 	 * spec, so ignore it.  Also, sanity check the size of the data
2449 	 * areas to the type of memory involved.  Memory must be at least
2450 	 * 16 bytes in size, while I/O ranges must be at least 4.
2451 	 */
2452 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2453 		return (barlen);
2454 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2455 	    (type == SYS_RES_IOPORT && mapsize < 2))
2456 		return (barlen);
2457 
2458 	if (bootverbose) {
2459 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2460 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2461 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2462 			printf(", port disabled\n");
2463 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2464 			printf(", memory disabled\n");
2465 		else
2466 			printf(", enabled\n");
2467 	}
2468 
2469 	/*
2470 	 * If base is 0, then we have problems if this architecture does
2471 	 * not allow that.  It is best to ignore such entries for the
2472 	 * moment.  These will be allocated later if the driver specifically
2473 	 * requests them.  However, some removable busses look better when
2474 	 * all resources are allocated, so allow '0' to be overriden.
2475 	 *
2476 	 * Similarly treat maps whose values is the same as the test value
2477 	 * read back.  These maps have had all f's written to them by the
2478 	 * BIOS in an attempt to disable the resources.
2479 	 */
2480 	if (!force && (basezero || map == testval))
2481 		return (barlen);
2482 	if ((u_long)base != base) {
2483 		device_printf(bus,
2484 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2485 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2486 		    pci_get_function(dev), reg);
2487 		return (barlen);
2488 	}
2489 
2490 	/*
2491 	 * This code theoretically does the right thing, but has
2492 	 * undesirable side effects in some cases where peripherals
2493 	 * respond oddly to having these bits enabled.  Let the user
2494 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2495 	 * default).
2496 	 */
2497 	if (pci_enable_io_modes) {
2498 		/* Turn on resources that have been left off by a lazy BIOS */
2499 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2500 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2501 			cmd |= PCIM_CMD_PORTEN;
2502 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2503 		}
2504 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2505 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2506 			cmd |= PCIM_CMD_MEMEN;
2507 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2508 		}
2509 	} else {
2510 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2511 			return (barlen);
2512 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2513 			return (barlen);
2514 	}
2515 
2516 	count = 1 << mapsize;
2517 	if (basezero || base == pci_mapbase(testval)) {
2518 		start = 0;	/* Let the parent decide. */
2519 		end = ~0ULL;
2520 	} else {
2521 		start = base;
2522 		end = base + (1 << mapsize) - 1;
2523 	}
2524 	resource_list_add(rl, type, reg, start, end, count);
2525 
2526 	/*
2527 	 * Try to allocate the resource for this BAR from our parent
2528 	 * so that this resource range is already reserved.  The
2529 	 * driver for this device will later inherit this resource in
2530 	 * pci_alloc_resource().
2531 	 */
2532 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2533 	    prefetch ? RF_PREFETCHABLE : 0);
2534 	if (res == NULL) {
2535 		/*
2536 		 * If the allocation fails, clear the BAR and delete
2537 		 * the resource list entry to force
2538 		 * pci_alloc_resource() to allocate resources from the
2539 		 * parent.
2540 		 */
2541 		resource_list_delete(rl, type, reg);
2542 		start = 0;
2543 	} else
2544 		start = rman_get_start(res);
2545 	pci_write_bar(dev, reg, start);
2546 	return (barlen);
2547 }
2548 
2549 /*
2550  * For ATA devices we need to decide early what addressing mode to use.
2551  * Legacy demands that the primary and secondary ATA ports sits on the
2552  * same addresses that old ISA hardware did. This dictates that we use
2553  * those addresses and ignore the BAR's if we cannot set PCI native
2554  * addressing mode.
2555  */
2556 static void
2557 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2558     uint32_t prefetchmask)
2559 {
2560 	struct resource *r;
2561 	int rid, type, progif;
2562 #if 0
2563 	/* if this device supports PCI native addressing use it */
2564 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2565 	if ((progif & 0x8a) == 0x8a) {
2566 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2567 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2568 			printf("Trying ATA native PCI addressing mode\n");
2569 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2570 		}
2571 	}
2572 #endif
2573 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2574 	type = SYS_RES_IOPORT;
2575 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2576 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2577 		    prefetchmask & (1 << 0));
2578 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2579 		    prefetchmask & (1 << 1));
2580 	} else {
2581 		rid = PCIR_BAR(0);
2582 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2583 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2584 		    0x1f7, 8, 0);
2585 		rid = PCIR_BAR(1);
2586 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2587 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2588 		    0x3f6, 1, 0);
2589 	}
2590 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2591 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2592 		    prefetchmask & (1 << 2));
2593 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2594 		    prefetchmask & (1 << 3));
2595 	} else {
2596 		rid = PCIR_BAR(2);
2597 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2598 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2599 		    0x177, 8, 0);
2600 		rid = PCIR_BAR(3);
2601 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2602 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2603 		    0x376, 1, 0);
2604 	}
2605 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2606 	    prefetchmask & (1 << 4));
2607 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2608 	    prefetchmask & (1 << 5));
2609 }
2610 
/*
 * Select and program the interrupt line for a device.  The IRQ is taken
 * from (in order of preference) a hw.pci tunable, the bus's interrupt
 * routing method, or the intline register, and is recorded as the
 * rid 0 SYS_RES_IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2658 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM owns the controller; request an ownership change. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM did not let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2695 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Clear the interrupt enable register, then unmap. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2719 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Walk the extended capability list for a LEGSUP entry. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A set BIOS semaphore means the BIOS still owns it. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2775 
/*
 * Populate the resource list of 'dev' with its BARs, any quirked
 * registers and its legacy interrupt, and perform early takeover of USB
 * controllers still owned by the BIOS/SMM.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 2 for a 64-bit BAR, skipping the
		 * register consumed by the BAR's upper half. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2830 
/*
 * Enumerate bus 'busno' in domain 'domain' and add a child device for
 * each function found.  'dinfo_size' lets subclassed busses embed
 * struct pci_devinfo inside a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Read function 0's header type to learn if the slot is
		 * multi-function (REG() uses 'f', so set it first). */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2863 
/*
 * Add a single discovered device as a child of the bus: create the
 * newbus child, attach the devinfo as ivars, initialize its resource
 * list, and reserve its BAR resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot config space, then restore it before probing resources. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2875 
2876 static int
2877 pci_probe(device_t dev)
2878 {
2879 
2880 	device_set_desc(dev, "PCI bus");
2881 
2882 	/* Allow other subclasses to override this driver. */
2883 	return (BUS_PROBE_GENERIC);
2884 }
2885 
/* Attach the bus: discover our bus/domain numbers and add children. */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2905 
/*
 * Move each attached, header-type-0 child in 'devlist' to the given
 * power state (or the state the firmware suggests instead).  No-op
 * when pci_do_power_resume is disabled.
 */
static void
pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
    int state)
{
	device_t child, pcib;
	struct pci_devinfo *dinfo;
	int dstate, i;

	if (!pci_do_power_resume)
		return;

	/*
	 * Set the device to the given state.  If the firmware suggests
	 * a different power state, use it instead.  If power management
	 * is not present, the firmware is responsible for managing
	 * device power.  Skip children who aren't attached since they
	 * are handled separately.  Only manage type 0 devices for now.
	 */
	pcib = device_get_parent(dev);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		dstate = state;
		if (device_is_attached(child) &&
		    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) ==
		    PCIM_HDRTYPE_NORMAL &&
		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
			pci_set_powerstate(child, dstate);
	}
}
2936 
/*
 * Suspend the bus: save each child's config space, suspend the
 * children, and finally drop them to D3 for the sleep state.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	pci_set_power_children(dev, devlist, numdevs, PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
2966 
/*
 * Resume the bus: power each child back to D0, restore its saved
 * config space, and then resume the children.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	pci_set_power_children(dev, devlist, numdevs, PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Unattached children are re-saved (and may be powered down). */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2993 
2994 static void
2995 pci_load_vendor_data(void)
2996 {
2997 	caddr_t vendordata, info;
2998 
2999 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3000 		info = preload_search_info(vendordata, MODINFO_ADDR);
3001 		pci_vendordata = *(char **)info;
3002 		info = preload_search_info(vendordata, MODINFO_SIZE);
3003 		pci_vendordata_size = *(size_t *)info;
3004 		/* terminate the database */
3005 		pci_vendordata[pci_vendordata_size] = '\n';
3006 	}
3007 }
3008 
3009 void
3010 pci_driver_added(device_t dev, driver_t *driver)
3011 {
3012 	int numdevs;
3013 	device_t *devlist;
3014 	device_t child;
3015 	struct pci_devinfo *dinfo;
3016 	int i;
3017 
3018 	if (bootverbose)
3019 		device_printf(dev, "driver added\n");
3020 	DEVICE_IDENTIFY(driver, dev);
3021 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3022 		return;
3023 	for (i = 0; i < numdevs; i++) {
3024 		child = devlist[i];
3025 		if (device_get_state(child) != DS_NOTPRESENT)
3026 			continue;
3027 		dinfo = device_get_ivars(child);
3028 		pci_print_verbose(dinfo);
3029 		if (bootverbose)
3030 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3031 		pci_cfg_restore(child, dinfo);
3032 		if (device_probe_and_attach(child) != 0)
3033 			pci_cfg_save(child, dinfo, 1);
3034 	}
3035 	free(devlist, M_TEMP);
3036 }
3037 
/*
 * Bus method: install an interrupt handler for a child, performing the
 * PCI-specific bookkeeping that distinguishes legacy INTx (rid 0) from
 * MSI/MSI-X vectors (rid > 0): lazy vector mapping via the parent
 * bridge, per-vector handler reference counting, and enabling/masking
 * of the relevant interrupt delivery mechanism.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	/* Hook up the handler first; it is torn down below on failure. */
	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* rid 0 is the legacy INTx interrupt; rid > 0 means MSI/MSI-X. */
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Program the MSI capability on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Write the table entry and unmask on first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * On mapping failure, detach the handler installed above
		 * before reporting the error.  On the normal fall-through
		 * path error is still 0 from bus_generic_setup_intr().
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3129 
/*
 * Bus method: remove an interrupt handler from a child.  The inverse of
 * pci_setup_intr(): drops the per-vector handler reference and masks the
 * MSI-X vector or disables MSI delivery once the count reaches zero.
 * Returns EINVAL for an inactive IRQ or mismatched bookkeeping.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	/* rid 0 is the legacy INTx interrupt; rid > 0 means MSI/MSI-X. */
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * this presumes every active MSI/MSI-X IRQ has a matching
		 * resource list entry — confirm against the allocation path.
		 */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last handler gone: turn MSI delivery off. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler gone: mask this MSI-X vector. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3188 
3189 int
3190 pci_print_child(device_t dev, device_t child)
3191 {
3192 	struct pci_devinfo *dinfo;
3193 	struct resource_list *rl;
3194 	int retval = 0;
3195 
3196 	dinfo = device_get_ivars(child);
3197 	rl = &dinfo->resources;
3198 
3199 	retval += bus_print_child_header(dev, child);
3200 
3201 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3202 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3203 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3204 	if (device_get_flags(dev))
3205 		retval += printf(" flags %#x", device_get_flags(dev));
3206 
3207 	retval += printf(" at device %d.%d", pci_get_slot(child),
3208 	    pci_get_function(child));
3209 
3210 	retval += bus_print_child_footer(dev, child);
3211 
3212 	return (retval);
3213 }
3214 
/*
 * Table mapping PCI class/subclass codes to human-readable descriptions,
 * used by pci_probe_nomatch() when no driver claims a device.  An entry
 * with subclass -1 names the class as a whole; subclass entries refine
 * it.  The table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3306 
3307 void
3308 pci_probe_nomatch(device_t dev, device_t child)
3309 {
3310 	int	i;
3311 	char	*cp, *scp, *device;
3312 
3313 	/*
3314 	 * Look for a listing for this device in a loaded device database.
3315 	 */
3316 	if ((device = pci_describe_device(child)) != NULL) {
3317 		device_printf(dev, "<%s>", device);
3318 		free(device, M_DEVBUF);
3319 	} else {
3320 		/*
3321 		 * Scan the class/subclass descriptions for a general
3322 		 * description.
3323 		 */
3324 		cp = "unknown";
3325 		scp = NULL;
3326 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3327 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3328 				if (pci_nomatch_tab[i].subclass == -1) {
3329 					cp = pci_nomatch_tab[i].desc;
3330 				} else if (pci_nomatch_tab[i].subclass ==
3331 				    pci_get_subclass(child)) {
3332 					scp = pci_nomatch_tab[i].desc;
3333 				}
3334 			}
3335 		}
3336 		device_printf(dev, "<%s%s%s>",
3337 		    cp ? cp : "",
3338 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3339 		    scp ? scp : "");
3340 	}
3341 	printf(" at device %d.%d (no driver attached)\n",
3342 	    pci_get_slot(child), pci_get_function(child));
3343 	pci_cfg_save(child, device_get_ivars(child), 1);
3344 	return;
3345 }
3346 
3347 /*
3348  * Parse the PCI device database, if loaded, and return a pointer to a
3349  * description of the device.
3350  *
3351  * The database is flat text formatted as follows:
3352  *
3353  * Any line not in a valid format is ignored.
3354  * Lines are terminated with newline '\n' characters.
3355  *
3356  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3357  * the vendor name.
3358  *
3359  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3360  * - devices cannot be listed without a corresponding VENDOR line.
3361  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3362  * another TAB, then the device name.
3363  */
3364 
3365 /*
3366  * Assuming (ptr) points to the beginning of a line in the database,
3367  * return the vendor or device and description of the next entry.
3368  * The value of (vendor) or (device) inappropriate for the entry type
3369  * is set to -1.  Returns nonzero at the end of the database.
3370  *
3371  * Note that this is slightly unrobust in the face of corrupt data;
3372  * we attempt to safeguard against this by spamming the end of the
3373  * database with a newline when we initialise.
3374  */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Default to "no value" for whichever field this line lacks. */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the database from the cursor. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/*
		 * vendor entry?  NOTE: the %80[^\n] conversion stores up
		 * to 80 characters plus a NUL, so *desc must be at least
		 * 81 bytes long.
		 */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		/*
		 * When left reaches 0 this reads the sentinel '\n' that
		 * pci_load_vendor_data() appended one byte past the data,
		 * and the top-of-loop left check then terminates the scan.
		 */
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3420 
3421 static char *
3422 pci_describe_device(device_t dev)
3423 {
3424 	int	vendor, device;
3425 	char	*desc, *vp, *dp, *line;
3426 
3427 	desc = vp = dp = NULL;
3428 
3429 	/*
3430 	 * If we have no vendor data, we can't do anything.
3431 	 */
3432 	if (pci_vendordata == NULL)
3433 		goto out;
3434 
3435 	/*
3436 	 * Scan the vendor data looking for this device
3437 	 */
3438 	line = pci_vendordata;
3439 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3440 		goto out;
3441 	for (;;) {
3442 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3443 			goto out;
3444 		if (vendor == pci_get_vendor(dev))
3445 			break;
3446 	}
3447 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3448 		goto out;
3449 	for (;;) {
3450 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3451 			*dp = 0;
3452 			break;
3453 		}
3454 		if (vendor != -1) {
3455 			*dp = 0;
3456 			break;
3457 		}
3458 		if (device == pci_get_device(dev))
3459 			break;
3460 	}
3461 	if (dp[0] == '\0')
3462 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3463 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3464 	    NULL)
3465 		sprintf(desc, "%s, %s", vp, dp);
3466  out:
3467 	if (vp != NULL)
3468 		free(vp, M_DEVBUF);
3469 	if (dp != NULL)
3470 		free(dp, M_DEVBUF);
3471 	return(desc);
3472 }
3473 
3474 int
3475 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3476 {
3477 	struct pci_devinfo *dinfo;
3478 	pcicfgregs *cfg;
3479 
3480 	dinfo = device_get_ivars(child);
3481 	cfg = &dinfo->cfg;
3482 
3483 	switch (which) {
3484 	case PCI_IVAR_ETHADDR:
3485 		/*
3486 		 * The generic accessor doesn't deal with failure, so
3487 		 * we set the return value, then return an error.
3488 		 */
3489 		*((uint8_t **) result) = NULL;
3490 		return (EINVAL);
3491 	case PCI_IVAR_SUBVENDOR:
3492 		*result = cfg->subvendor;
3493 		break;
3494 	case PCI_IVAR_SUBDEVICE:
3495 		*result = cfg->subdevice;
3496 		break;
3497 	case PCI_IVAR_VENDOR:
3498 		*result = cfg->vendor;
3499 		break;
3500 	case PCI_IVAR_DEVICE:
3501 		*result = cfg->device;
3502 		break;
3503 	case PCI_IVAR_DEVID:
3504 		*result = (cfg->device << 16) | cfg->vendor;
3505 		break;
3506 	case PCI_IVAR_CLASS:
3507 		*result = cfg->baseclass;
3508 		break;
3509 	case PCI_IVAR_SUBCLASS:
3510 		*result = cfg->subclass;
3511 		break;
3512 	case PCI_IVAR_PROGIF:
3513 		*result = cfg->progif;
3514 		break;
3515 	case PCI_IVAR_REVID:
3516 		*result = cfg->revid;
3517 		break;
3518 	case PCI_IVAR_INTPIN:
3519 		*result = cfg->intpin;
3520 		break;
3521 	case PCI_IVAR_IRQ:
3522 		*result = cfg->intline;
3523 		break;
3524 	case PCI_IVAR_DOMAIN:
3525 		*result = cfg->domain;
3526 		break;
3527 	case PCI_IVAR_BUS:
3528 		*result = cfg->bus;
3529 		break;
3530 	case PCI_IVAR_SLOT:
3531 		*result = cfg->slot;
3532 		break;
3533 	case PCI_IVAR_FUNCTION:
3534 		*result = cfg->func;
3535 		break;
3536 	case PCI_IVAR_CMDREG:
3537 		*result = cfg->cmdreg;
3538 		break;
3539 	case PCI_IVAR_CACHELNSZ:
3540 		*result = cfg->cachelnsz;
3541 		break;
3542 	case PCI_IVAR_MINGNT:
3543 		*result = cfg->mingnt;
3544 		break;
3545 	case PCI_IVAR_MAXLAT:
3546 		*result = cfg->maxlat;
3547 		break;
3548 	case PCI_IVAR_LATTIMER:
3549 		*result = cfg->lattimer;
3550 		break;
3551 	default:
3552 		return (ENOENT);
3553 	}
3554 	return (0);
3555 }
3556 
3557 int
3558 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3559 {
3560 	struct pci_devinfo *dinfo;
3561 
3562 	dinfo = device_get_ivars(child);
3563 
3564 	switch (which) {
3565 	case PCI_IVAR_INTPIN:
3566 		dinfo->cfg.intpin = value;
3567 		return (0);
3568 	case PCI_IVAR_ETHADDR:
3569 	case PCI_IVAR_SUBVENDOR:
3570 	case PCI_IVAR_SUBDEVICE:
3571 	case PCI_IVAR_VENDOR:
3572 	case PCI_IVAR_DEVICE:
3573 	case PCI_IVAR_DEVID:
3574 	case PCI_IVAR_CLASS:
3575 	case PCI_IVAR_SUBCLASS:
3576 	case PCI_IVAR_PROGIF:
3577 	case PCI_IVAR_REVID:
3578 	case PCI_IVAR_IRQ:
3579 	case PCI_IVAR_DOMAIN:
3580 	case PCI_IVAR_BUS:
3581 	case PCI_IVAR_SLOT:
3582 	case PCI_IVAR_FUNCTION:
3583 		return (EINVAL);	/* disallow for now */
3584 
3585 	default:
3586 		return (ENOENT);
3587 	}
3588 }
3589 
3590 
3591 #include "opt_ddb.h"
3592 #ifdef DDB
3593 #include <ddb/ddb.h>
3594 #include <sys/cons.h>
3595 
3596 /*
3597  * List resources based on pci map registers, used for within ddb
3598  */
3599 
3600 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3601 {
3602 	struct pci_devinfo *dinfo;
3603 	struct devlist *devlist_head;
3604 	struct pci_conf *p;
3605 	const char *name;
3606 	int i, error, none_count;
3607 
3608 	none_count = 0;
3609 	/* get the head of the device queue */
3610 	devlist_head = &pci_devq;
3611 
3612 	/*
3613 	 * Go through the list of devices and print out devices
3614 	 */
3615 	for (error = 0, i = 0,
3616 	     dinfo = STAILQ_FIRST(devlist_head);
3617 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3618 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3619 
3620 		/* Populate pd_name and pd_unit */
3621 		name = NULL;
3622 		if (dinfo->cfg.dev)
3623 			name = device_get_name(dinfo->cfg.dev);
3624 
3625 		p = &dinfo->conf;
3626 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3627 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3628 			(name && *name) ? name : "none",
3629 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3630 			none_count++,
3631 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3632 			p->pc_sel.pc_func, (p->pc_class << 16) |
3633 			(p->pc_subclass << 8) | p->pc_progif,
3634 			(p->pc_subdevice << 16) | p->pc_subvendor,
3635 			(p->pc_device << 16) | p->pc_vendor,
3636 			p->pc_revid, p->pc_hdr);
3637 	}
3638 }
3639 #endif /* DDB */
3640 
/*
 * Size a BAR by probing it, then reserve a matching range from the
 * parent bus and program the BAR with the assigned base address.
 * Returns the reserved (inactive) resource, or NULL if the BAR is
 * unimplemented, of the wrong type, or the allocation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* Reject requests whose type disagrees with the BAR's own type. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address the parent assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3736 
3737 
/*
 * Bus method: allocate a resource for a child.  For direct children this
 * performs PCI's lazy setup first — routing an INTx interrupt on demand
 * and reserving/sizing a BAR on first use — then satisfies the request
 * from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests from grandchildren pass straight up to our parent. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Other types need no preparation and fall straight through. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3788 
3789 int
3790 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3791     struct resource *r)
3792 {
3793 	int error;
3794 
3795 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3796 	if (error)
3797 		return (error);
3798 
3799 	/* Enable decoding in the command register when activating BARs. */
3800 	if (device_get_parent(child) == dev) {
3801 		/* Device ROMs need their decoding explicitly enabled. */
3802 		if (rid == PCIR_BIOS)
3803 			pci_write_config(child, rid, rman_get_start(r) |
3804 			    PCIM_BIOS_ENABLE, 4);
3805 		switch (type) {
3806 		case SYS_RES_IOPORT:
3807 		case SYS_RES_MEMORY:
3808 			error = PCI_ENABLE_IO(dev, child, type);
3809 			break;
3810 		}
3811 	}
3812 	return (error);
3813 }
3814 
3815 int
3816 pci_deactivate_resource(device_t dev, device_t child, int type,
3817     int rid, struct resource *r)
3818 {
3819 	int error;
3820 
3821 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3822 	if (error)
3823 		return (error);
3824 
3825 	/* Disable decoding for device ROMs. */
3826 	if (rid == PCIR_BIOS)
3827 		pci_write_config(child, rid, rman_get_start(r), 4);
3828 	return (0);
3829 }
3830 
/*
 * Destroy a child device: detach its driver, disable its port/memory
 * decoding, release every resource on its resource list, and free the
 * newbus device and its config-space bookkeeping.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * An entry still active or busy after detach means
			 * a driver leaked it; complain and force-release
			 * before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3870 
/*
 * Bus method: delete one entry from a direct child's resource list,
 * first unreserving it (and clearing the BAR so the device stops
 * decoding) if a resource is still attached.  Refuses to delete a
 * resource that is active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3913 
3914 struct resource_list *
3915 pci_get_resource_list (device_t dev, device_t child)
3916 {
3917 	struct pci_devinfo *dinfo = device_get_ivars(child);
3918 
3919 	return (&dinfo->resources);
3920 }
3921 
3922 uint32_t
3923 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3924 {
3925 	struct pci_devinfo *dinfo = device_get_ivars(child);
3926 	pcicfgregs *cfg = &dinfo->cfg;
3927 
3928 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3929 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3930 }
3931 
3932 void
3933 pci_write_config_method(device_t dev, device_t child, int reg,
3934     uint32_t val, int width)
3935 {
3936 	struct pci_devinfo *dinfo = device_get_ivars(child);
3937 	pcicfgregs *cfg = &dinfo->cfg;
3938 
3939 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3940 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3941 }
3942 
3943 int
3944 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3945     size_t buflen)
3946 {
3947 
3948 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3949 	    pci_get_function(child));
3950 	return (0);
3951 }
3952 
3953 int
3954 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3955     size_t buflen)
3956 {
3957 	struct pci_devinfo *dinfo;
3958 	pcicfgregs *cfg;
3959 
3960 	dinfo = device_get_ivars(child);
3961 	cfg = &dinfo->cfg;
3962 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3963 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3964 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3965 	    cfg->progif);
3966 	return (0);
3967 }
3968 
3969 int
3970 pci_assign_interrupt_method(device_t dev, device_t child)
3971 {
3972 	struct pci_devinfo *dinfo = device_get_ivars(child);
3973 	pcicfgregs *cfg = &dinfo->cfg;
3974 
3975 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3976 	    cfg->intpin));
3977 }
3978 
3979 static int
3980 pci_modevent(module_t mod, int what, void *arg)
3981 {
3982 	static struct cdev *pci_cdev;
3983 
3984 	switch (what) {
3985 	case MOD_LOAD:
3986 		STAILQ_INIT(&pci_devq);
3987 		pci_generation = 0;
3988 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3989 		    "pci");
3990 		pci_load_vendor_data();
3991 		break;
3992 
3993 	case MOD_UNLOAD:
3994 		destroy_dev(pci_cdev);
3995 		break;
3996 	}
3997 
3998 	return (0);
3999 }
4000 
/*
 * Restore a type 0 device's previously saved configuration header
 * (BARs, command register, interrupt routing, timing registers) and
 * its MSI/MSI-X state, e.g. on resume.  Counterpart of pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Rewrite the saved BARs and the writable type 0 header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4045 
/*
 * Save a type 0 device's writable configuration header registers into
 * the softc so pci_cfg_restore() can replay them later, then (when
 * 'setstate' is non-zero and pci_do_power_nodriver policy allows)
 * place the device in the D3 power state.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Apply the pci_do_power_nodriver policy (0..3, increasingly bold). */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4129