xref: /freebsd/sys/dev/pci/pci.c (revision 0f2bd1e89db1a2f09268edea21e0ead329e092df)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 static pci_addr_t	pci_mapbase(uint64_t mapreg);
73 static const char	*pci_maptype(uint64_t mapreg);
74 static int		pci_mapsize(uint64_t testval);
75 static int		pci_maprange(uint64_t mapreg);
76 static pci_addr_t	pci_rombase(uint64_t mapreg);
77 static int		pci_romsize(uint64_t testval);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80 
81 static int		pci_porten(device_t dev);
82 static int		pci_memen(device_t dev);
83 static void		pci_assign_interrupt(device_t bus, device_t dev,
84 			    int force_route);
85 static int		pci_add_map(device_t bus, device_t dev, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 static int		pci_remap_intr_method(device_t bus, device_t dev,
115 			    u_int irq);
116 
/*
 * Kobj dispatch table for the PCI bus driver: device lifecycle entry
 * points, generic newbus operations, and the PCI-specific interface
 * (config space access, power management, MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
169 
/* Declare the pci driver class and hang it off the pcib (bridge) bus. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Buffer and length of the vendor/device description data used when
 * describing devices; filled in by pci_load_vendor_data().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
178 
179 
/*
 * A table-driven workaround for a broken device or chipset, keyed by
 * the combined device(high)/vendor(low) ID word.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
188 
/* Known quirky devices; the table is terminated by an all-zero entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
223 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of enumerated functions */
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCIe / PCI-X capability is seen; see pci_read_extcap(). */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");
244 
245 static int pci_do_power_nodriver = 0;
246 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
247 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
248     &pci_do_power_nodriver, 0,
249   "Place a function into D3 state when no driver attaches to it.  0 means\n\
250 disable.  1 means conservatively place devices into D3 state.  2 means\n\
251 agressively place devices into D3 state.  3 means put absolutely everything\n\
252 in D3 state.");
253 
/* Non-static: referenced by other parts of the PCI code. */
int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only on x86, per the description below. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
285 
286 /* Find a device_t by bus/slot/function in domain 0 */
287 
/*
 * Convenience wrapper for pci_find_dbsf() that assumes PCI domain 0.
 * Returns the device_t for bus/slot/function, or NULL if not found.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
294 
295 /* Find a device_t by domain/bus/slot/function */
296 
297 device_t
298 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
299 {
300 	struct pci_devinfo *dinfo;
301 
302 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
303 		if ((dinfo->cfg.domain == domain) &&
304 		    (dinfo->cfg.bus == bus) &&
305 		    (dinfo->cfg.slot == slot) &&
306 		    (dinfo->cfg.func == func)) {
307 			return (dinfo->cfg.dev);
308 		}
309 	}
310 
311 	return (NULL);
312 }
313 
314 /* Find a device_t by vendor/device ID */
315 
316 device_t
317 pci_find_device(uint16_t vendor, uint16_t device)
318 {
319 	struct pci_devinfo *dinfo;
320 
321 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322 		if ((dinfo->cfg.vendor == vendor) &&
323 		    (dinfo->cfg.device == device)) {
324 			return (dinfo->cfg.dev);
325 		}
326 	}
327 
328 	return (NULL);
329 }
330 
331 static int
332 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
333 {
334 	va_list ap;
335 	int retval;
336 
337 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
338 	    cfg->func);
339 	va_start(ap, fmt);
340 	retval += vprintf(fmt, ap);
341 	va_end(ap);
342 	return (retval);
343 }
344 
345 /* return base address of memory or port map */
346 
347 static pci_addr_t
348 pci_mapbase(uint64_t mapreg)
349 {
350 
351 	if (PCI_BAR_MEM(mapreg))
352 		return (mapreg & PCIM_BAR_MEM_BASE);
353 	else
354 		return (mapreg & PCIM_BAR_IO_BASE);
355 }
356 
357 /* return map type of memory or port map */
358 
359 static const char *
360 pci_maptype(uint64_t mapreg)
361 {
362 
363 	if (PCI_BAR_IO(mapreg))
364 		return ("I/O Port");
365 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
366 		return ("Prefetchable Memory");
367 	return ("Memory");
368 }
369 
370 /* return log2 of map size decoded for memory or port map */
371 
372 static int
373 pci_mapsize(uint64_t testval)
374 {
375 	int ln2size;
376 
377 	testval = pci_mapbase(testval);
378 	ln2size = 0;
379 	if (testval != 0) {
380 		while ((testval & 1) == 0)
381 		{
382 			ln2size++;
383 			testval >>= 1;
384 		}
385 	}
386 	return (ln2size);
387 }
388 
389 /* return base address of device ROM */
390 
/* Mask off the enable/reserved bits to get the expansion ROM base. */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
397 
/* return log2 of map size decoded for device ROM */
399 
400 static int
401 pci_romsize(uint64_t testval)
402 {
403 	int ln2size;
404 
405 	testval = pci_rombase(testval);
406 	ln2size = 0;
407 	if (testval != 0) {
408 		while ((testval & 1) == 0)
409 		{
410 			ln2size++;
411 			testval >>= 1;
412 		}
413 	}
414 	return (ln2size);
415 }
416 
417 /* return log2 of address range supported by map register */
418 
419 static int
420 pci_maprange(uint64_t mapreg)
421 {
422 	int ln2range = 0;
423 
424 	if (PCI_BAR_IO(mapreg))
425 		ln2range = 32;
426 	else
427 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
428 		case PCIM_BAR_MEM_32:
429 			ln2range = 32;
430 			break;
431 		case PCIM_BAR_MEM_1MB:
432 			ln2range = 20;
433 			break;
434 		case PCIM_BAR_MEM_64:
435 			ln2range = 64;
436 			break;
437 		}
438 	return (ln2range);
439 }
440 
441 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
442 
/*
 * Patch up config data from pre-2.0 devices: a device that claims a
 * normal header but has a PCI-PCI bridge class code is re-typed as a
 * bridge header.  Only normal (type 0) headers are touched.
 */
static void
pci_fixancient(pcicfgregs *cfg)
{
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
453 
454 /* extract header type specific config data */
455 
/*
 * Read the header-type specific registers — subvendor/subdevice IDs
 * (where the header defines them) and the number of BARs — into 'cfg',
 * based on the header type already stored in cfg->hdrtype.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1 headers carry no subvendor/subdevice registers. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
477 
478 /* read configuration header into pcicfgregs structure */
/*
 * Probe the function at domain 'd', bus 'b', slot 's', function 'f'.
 * If a device responds, allocate a pci_devinfo of 'size' bytes ('size'
 * lets callers embed pci_devinfo in a larger structure — confirm with
 * callers), fill in its config registers and capability data, append
 * it to the global device list, and return it.  Returns NULL when no
 * device is present at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID word means nothing responded at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): with M_WAITOK this check is defensive only. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Latch the multi-function bit, then strip it from hdrtype. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Only walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		/* Publish the new entry on the global device list. */
		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the identity data into the pciio conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
552 
/*
 * Walk the device's capability list and record the capabilities the
 * PCI code cares about (power management, HyperTransport MSI mapping,
 * MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express) into 'cfg'.  Also
 * sets the pcix_chipset/pcie_chipset hints used for MSI blacklisting.
 *
 * The REG/WREG macros are intentionally left defined at the end of
 * this function; the VPD helpers below reuse them.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
688 
689 /*
690  * PCI Vital Product Data
691  */
692 
693 #define	PCI_VPD_TIMEOUT		1000000
694 
695 static int
696 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
697 {
698 	int count = PCI_VPD_TIMEOUT;
699 
700 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
701 
702 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
703 
704 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
705 		if (--count < 0)
706 			return (ENXIO);
707 		DELAY(1);	/* limit looping */
708 	}
709 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
710 
711 	return (0);
712 }
713 
#if 0
/*
 * Write one aligned 32-bit word of VPD data at byte offset 'reg'.
 * Mirror image of pci_read_vpd_reg(): the data word is written first,
 * then the address with bit 15 set; the hardware clears bit 15 when
 * the write completes.  Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
733 
734 #undef PCI_VPD_TIMEOUT
735 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word of VPD data */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
744 
745 static int
746 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
747 {
748 	uint32_t reg;
749 	uint8_t byte;
750 
751 	if (vrs->bytesinval == 0) {
752 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
753 			return (ENXIO);
754 		vrs->val = le32toh(reg);
755 		vrs->off += 4;
756 		byte = vrs->val & 0xff;
757 		vrs->bytesinval = 3;
758 	} else {
759 		vrs->val = vrs->val >> 8;
760 		byte = vrs->val & 0xff;
761 		vrs->bytesinval--;
762 	}
763 
764 	vrs->cksum += byte;
765 	*data = byte;
766 	return (0);
767 }
768 
/*
 * Parse the device's Vital Product Data and cache the decoded contents
 * in cfg->vpd.  A small state machine drives the parse:
 *
 *	state 0:   at a resource item header (small or large format)
 *	state 1:   copying the Identifier String into vpd_ident
 *	state 2/3: VPD-R (read-only) keyword header / keyword value
 *	state 5/6: VPD-W (read/write) keyword header / keyword value
 *	state 4:   byte-skip state (NOTE(review): nothing here sets
 *	           state 4; it appears to be a vestigial skip state)
 *	state -1:  normal termination; state -2: read error
 *
 * The "RV" read-only keyword carries a checksum byte; if the running
 * checksum does not validate, the read-only data is discarded.  On a
 * read error everything is discarded.  vpd_cached is set in all cases
 * so a failed parse is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large item: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length must fit in the VPD address space. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small item: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array by doubling as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" carries the checksum; validate it once. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip a byte of an item being ignored. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the read/write array by doubling as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the parse attempted so it is never retried. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1048 
1049 int
1050 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1051 {
1052 	struct pci_devinfo *dinfo = device_get_ivars(child);
1053 	pcicfgregs *cfg = &dinfo->cfg;
1054 
1055 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1056 		pci_read_vpd(device_get_parent(dev), cfg);
1057 
1058 	*identptr = cfg->vpd.vpd_ident;
1059 
1060 	if (*identptr == NULL)
1061 		return (ENXIO);
1062 
1063 	return (0);
1064 }
1065 
1066 int
1067 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1068 	const char **vptr)
1069 {
1070 	struct pci_devinfo *dinfo = device_get_ivars(child);
1071 	pcicfgregs *cfg = &dinfo->cfg;
1072 	int i;
1073 
1074 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1075 		pci_read_vpd(device_get_parent(dev), cfg);
1076 
1077 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1078 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1079 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1080 			*vptr = cfg->vpd.vpd_ros[i].value;
1081 		}
1082 
1083 	if (i != cfg->vpd.vpd_rocnt)
1084 		return (0);
1085 
1086 	*vptr = NULL;
1087 	return (ENXIO);
1088 }
1089 
1090 /*
1091  * Find the requested extended capability and return the offset in
1092  * configuration space via the pointer provided. The function returns
1093  * 0 on success and error code otherwise.
1094  */
1095 int
1096 pci_find_extcap_method(device_t dev, device_t child, int capability,
1097     int *capreg)
1098 {
1099 	struct pci_devinfo *dinfo = device_get_ivars(child);
1100 	pcicfgregs *cfg = &dinfo->cfg;
1101 	u_int32_t status;
1102 	u_int8_t ptr;
1103 
1104 	/*
1105 	 * Check the CAP_LIST bit of the PCI status register first.
1106 	 */
1107 	status = pci_read_config(child, PCIR_STATUS, 2);
1108 	if (!(status & PCIM_STATUS_CAPPRESENT))
1109 		return (ENXIO);
1110 
1111 	/*
1112 	 * Determine the start pointer of the capabilities list.
1113 	 */
1114 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1115 	case PCIM_HDRTYPE_NORMAL:
1116 	case PCIM_HDRTYPE_BRIDGE:
1117 		ptr = PCIR_CAP_PTR;
1118 		break;
1119 	case PCIM_HDRTYPE_CARDBUS:
1120 		ptr = PCIR_CAP_PTR_2;
1121 		break;
1122 	default:
1123 		/* XXX: panic? */
1124 		return (ENXIO);		/* no extended capabilities support */
1125 	}
1126 	ptr = pci_read_config(child, ptr, 1);
1127 
1128 	/*
1129 	 * Traverse the capabilities list.
1130 	 */
1131 	while (ptr != 0) {
1132 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1133 			if (capreg != NULL)
1134 				*capreg = ptr;
1135 			return (0);
1136 		}
1137 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1138 	}
1139 
1140 	return (ENOENT);
1141 }
1142 
1143 /*
1144  * Support for MSI-X message interrupts.
1145  */
1146 void
1147 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1148 {
1149 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1150 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1151 	uint32_t offset;
1152 
1153 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1154 	offset = msix->msix_table_offset + index * 16;
1155 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1156 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1157 	bus_write_4(msix->msix_table_res, offset + 8, data);
1158 
1159 	/* Enable MSI -> HT mapping. */
1160 	pci_ht_map_msi(dev, address);
1161 }
1162 
1163 void
1164 pci_mask_msix(device_t dev, u_int index)
1165 {
1166 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1167 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1168 	uint32_t offset, val;
1169 
1170 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1171 	offset = msix->msix_table_offset + index * 16 + 12;
1172 	val = bus_read_4(msix->msix_table_res, offset);
1173 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1174 		val |= PCIM_MSIX_VCTRL_MASK;
1175 		bus_write_4(msix->msix_table_res, offset, val);
1176 	}
1177 }
1178 
1179 void
1180 pci_unmask_msix(device_t dev, u_int index)
1181 {
1182 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1183 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1184 	uint32_t offset, val;
1185 
1186 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1187 	offset = msix->msix_table_offset + index * 16 + 12;
1188 	val = bus_read_4(msix->msix_table_res, offset);
1189 	if (val & PCIM_MSIX_VCTRL_MASK) {
1190 		val &= ~PCIM_MSIX_VCTRL_MASK;
1191 		bus_write_4(msix->msix_table_res, offset, val);
1192 	}
1193 }
1194 
1195 int
1196 pci_pending_msix(device_t dev, u_int index)
1197 {
1198 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1199 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1200 	uint32_t offset, bit;
1201 
1202 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1203 	offset = msix->msix_pba_offset + (index / 32) * 4;
1204 	bit = 1 << index % 32;
1205 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1206 }
1207 
1208 /*
1209  * Restore MSI-X registers and table during resume.  If MSI-X is
1210  * enabled then walk the virtual table to restore the actual MSI-X
1211  * table.
1212  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; msix_vectors[] is 0-based. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1240 
1241 /*
1242  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1243  * returned in *count.  After this function returns, each message will be
1244  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1245  */
1246 int
1247 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1248 {
1249 	struct pci_devinfo *dinfo = device_get_ivars(child);
1250 	pcicfgregs *cfg = &dinfo->cfg;
1251 	struct resource_list_entry *rle;
1252 	int actual, error, i, irq, max;
1253 
1254 	/* Don't let count == 0 get us into trouble. */
1255 	if (*count == 0)
1256 		return (EINVAL);
1257 
1258 	/* If rid 0 is allocated, then fail. */
1259 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1260 	if (rle != NULL && rle->res != NULL)
1261 		return (ENXIO);
1262 
1263 	/* Already have allocated messages? */
1264 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1265 		return (ENXIO);
1266 
1267 	/* If MSI is blacklisted for this system, fail. */
1268 	if (pci_msi_blacklisted())
1269 		return (ENXIO);
1270 
1271 	/* MSI-X capability present? */
1272 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1273 		return (ENODEV);
1274 
1275 	/* Make sure the appropriate BARs are mapped. */
1276 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1277 	    cfg->msix.msix_table_bar);
1278 	if (rle == NULL || rle->res == NULL ||
1279 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1280 		return (ENXIO);
1281 	cfg->msix.msix_table_res = rle->res;
1282 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1283 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1284 		    cfg->msix.msix_pba_bar);
1285 		if (rle == NULL || rle->res == NULL ||
1286 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1287 			return (ENXIO);
1288 	}
1289 	cfg->msix.msix_pba_res = rle->res;
1290 
1291 	if (bootverbose)
1292 		device_printf(child,
1293 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1294 		    *count, cfg->msix.msix_msgnum);
1295 	max = min(*count, cfg->msix.msix_msgnum);
1296 	for (i = 0; i < max; i++) {
1297 		/* Allocate a message. */
1298 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1299 		if (error)
1300 			break;
1301 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1302 		    irq, 1);
1303 	}
1304 	actual = i;
1305 
1306 	if (bootverbose) {
1307 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1308 		if (actual == 1)
1309 			device_printf(child, "using IRQ %lu for MSI-X\n",
1310 			    rle->start);
1311 		else {
1312 			int run;
1313 
1314 			/*
1315 			 * Be fancy and try to print contiguous runs of
1316 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1317 			 * 'run' is true if we are in a range.
1318 			 */
1319 			device_printf(child, "using IRQs %lu", rle->start);
1320 			irq = rle->start;
1321 			run = 0;
1322 			for (i = 1; i < actual; i++) {
1323 				rle = resource_list_find(&dinfo->resources,
1324 				    SYS_RES_IRQ, i + 1);
1325 
1326 				/* Still in a run? */
1327 				if (rle->start == irq + 1) {
1328 					run = 1;
1329 					irq++;
1330 					continue;
1331 				}
1332 
1333 				/* Finish previous range. */
1334 				if (run) {
1335 					printf("-%d", irq);
1336 					run = 0;
1337 				}
1338 
1339 				/* Start new range. */
1340 				printf(",%lu", rle->start);
1341 				irq = rle->start;
1342 			}
1343 
1344 			/* Unfinished range? */
1345 			if (run)
1346 				printf("-%d", irq);
1347 			printf(" for MSI-X\n");
1348 		}
1349 	}
1350 
1351 	/* Mask all vectors. */
1352 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1353 		pci_mask_msix(child, i);
1354 
1355 	/* Allocate and initialize vector data and virtual table. */
1356 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1357 	    M_DEVBUF, M_WAITOK | M_ZERO);
1358 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1359 	    M_DEVBUF, M_WAITOK | M_ZERO);
1360 	for (i = 0; i < actual; i++) {
1361 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1362 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1363 		cfg->msix.msix_table[i].mte_vector = i + 1;
1364 	}
1365 
1366 	/* Update control register to enable MSI-X. */
1367 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1368 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1369 	    cfg->msix.msix_ctrl, 2);
1370 
1371 	/* Update counts of alloc'd messages. */
1372 	cfg->msix.msix_alloc = actual;
1373 	cfg->msix.msix_table_len = actual;
1374 	*count = actual;
1375 	return (0);
1376 }
1377 
1378 /*
1379  * By default, pci_alloc_msix() will assign the allocated IRQ
1380  * resources consecutively to the first N messages in the MSI-X table.
1381  * However, device drivers may want to use different layouts if they
1382  * either receive fewer messages than they asked for, or they wish to
1383  * populate the MSI-X table sparsely.  This method allows the driver
1384  * to specify what layout it wants.  It must be called after a
1385  * successful pci_alloc_msix() but before any of the associated
1386  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1387  *
1388  * The 'vectors' array contains 'count' message vectors.  The array
1389  * maps directly to the MSI-X table in that index 0 in the array
1390  * specifies the vector for the first message in the MSI-X table, etc.
1391  * The vector value in each array index can either be 0 to indicate
1392  * that no vector should be assigned to a message slot, or it can be a
1393  * number from 1 to N (where N is the count returned from a
1394  * succcessful call to pci_alloc_msix()) to indicate which message
1395  * vector (IRQ) to be used for the corresponding message.
1396  *
1397  * On successful return, each message with a non-zero vector will have
1398  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1399  * 1.  Additionally, if any of the IRQs allocated via the previous
1400  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1401  * will be freed back to the system automatically.
1402  *
1403  * For example, suppose a driver has a MSI-X table with 6 messages and
1404  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1405  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1406  * C.  After the call to pci_alloc_msix(), the device will be setup to
1407  * have an MSI-X table of ABC--- (where - means no vector assigned).
1408  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1409  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1410  * be freed back to the system.  This device will also have valid
1411  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1412  *
1413  * In any case, the SYS_RES_IRQ rid X will always map to the message
1414  * at MSI-X table index X - 1 and will only be valid if a vector is
1415  * assigned to that table entry.
1416  */
1417 int
1418 pci_remap_msix_method(device_t dev, device_t child, int count,
1419     const u_int *vectors)
1420 {
1421 	struct pci_devinfo *dinfo = device_get_ivars(child);
1422 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1423 	struct resource_list_entry *rle;
1424 	int i, irq, j, *used;
1425 
1426 	/*
1427 	 * Have to have at least one message in the table but the
1428 	 * table can't be bigger than the actual MSI-X table in the
1429 	 * device.
1430 	 */
1431 	if (count == 0 || count > msix->msix_msgnum)
1432 		return (EINVAL);
1433 
1434 	/* Sanity check the vectors. */
1435 	for (i = 0; i < count; i++)
1436 		if (vectors[i] > msix->msix_alloc)
1437 			return (EINVAL);
1438 
1439 	/*
1440 	 * Make sure there aren't any holes in the vectors to be used.
1441 	 * It's a big pain to support it, and it doesn't really make
1442 	 * sense anyway.  Also, at least one vector must be used.
1443 	 */
1444 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1445 	    M_ZERO);
1446 	for (i = 0; i < count; i++)
1447 		if (vectors[i] != 0)
1448 			used[vectors[i] - 1] = 1;
1449 	for (i = 0; i < msix->msix_alloc - 1; i++)
1450 		if (used[i] == 0 && used[i + 1] == 1) {
1451 			free(used, M_DEVBUF);
1452 			return (EINVAL);
1453 		}
1454 	if (used[0] != 1) {
1455 		free(used, M_DEVBUF);
1456 		return (EINVAL);
1457 	}
1458 
1459 	/* Make sure none of the resources are allocated. */
1460 	for (i = 0; i < msix->msix_table_len; i++) {
1461 		if (msix->msix_table[i].mte_vector == 0)
1462 			continue;
1463 		if (msix->msix_table[i].mte_handlers > 0)
1464 			return (EBUSY);
1465 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1466 		KASSERT(rle != NULL, ("missing resource"));
1467 		if (rle->res != NULL)
1468 			return (EBUSY);
1469 	}
1470 
1471 	/* Free the existing resource list entries. */
1472 	for (i = 0; i < msix->msix_table_len; i++) {
1473 		if (msix->msix_table[i].mte_vector == 0)
1474 			continue;
1475 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1476 	}
1477 
1478 	/*
1479 	 * Build the new virtual table keeping track of which vectors are
1480 	 * used.
1481 	 */
1482 	free(msix->msix_table, M_DEVBUF);
1483 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1484 	    M_DEVBUF, M_WAITOK | M_ZERO);
1485 	for (i = 0; i < count; i++)
1486 		msix->msix_table[i].mte_vector = vectors[i];
1487 	msix->msix_table_len = count;
1488 
1489 	/* Free any unused IRQs and resize the vectors array if necessary. */
1490 	j = msix->msix_alloc - 1;
1491 	if (used[j] == 0) {
1492 		struct msix_vector *vec;
1493 
1494 		while (used[j] == 0) {
1495 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1496 			    msix->msix_vectors[j].mv_irq);
1497 			j--;
1498 		}
1499 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1500 		    M_WAITOK);
1501 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1502 		    (j + 1));
1503 		free(msix->msix_vectors, M_DEVBUF);
1504 		msix->msix_vectors = vec;
1505 		msix->msix_alloc = j + 1;
1506 	}
1507 	free(used, M_DEVBUF);
1508 
1509 	/* Map the IRQs onto the rids. */
1510 	for (i = 0; i < count; i++) {
1511 		if (vectors[i] == 0)
1512 			continue;
1513 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1514 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1515 		    irq, 1);
1516 	}
1517 
1518 	if (bootverbose) {
1519 		device_printf(child, "Remapped MSI-X IRQs as: ");
1520 		for (i = 0; i < count; i++) {
1521 			if (i != 0)
1522 				printf(", ");
1523 			if (vectors[i] == 0)
1524 				printf("---");
1525 			else
1526 				printf("%d",
1527 				    msix->msix_vectors[vectors[i]].mv_irq);
1528 		}
1529 		printf("\n");
1530 	}
1531 
1532 	return (0);
1533 }
1534 
/*
 * Disable MSI-X for a device and release the messages backing it.
 * Fails with ENODEV if no MSI-X messages are allocated and with EBUSY
 * if any message still has an established handler or an allocated
 * SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1581 
1582 /*
1583  * Return the max supported MSI-X messages this device supports.
1584  * Basically, assuming the MD code can alloc messages, this function
1585  * should return the maximum value that pci_alloc_msix() can return.
1586  * Thus, it is subject to the tunables, etc.
1587  */
1588 int
1589 pci_msix_count_method(device_t dev, device_t child)
1590 {
1591 	struct pci_devinfo *dinfo = device_get_ivars(child);
1592 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1593 
1594 	if (pci_do_msix && msix->msix_location != 0)
1595 		return (msix->msix_msgnum);
1596 	return (0);
1597 }
1598 
1599 /*
1600  * HyperTransport MSI mapping control
1601  */
1602 void
1603 pci_ht_map_msi(device_t dev, uint64_t addr)
1604 {
1605 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1606 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1607 
1608 	if (!ht->ht_msimap)
1609 		return;
1610 
1611 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1612 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1613 		/* Enable MSI -> HT mapping. */
1614 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1615 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1616 		    ht->ht_msictrl, 2);
1617 	}
1618 
1619 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1620 		/* Disable MSI -> HT mapping. */
1621 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1622 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1623 		    ht->ht_msictrl, 2);
1624 	}
1625 }
1626 
1627 int
1628 pci_get_max_read_req(device_t dev)
1629 {
1630 	int cap;
1631 	uint16_t val;
1632 
1633 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1634 		return (0);
1635 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1636 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1637 	val >>= 12;
1638 	return (1 << (val + 7));
1639 }
1640 
1641 int
1642 pci_set_max_read_req(device_t dev, int size)
1643 {
1644 	int cap;
1645 	uint16_t val;
1646 
1647 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1648 		return (0);
1649 	if (size < 128)
1650 		size = 128;
1651 	if (size > 4096)
1652 		size = 4096;
1653 	size = (1 << (fls(size) - 1));
1654 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1655 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1656 	val |= (fls(size) - 8) << 12;
1657 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1658 	return (size);
1659 }
1660 
1661 /*
1662  * Support for MSI message signalled interrupts.
1663  */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/*
	 * Write data and address values before enabling.  The data
	 * register location depends on whether the function implements
	 * 64-bit message addresses.
	 */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1690 
1691 void
1692 pci_disable_msi(device_t dev)
1693 {
1694 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1695 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1696 
1697 	/* Disable MSI -> HT mapping. */
1698 	pci_ht_map_msi(dev, 0);
1699 
1700 	/* Disable MSI in the control register. */
1701 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1702 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1703 	    2);
1704 }
1705 
1706 /*
1707  * Restore MSI registers during resume.  If MSI is enabled then
1708  * restore the data and address registers in addition to the control
1709  * register.
1710  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the cached address/data into the hardware. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit functions use a different data offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1736 
1737 static int
1738 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1739 {
1740 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1741 	pcicfgregs *cfg = &dinfo->cfg;
1742 	struct resource_list_entry *rle;
1743 	struct msix_table_entry *mte;
1744 	struct msix_vector *mv;
1745 	uint64_t addr;
1746 	uint32_t data;
1747 	int error, i, j;
1748 
1749 	/*
1750 	 * Handle MSI first.  We try to find this IRQ among our list
1751 	 * of MSI IRQs.  If we find it, we request updated address and
1752 	 * data registers and apply the results.
1753 	 */
1754 	if (cfg->msi.msi_alloc > 0) {
1755 
1756 		/* If we don't have any active handlers, nothing to do. */
1757 		if (cfg->msi.msi_handlers == 0)
1758 			return (0);
1759 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1760 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1761 			    i + 1);
1762 			if (rle->start == irq) {
1763 				error = PCIB_MAP_MSI(device_get_parent(bus),
1764 				    dev, irq, &addr, &data);
1765 				if (error)
1766 					return (error);
1767 				pci_disable_msi(dev);
1768 				dinfo->cfg.msi.msi_addr = addr;
1769 				dinfo->cfg.msi.msi_data = data;
1770 				pci_enable_msi(dev, addr, data);
1771 				return (0);
1772 			}
1773 		}
1774 		return (ENOENT);
1775 	}
1776 
1777 	/*
1778 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1779 	 * we request the updated mapping info.  If that works, we go
1780 	 * through all the slots that use this IRQ and update them.
1781 	 */
1782 	if (cfg->msix.msix_alloc > 0) {
1783 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1784 			mv = &cfg->msix.msix_vectors[i];
1785 			if (mv->mv_irq == irq) {
1786 				error = PCIB_MAP_MSI(device_get_parent(bus),
1787 				    dev, irq, &addr, &data);
1788 				if (error)
1789 					return (error);
1790 				mv->mv_address = addr;
1791 				mv->mv_data = data;
1792 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1793 					mte = &cfg->msix.msix_table[j];
1794 					if (mte->mte_vector != i + 1)
1795 						continue;
1796 					if (mte->mte_handlers == 0)
1797 						continue;
1798 					pci_mask_msix(dev, j);
1799 					pci_enable_msix(dev, j, addr, data);
1800 					pci_unmask_msix(dev, j);
1801 				}
1802 			}
1803 		}
1804 		return (ENOENT);
1805 	}
1806 
1807 	return (ENOENT);
1808 }
1809 
1810 /*
1811  * Returns true if the specified device is blacklisted because MSI
1812  * doesn't work.
1813  */
1814 int
1815 pci_msi_device_blacklisted(device_t dev)
1816 {
1817 	struct pci_quirk *q;
1818 
1819 	if (!pci_honor_msi_blacklist)
1820 		return (0);
1821 
1822 	for (q = &pci_quirks[0]; q->devid; q++) {
1823 		if (q->devid == pci_get_devid(dev) &&
1824 		    q->type == PCI_QUIRK_DISABLE_MSI)
1825 			return (1);
1826 	}
1827 	return (0);
1828 }
1829 
1830 /*
1831  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1832  * we just check for blacklisted chipsets as represented by the
1833  * host-PCI bridge at device 0:0:0.  In the future, it may become
1834  * necessary to check other system attributes, such as the kenv values
1835  * that give the motherboard manufacturer and model number.
1836  */
1837 static int
1838 pci_msi_blacklisted(void)
1839 {
1840 	device_t dev;
1841 
1842 	if (!pci_honor_msi_blacklist)
1843 		return (0);
1844 
1845 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1846 	if (!(pcie_chipset || pcix_chipset))
1847 		return (1);
1848 
1849 	dev = pci_find_bsf(0, 0, 0);
1850 	if (dev != NULL)
1851 		return (pci_msi_device_blacklisted(dev));
1852 	return (0);
1853 }
1854 
1855 /*
1856  * Attempt to allocate *count MSI messages.  The actual number allocated is
1857  * returned in *count.  After this function returns, each message will be
1858  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1859  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Retry with progressively smaller powers of two until one works. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;	/* encode log2(actual) in MME field */
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1978 
1979 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the release call below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2027 
2028 /*
2029  * Return the max supported MSI messages this device supports.
2030  * Basically, assuming the MD code can alloc messages, this function
2031  * should return the maximum value that pci_alloc_msi() can return.
2032  * Thus, it is subject to the tunables, etc.
2033  */
2034 int
2035 pci_msi_count_method(device_t dev, device_t child)
2036 {
2037 	struct pci_devinfo *dinfo = device_get_ivars(child);
2038 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2039 
2040 	if (pci_do_msi && msi->msi_location != 0)
2041 		return (msi->msi_msgnum);
2042 	return (0);
2043 }
2044 
2045 /* free pcicfgregs structure and all depending data structures */
2046 
2047 int
2048 pci_freecfg(struct pci_devinfo *dinfo)
2049 {
2050 	struct devlist *devlist_head;
2051 	int i;
2052 
2053 	devlist_head = &pci_devq;
2054 
2055 	if (dinfo->cfg.vpd.vpd_reg) {
2056 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2057 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2058 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2059 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2060 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2061 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2062 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2063 	}
2064 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2065 	free(dinfo, M_DEVBUF);
2066 
2067 	/* increment the generation count */
2068 	pci_generation++;
2069 
2070 	/* we're losing one device */
2071 	pci_numdevs--;
2072 	return (0);
2073 }
2074 
2075 /*
2076  * PCI power manangement
2077  */
2078 int
2079 pci_set_powerstate_method(device_t dev, device_t child, int state)
2080 {
2081 	struct pci_devinfo *dinfo = device_get_ivars(child);
2082 	pcicfgregs *cfg = &dinfo->cfg;
2083 	uint16_t status;
2084 	int result, oldstate, highest, delay;
2085 
2086 	if (cfg->pp.pp_cap == 0)
2087 		return (EOPNOTSUPP);
2088 
2089 	/*
2090 	 * Optimize a no state change request away.  While it would be OK to
2091 	 * write to the hardware in theory, some devices have shown odd
2092 	 * behavior when going from D3 -> D3.
2093 	 */
2094 	oldstate = pci_get_powerstate(child);
2095 	if (oldstate == state)
2096 		return (0);
2097 
2098 	/*
2099 	 * The PCI power management specification states that after a state
2100 	 * transition between PCI power states, system software must
2101 	 * guarantee a minimal delay before the function accesses the device.
2102 	 * Compute the worst case delay that we need to guarantee before we
2103 	 * access the device.  Many devices will be responsive much more
2104 	 * quickly than this delay, but there are some that don't respond
2105 	 * instantly to state changes.  Transitions to/from D3 state require
2106 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2107 	 * is done below with DELAY rather than a sleeper function because
2108 	 * this function can be called from contexts where we cannot sleep.
2109 	 */
2110 	highest = (oldstate > state) ? oldstate : state;
2111 	if (highest == PCI_POWERSTATE_D3)
2112 	    delay = 10000;
2113 	else if (highest == PCI_POWERSTATE_D2)
2114 	    delay = 200;
2115 	else
2116 	    delay = 0;
2117 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2118 	    & ~PCIM_PSTAT_DMASK;
2119 	result = 0;
2120 	switch (state) {
2121 	case PCI_POWERSTATE_D0:
2122 		status |= PCIM_PSTAT_D0;
2123 		break;
2124 	case PCI_POWERSTATE_D1:
2125 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2126 			return (EOPNOTSUPP);
2127 		status |= PCIM_PSTAT_D1;
2128 		break;
2129 	case PCI_POWERSTATE_D2:
2130 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2131 			return (EOPNOTSUPP);
2132 		status |= PCIM_PSTAT_D2;
2133 		break;
2134 	case PCI_POWERSTATE_D3:
2135 		status |= PCIM_PSTAT_D3;
2136 		break;
2137 	default:
2138 		return (EINVAL);
2139 	}
2140 
2141 	if (bootverbose)
2142 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2143 		    state);
2144 
2145 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2146 	if (delay)
2147 		DELAY(delay);
2148 	return (0);
2149 }
2150 
2151 int
2152 pci_get_powerstate_method(device_t dev, device_t child)
2153 {
2154 	struct pci_devinfo *dinfo = device_get_ivars(child);
2155 	pcicfgregs *cfg = &dinfo->cfg;
2156 	uint16_t status;
2157 	int result;
2158 
2159 	if (cfg->pp.pp_cap != 0) {
2160 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2161 		switch (status & PCIM_PSTAT_DMASK) {
2162 		case PCIM_PSTAT_D0:
2163 			result = PCI_POWERSTATE_D0;
2164 			break;
2165 		case PCIM_PSTAT_D1:
2166 			result = PCI_POWERSTATE_D1;
2167 			break;
2168 		case PCIM_PSTAT_D2:
2169 			result = PCI_POWERSTATE_D2;
2170 			break;
2171 		case PCIM_PSTAT_D3:
2172 			result = PCI_POWERSTATE_D3;
2173 			break;
2174 		default:
2175 			result = PCI_POWERSTATE_UNKNOWN;
2176 			break;
2177 		}
2178 	} else {
2179 		/* No support, device is always at D0 */
2180 		result = PCI_POWERSTATE_D0;
2181 	}
2182 	return (result);
2183 }
2184 
2185 /*
2186  * Some convenience functions for PCI device drivers.
2187  */
2188 
2189 static __inline void
2190 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2191 {
2192 	uint16_t	command;
2193 
2194 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2195 	command |= bit;
2196 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2197 }
2198 
2199 static __inline void
2200 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2201 {
2202 	uint16_t	command;
2203 
2204 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2205 	command &= ~bit;
2206 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2207 }
2208 
2209 int
2210 pci_enable_busmaster_method(device_t dev, device_t child)
2211 {
2212 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2213 	return (0);
2214 }
2215 
2216 int
2217 pci_disable_busmaster_method(device_t dev, device_t child)
2218 {
2219 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2220 	return (0);
2221 }
2222 
2223 int
2224 pci_enable_io_method(device_t dev, device_t child, int space)
2225 {
2226 	uint16_t bit;
2227 
2228 	switch(space) {
2229 	case SYS_RES_IOPORT:
2230 		bit = PCIM_CMD_PORTEN;
2231 		break;
2232 	case SYS_RES_MEMORY:
2233 		bit = PCIM_CMD_MEMEN;
2234 		break;
2235 	default:
2236 		return (EINVAL);
2237 	}
2238 	pci_set_command_bit(dev, child, bit);
2239 	return (0);
2240 }
2241 
2242 int
2243 pci_disable_io_method(device_t dev, device_t child, int space)
2244 {
2245 	uint16_t bit;
2246 
2247 	switch(space) {
2248 	case SYS_RES_IOPORT:
2249 		bit = PCIM_CMD_PORTEN;
2250 		break;
2251 	case SYS_RES_MEMORY:
2252 		bit = PCIM_CMD_MEMEN;
2253 		break;
2254 	default:
2255 		return (EINVAL);
2256 	}
2257 	pci_clear_command_bit(dev, child, bit);
2258 	return (0);
2259 }
2260 
2261 /*
2262  * New style pci driver.  Parent device is either a pci-host-bridge or a
2263  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2264  */
2265 
/*
 * When booting verbose, print the configuration data gathered for a
 * device: IDs, location, class, common header registers, interrupt
 * routing, and any power-management/MSI/MSI-X capabilities that were
 * parsed into 'cfg'.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based ('a' == INTA#); 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read PMCSR for the current power state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or live in two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2322 
2323 static int
2324 pci_porten(device_t dev)
2325 {
2326 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2327 }
2328 
2329 static int
2330 pci_memen(device_t dev)
2331 {
2332 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2333 }
2334 
/*
 * Read a BAR's current value (*mapp) and its sizing probe value
 * (*testvalp, obtained by writing all 1's and reading back).  Decoding
 * is disabled around the probe and the original contents are restored
 * before returning, so this is safe on a live device.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* 64-bit BARs occupy this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2396 
2397 static void
2398 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2399 {
2400 	pci_addr_t map;
2401 	int ln2range;
2402 
2403 	map = pci_read_config(dev, reg, 4);
2404 
2405 	/* The device ROM BAR is always 32-bits. */
2406 	if (reg == PCIR_BIOS)
2407 		return;
2408 	ln2range = pci_maprange(map);
2409 	pci_write_config(dev, reg, base, 4);
2410 	if (ln2range == 64)
2411 		pci_write_config(dev, reg + 4, base >> 32, 4);
2412 }
2413 
2414 /*
2415  * Add a resource based on a pci map register. Return 1 if the map
2416  * register is a 32bit map register or 2 if it is a 64bit register.
2417  */
2418 static int
2419 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2420     int force, int prefetch)
2421 {
2422 	pci_addr_t base, map, testval;
2423 	pci_addr_t start, end, count;
2424 	int barlen, basezero, maprange, mapsize, type;
2425 	uint16_t cmd;
2426 	struct resource *res;
2427 
2428 	pci_read_bar(dev, reg, &map, &testval);
2429 	if (PCI_BAR_MEM(map)) {
2430 		type = SYS_RES_MEMORY;
2431 		if (map & PCIM_BAR_MEM_PREFETCH)
2432 			prefetch = 1;
2433 	} else
2434 		type = SYS_RES_IOPORT;
2435 	mapsize = pci_mapsize(testval);
2436 	base = pci_mapbase(map);
2437 #ifdef __PCI_BAR_ZERO_VALID
2438 	basezero = 0;
2439 #else
2440 	basezero = base == 0;
2441 #endif
2442 	maprange = pci_maprange(map);
2443 	barlen = maprange == 64 ? 2 : 1;
2444 
2445 	/*
2446 	 * For I/O registers, if bottom bit is set, and the next bit up
2447 	 * isn't clear, we know we have a BAR that doesn't conform to the
2448 	 * spec, so ignore it.  Also, sanity check the size of the data
2449 	 * areas to the type of memory involved.  Memory must be at least
2450 	 * 16 bytes in size, while I/O ranges must be at least 4.
2451 	 */
2452 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2453 		return (barlen);
2454 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2455 	    (type == SYS_RES_IOPORT && mapsize < 2))
2456 		return (barlen);
2457 
2458 	if (bootverbose) {
2459 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2460 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2461 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2462 			printf(", port disabled\n");
2463 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2464 			printf(", memory disabled\n");
2465 		else
2466 			printf(", enabled\n");
2467 	}
2468 
2469 	/*
2470 	 * If base is 0, then we have problems if this architecture does
2471 	 * not allow that.  It is best to ignore such entries for the
2472 	 * moment.  These will be allocated later if the driver specifically
2473 	 * requests them.  However, some removable busses look better when
2474 	 * all resources are allocated, so allow '0' to be overriden.
2475 	 *
2476 	 * Similarly treat maps whose values is the same as the test value
2477 	 * read back.  These maps have had all f's written to them by the
2478 	 * BIOS in an attempt to disable the resources.
2479 	 */
2480 	if (!force && (basezero || map == testval))
2481 		return (barlen);
2482 	if ((u_long)base != base) {
2483 		device_printf(bus,
2484 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2485 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2486 		    pci_get_function(dev), reg);
2487 		return (barlen);
2488 	}
2489 
2490 	/*
2491 	 * This code theoretically does the right thing, but has
2492 	 * undesirable side effects in some cases where peripherals
2493 	 * respond oddly to having these bits enabled.  Let the user
2494 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2495 	 * default).
2496 	 */
2497 	if (pci_enable_io_modes) {
2498 		/* Turn on resources that have been left off by a lazy BIOS */
2499 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2500 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2501 			cmd |= PCIM_CMD_PORTEN;
2502 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2503 		}
2504 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2505 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2506 			cmd |= PCIM_CMD_MEMEN;
2507 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2508 		}
2509 	} else {
2510 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2511 			return (barlen);
2512 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2513 			return (barlen);
2514 	}
2515 
2516 	count = 1 << mapsize;
2517 	if (basezero || base == pci_mapbase(testval)) {
2518 		start = 0;	/* Let the parent decide. */
2519 		end = ~0ULL;
2520 	} else {
2521 		start = base;
2522 		end = base + (1 << mapsize) - 1;
2523 	}
2524 	resource_list_add(rl, type, reg, start, end, count);
2525 
2526 	/*
2527 	 * Try to allocate the resource for this BAR from our parent
2528 	 * so that this resource range is already reserved.  The
2529 	 * driver for this device will later inherit this resource in
2530 	 * pci_alloc_resource().
2531 	 */
2532 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2533 	    prefetch ? RF_PREFETCHABLE : 0);
2534 	if (res == NULL) {
2535 		/*
2536 		 * If the allocation fails, clear the BAR and delete
2537 		 * the resource list entry to force
2538 		 * pci_alloc_resource() to allocate resources from the
2539 		 * parent.
2540 		 */
2541 		resource_list_delete(rl, type, reg);
2542 		start = 0;
2543 	} else
2544 		start = rman_get_start(res);
2545 	pci_write_bar(dev, reg, start);
2546 	return (barlen);
2547 }
2548 
2549 /*
2550  * For ATA devices we need to decide early what addressing mode to use.
2551  * Legacy demands that the primary and secondary ATA ports sits on the
2552  * same addresses that old ISA hardware did. This dictates that we use
2553  * those addresses and ignore the BAR's if we cannot set PCI native
2554  * addressing mode.
2555  */
2556 static void
2557 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2558     uint32_t prefetchmask)
2559 {
2560 	struct resource *r;
2561 	int rid, type, progif;
2562 #if 0
2563 	/* if this device supports PCI native addressing use it */
2564 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2565 	if ((progif & 0x8a) == 0x8a) {
2566 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2567 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2568 			printf("Trying ATA native PCI addressing mode\n");
2569 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2570 		}
2571 	}
2572 #endif
2573 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2574 	type = SYS_RES_IOPORT;
2575 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2576 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2577 		    prefetchmask & (1 << 0));
2578 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2579 		    prefetchmask & (1 << 1));
2580 	} else {
2581 		rid = PCIR_BAR(0);
2582 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2583 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2584 		    0x1f7, 8, 0);
2585 		rid = PCIR_BAR(1);
2586 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2587 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2588 		    0x3f6, 1, 0);
2589 	}
2590 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2591 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2592 		    prefetchmask & (1 << 2));
2593 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2594 		    prefetchmask & (1 << 3));
2595 	} else {
2596 		rid = PCIR_BAR(2);
2597 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2598 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2599 		    0x177, 8, 0);
2600 		rid = PCIR_BAR(3);
2601 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2602 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2603 		    0x376, 1, 0);
2604 	}
2605 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2606 	    prefetchmask & (1 << 4));
2607 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2608 	    prefetchmask & (1 << 5));
2609 }
2610 
/*
 * Work out the legacy INTx IRQ for a device and record it as rid 0 in
 * the device's resource list.  The IRQ comes from, in order of
 * preference: a hw.pci<dom>.<bus>.<slot>.INT<pin>.irq tunable, routing
 * via the parent bus, or the intline register.  With force_route set,
 * bus routing is attempted even when intline already looks valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable 1-254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2658 
2659 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via its first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* OHCI_IR set means SMM firmware currently owns the HC. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the BIOS to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* BIOS never let go; reset the controller instead. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2695 
2696 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Clear the interrupt-enable register; best effort only. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2719 
2720 /* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via its first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended-capability list in config space looking for LEGSUP
	 * entries whose BIOS semaphore is held.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2775 
/*
 * Populate a device's resource list: BARs (with ATA legacy-mode
 * special casing), quirk-declared extra map registers, the INTx
 * interrupt, and early USB controller takeover from SMM firmware.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/*
	 * ATA devices need special map treatment: an IDE controller in
	 * legacy (master-dev) mode, or one whose first BARs are unset,
	 * must use the fixed ISA compatibility addresses.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM firmware early on. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2830 
/*
 * Scan every slot/function on the given bus and add a child device
 * for each function that responds.  dinfo_size lets subclassed buses
 * allocate a larger per-device softc that embeds struct pci_devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Probe function 0 first; its header type gates the rest. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan the other functions only on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2863 
/*
 * Attach a newbus child for an enumerated PCI function: create the
 * device, snapshot its config registers, restore them (undoing any
 * BIOS powerdown), and add its BAR/interrupt resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save config first so restore has a baseline to work from. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2875 
/* Generic PCI bus probe: always matches, at low priority. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2885 
2886 static int
2887 pci_attach(device_t dev)
2888 {
2889 	int busno, domain;
2890 
2891 	/*
2892 	 * Since there can be multiple independantly numbered PCI
2893 	 * busses on systems with multiple PCI domains, we can't use
2894 	 * the unit number to decide which bus we are probing. We ask
2895 	 * the parent pcib what our domain and bus numbers are.
2896 	 */
2897 	domain = pcib_get_domain(dev);
2898 	busno = pcib_get_bus(dev);
2899 	if (bootverbose)
2900 		device_printf(dev, "domain=%d, physical bus=%d\n",
2901 		    domain, busno);
2902 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2903 	return (bus_generic_attach(dev));
2904 }
2905 
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t child, *devlist, pcib;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If the firmware suggests a
	 * different power state, use it instead.  If power management
	 * is not present, the firmware is responsible for managing
	 * device power.  Skip children who aren't attached since they
	 * are powered down separately.  Only manage type 0 devices
	 * for now.
	 */
	pcib = device_get_parent(dev);
	/* pci_do_power_resume in the condition makes this a global switch. */
	for (i = 0; pci_do_power_resume && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		dstate = PCI_POWERSTATE_D3;
		if (device_is_attached(child) &&
		    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) ==
		    PCIM_HDRTYPE_NORMAL &&
		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
			pci_set_powerstate(child, dstate);
	}
	free(devlist, M_TEMP);
	return (0);
}
2954 
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t child, *devlist, pcib;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	pcib = device_get_parent(dev);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify power management we're going to D0 but ignore
		 * the result.  If power management is not present,
		 * the firmware is responsible for managing device
		 * power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) &&
		    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) ==
		    PCIM_HDRTYPE_NORMAL &&
		    PCIB_POWER_FOR_SLEEP(pcib, dev, NULL) == 0)
			pci_set_powerstate(child, PCI_POWERSTATE_D0);

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
		/* Unattached children get re-saved so the snapshot is fresh. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2991 
2992 static void
2993 pci_load_vendor_data(void)
2994 {
2995 	caddr_t vendordata, info;
2996 
2997 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2998 		info = preload_search_info(vendordata, MODINFO_ADDR);
2999 		pci_vendordata = *(char **)info;
3000 		info = preload_search_info(vendordata, MODINFO_SIZE);
3001 		pci_vendordata_size = *(size_t *)info;
3002 		/* terminate the database */
3003 		pci_vendordata[pci_vendordata_size] = '\n';
3004 	}
3005 }
3006 
3007 void
3008 pci_driver_added(device_t dev, driver_t *driver)
3009 {
3010 	int numdevs;
3011 	device_t *devlist;
3012 	device_t child;
3013 	struct pci_devinfo *dinfo;
3014 	int i;
3015 
3016 	if (bootverbose)
3017 		device_printf(dev, "driver added\n");
3018 	DEVICE_IDENTIFY(driver, dev);
3019 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3020 		return;
3021 	for (i = 0; i < numdevs; i++) {
3022 		child = devlist[i];
3023 		if (device_get_state(child) != DS_NOTPRESENT)
3024 			continue;
3025 		dinfo = device_get_ivars(child);
3026 		pci_print_verbose(dinfo);
3027 		if (bootverbose)
3028 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3029 		pci_cfg_restore(child, dinfo);
3030 		if (device_probe_and_attach(child) != 0)
3031 			pci_cfg_save(child, dinfo, 1);
3032 	}
3033 	free(devlist, M_TEMP);
3034 }
3035 
/*
 * Set up an interrupt handler on behalf of a child.  For our own
 * children this also manages the hardware interrupt-delivery state:
 * rid 0 means legacy INTx (unmask it in the command register); any
 * other rid is an allocated MSI or MSI-X message, whose address/data
 * pair is mapped through the parent bridge on first use and whose
 * handler count is tracked so the message is enabled exactly once.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the message lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in hardware only for the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Not MSI, so it must be an allocated MSI-X rid. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* On mapping failure, undo the generic setup above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3127 
/*
 * Tear down an interrupt handler.  For our own children this undoes
 * the state set up by pci_setup_intr(): rid 0 masks INTx, while an
 * MSI/MSI-X rid decrements the handler count and disables or masks
 * the message when the last handler goes away.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * presumably an active IRQ always has a resource list
		 * entry here — confirm before relying on it.
		 */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last MSI handler gone: turn MSI off entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler for this entry: mask the message. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3186 
3187 int
3188 pci_print_child(device_t dev, device_t child)
3189 {
3190 	struct pci_devinfo *dinfo;
3191 	struct resource_list *rl;
3192 	int retval = 0;
3193 
3194 	dinfo = device_get_ivars(child);
3195 	rl = &dinfo->resources;
3196 
3197 	retval += bus_print_child_header(dev, child);
3198 
3199 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3200 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3201 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3202 	if (device_get_flags(dev))
3203 		retval += printf(" flags %#x", device_get_flags(dev));
3204 
3205 	retval += printf(" at device %d.%d", pci_get_slot(child),
3206 	    pci_get_function(child));
3207 
3208 	retval += bus_print_child_footer(dev, child);
3209 
3210 	return (retval);
3211 }
3212 
/*
 * Class/subclass description table used by pci_probe_nomatch() to name
 * devices that no driver claimed.  An entry with subclass -1 describes
 * the whole class and serves as the fallback when no specific subclass
 * entry matches.  The table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3304 
3305 void
3306 pci_probe_nomatch(device_t dev, device_t child)
3307 {
3308 	int	i;
3309 	char	*cp, *scp, *device;
3310 
3311 	/*
3312 	 * Look for a listing for this device in a loaded device database.
3313 	 */
3314 	if ((device = pci_describe_device(child)) != NULL) {
3315 		device_printf(dev, "<%s>", device);
3316 		free(device, M_DEVBUF);
3317 	} else {
3318 		/*
3319 		 * Scan the class/subclass descriptions for a general
3320 		 * description.
3321 		 */
3322 		cp = "unknown";
3323 		scp = NULL;
3324 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3325 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3326 				if (pci_nomatch_tab[i].subclass == -1) {
3327 					cp = pci_nomatch_tab[i].desc;
3328 				} else if (pci_nomatch_tab[i].subclass ==
3329 				    pci_get_subclass(child)) {
3330 					scp = pci_nomatch_tab[i].desc;
3331 				}
3332 			}
3333 		}
3334 		device_printf(dev, "<%s%s%s>",
3335 		    cp ? cp : "",
3336 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3337 		    scp ? scp : "");
3338 	}
3339 	printf(" at device %d.%d (no driver attached)\n",
3340 	    pci_get_slot(child), pci_get_function(child));
3341 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3342 	return;
3343 }
3344 
3345 /*
3346  * Parse the PCI device database, if loaded, and return a pointer to a
3347  * description of the device.
3348  *
3349  * The database is flat text formatted as follows:
3350  *
3351  * Any line not in a valid format is ignored.
3352  * Lines are terminated with newline '\n' characters.
3353  *
3354  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3355  * the vendor name.
3356  *
3357  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3358  * - devices cannot be listed without a corresponding VENDOR line.
3359  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3360  * another TAB, then the device name.
3361  */
3362 
3363 /*
3364  * Assuming (ptr) points to the beginning of a line in the database,
3365  * return the vendor or device and description of the next entry.
3366  * The value of (vendor) or (device) inappropriate for the entry type
3367  * is set to -1.  Returns nonzero at the end of the database.
3368  *
3369  * Note that this is slightly unrobust in the face of corrupt data;
3370  * we attempt to safeguard against this by spamming the end of the
3371  * database with a newline when we initialise.
3372  */
3373 static int
3374 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3375 {
3376 	char	*cp = *ptr;
3377 	int	left;
3378 
3379 	*device = -1;
3380 	*vendor = -1;
3381 	**desc = '\0';
3382 	for (;;) {
3383 		left = pci_vendordata_size - (cp - pci_vendordata);
3384 		if (left <= 0) {
3385 			*ptr = cp;
3386 			return(1);
3387 		}
3388 
3389 		/* vendor entry? */
3390 		if (*cp != '\t' &&
3391 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3392 			break;
3393 		/* device entry? */
3394 		if (*cp == '\t' &&
3395 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3396 			break;
3397 
3398 		/* skip to next line */
3399 		while (*cp != '\n' && left > 0) {
3400 			cp++;
3401 			left--;
3402 		}
3403 		if (*cp == '\n') {
3404 			cp++;
3405 			left--;
3406 		}
3407 	}
3408 	/* skip to next line */
3409 	while (*cp != '\n' && left > 0) {
3410 		cp++;
3411 		left--;
3412 	}
3413 	if (*cp == '\n' && left > 0)
3414 		cp++;
3415 	*ptr = cp;
3416 	return(0);
3417 }
3418 
/*
 * Build a malloc'd "vendor, device" description string for (dev) from
 * the loaded vendor database.  Returns NULL if no database is loaded,
 * the vendor is not listed, or allocation fails; otherwise the caller
 * must free the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers hold one parsed description each. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no matching device entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Reached the next vendor block without a match. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device name found; fall back to the numeric device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+ 3" covers the ", " separator plus the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3471 
3472 int
3473 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3474 {
3475 	struct pci_devinfo *dinfo;
3476 	pcicfgregs *cfg;
3477 
3478 	dinfo = device_get_ivars(child);
3479 	cfg = &dinfo->cfg;
3480 
3481 	switch (which) {
3482 	case PCI_IVAR_ETHADDR:
3483 		/*
3484 		 * The generic accessor doesn't deal with failure, so
3485 		 * we set the return value, then return an error.
3486 		 */
3487 		*((uint8_t **) result) = NULL;
3488 		return (EINVAL);
3489 	case PCI_IVAR_SUBVENDOR:
3490 		*result = cfg->subvendor;
3491 		break;
3492 	case PCI_IVAR_SUBDEVICE:
3493 		*result = cfg->subdevice;
3494 		break;
3495 	case PCI_IVAR_VENDOR:
3496 		*result = cfg->vendor;
3497 		break;
3498 	case PCI_IVAR_DEVICE:
3499 		*result = cfg->device;
3500 		break;
3501 	case PCI_IVAR_DEVID:
3502 		*result = (cfg->device << 16) | cfg->vendor;
3503 		break;
3504 	case PCI_IVAR_CLASS:
3505 		*result = cfg->baseclass;
3506 		break;
3507 	case PCI_IVAR_SUBCLASS:
3508 		*result = cfg->subclass;
3509 		break;
3510 	case PCI_IVAR_PROGIF:
3511 		*result = cfg->progif;
3512 		break;
3513 	case PCI_IVAR_REVID:
3514 		*result = cfg->revid;
3515 		break;
3516 	case PCI_IVAR_INTPIN:
3517 		*result = cfg->intpin;
3518 		break;
3519 	case PCI_IVAR_IRQ:
3520 		*result = cfg->intline;
3521 		break;
3522 	case PCI_IVAR_DOMAIN:
3523 		*result = cfg->domain;
3524 		break;
3525 	case PCI_IVAR_BUS:
3526 		*result = cfg->bus;
3527 		break;
3528 	case PCI_IVAR_SLOT:
3529 		*result = cfg->slot;
3530 		break;
3531 	case PCI_IVAR_FUNCTION:
3532 		*result = cfg->func;
3533 		break;
3534 	case PCI_IVAR_CMDREG:
3535 		*result = cfg->cmdreg;
3536 		break;
3537 	case PCI_IVAR_CACHELNSZ:
3538 		*result = cfg->cachelnsz;
3539 		break;
3540 	case PCI_IVAR_MINGNT:
3541 		*result = cfg->mingnt;
3542 		break;
3543 	case PCI_IVAR_MAXLAT:
3544 		*result = cfg->maxlat;
3545 		break;
3546 	case PCI_IVAR_LATTIMER:
3547 		*result = cfg->lattimer;
3548 		break;
3549 	default:
3550 		return (ENOENT);
3551 	}
3552 	return (0);
3553 }
3554 
3555 int
3556 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3557 {
3558 	struct pci_devinfo *dinfo;
3559 
3560 	dinfo = device_get_ivars(child);
3561 
3562 	switch (which) {
3563 	case PCI_IVAR_INTPIN:
3564 		dinfo->cfg.intpin = value;
3565 		return (0);
3566 	case PCI_IVAR_ETHADDR:
3567 	case PCI_IVAR_SUBVENDOR:
3568 	case PCI_IVAR_SUBDEVICE:
3569 	case PCI_IVAR_VENDOR:
3570 	case PCI_IVAR_DEVICE:
3571 	case PCI_IVAR_DEVID:
3572 	case PCI_IVAR_CLASS:
3573 	case PCI_IVAR_SUBCLASS:
3574 	case PCI_IVAR_PROGIF:
3575 	case PCI_IVAR_REVID:
3576 	case PCI_IVAR_IRQ:
3577 	case PCI_IVAR_DOMAIN:
3578 	case PCI_IVAR_BUS:
3579 	case PCI_IVAR_SLOT:
3580 	case PCI_IVAR_FUNCTION:
3581 		return (EINVAL);	/* disallow for now */
3582 
3583 	default:
3584 		return (ENOENT);
3585 	}
3586 }
3587 
3588 
3589 #include "opt_ddb.h"
3590 #ifdef DDB
3591 #include <ddb/ddb.h>
3592 #include <sys/cons.h>
3593 
3594 /*
3595  * List resources based on pci map registers, used for within ddb
3596  */
3597 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print each device's identity (class, card, chip, revision, header
 * type) along with the driver name/unit if one is attached.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unattached devices print as "none<N>" with a counter. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3637 #endif /* DDB */
3638 
/*
 * Reserve a resource for a BAR that has not been reserved yet: size the
 * BAR by reading it back, validate that the requested type matches the
 * BAR's kind, allocate a suitably sized and aligned range from the
 * parent, record it in the child's resource list, and program the BAR
 * with the assigned base address.  Returns the reserved resource or
 * NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* The requested resource type must match the BAR's kind. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were granted. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3734 
3735 
/*
 * Allocate a resource for a child.  For our own children this performs
 * lazy allocation: an interrupt may be routed on first request, and a
 * BAR that has not been reserved yet is sized and reserved via
 * pci_reserve_map() before the allocation is satisfied from the
 * child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for grandchildren are passed straight up. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All other types fall through to the generic list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3786 
3787 int
3788 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3789     struct resource *r)
3790 {
3791 	int error;
3792 
3793 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3794 	if (error)
3795 		return (error);
3796 
3797 	/* Enable decoding in the command register when activating BARs. */
3798 	if (device_get_parent(child) == dev) {
3799 		/* Device ROMs need their decoding explicitly enabled. */
3800 		if (rid == PCIR_BIOS)
3801 			pci_write_config(child, rid, rman_get_start(r) |
3802 			    PCIM_BIOS_ENABLE, 4);
3803 		switch (type) {
3804 		case SYS_RES_IOPORT:
3805 		case SYS_RES_MEMORY:
3806 			error = PCI_ENABLE_IO(dev, child, type);
3807 			break;
3808 		}
3809 	}
3810 	return (error);
3811 }
3812 
3813 int
3814 pci_deactivate_resource(device_t dev, device_t child, int type,
3815     int rid, struct resource *r)
3816 {
3817 	int error;
3818 
3819 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3820 	if (error)
3821 		return (error);
3822 
3823 	/* Disable decoding for device ROMs. */
3824 	if (rid == PCIR_BIOS)
3825 		pci_write_config(child, rid, rman_get_start(r), 4);
3826 	return (0);
3827 }
3828 
/*
 * Remove a child device from the bus entirely: detach its driver,
 * disable its memory and I/O decoding, release and unreserve all of
 * its resources, and free the device and its config-space state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource that is still active or busy should
			 * have been released by the detach above; complain
			 * and force-release it before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3868 
/*
 * Delete one entry from a direct child's resource list, refusing if
 * the resource is still active or busy.  If the entry is a reserved
 * BAR, the BAR register is cleared first so the device stops decoding
 * the range before it is unreserved.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		/* Refuse to delete a resource the child still owns. */
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3911 
3912 struct resource_list *
3913 pci_get_resource_list (device_t dev, device_t child)
3914 {
3915 	struct pci_devinfo *dinfo = device_get_ivars(child);
3916 
3917 	return (&dinfo->resources);
3918 }
3919 
3920 uint32_t
3921 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3922 {
3923 	struct pci_devinfo *dinfo = device_get_ivars(child);
3924 	pcicfgregs *cfg = &dinfo->cfg;
3925 
3926 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3927 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3928 }
3929 
3930 void
3931 pci_write_config_method(device_t dev, device_t child, int reg,
3932     uint32_t val, int width)
3933 {
3934 	struct pci_devinfo *dinfo = device_get_ivars(child);
3935 	pcicfgregs *cfg = &dinfo->cfg;
3936 
3937 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3938 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3939 }
3940 
3941 int
3942 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3943     size_t buflen)
3944 {
3945 
3946 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3947 	    pci_get_function(child));
3948 	return (0);
3949 }
3950 
3951 int
3952 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3953     size_t buflen)
3954 {
3955 	struct pci_devinfo *dinfo;
3956 	pcicfgregs *cfg;
3957 
3958 	dinfo = device_get_ivars(child);
3959 	cfg = &dinfo->cfg;
3960 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3961 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3962 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3963 	    cfg->progif);
3964 	return (0);
3965 }
3966 
3967 int
3968 pci_assign_interrupt_method(device_t dev, device_t child)
3969 {
3970 	struct pci_devinfo *dinfo = device_get_ivars(child);
3971 	pcicfgregs *cfg = &dinfo->cfg;
3972 
3973 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3974 	    cfg->intpin));
3975 }
3976 
3977 static int
3978 pci_modevent(module_t mod, int what, void *arg)
3979 {
3980 	static struct cdev *pci_cdev;
3981 
3982 	switch (what) {
3983 	case MOD_LOAD:
3984 		STAILQ_INIT(&pci_devq);
3985 		pci_generation = 0;
3986 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3987 		    "pci");
3988 		pci_load_vendor_data();
3989 		break;
3990 
3991 	case MOD_UNLOAD:
3992 		destroy_dev(pci_cdev);
3993 		break;
3994 	}
3995 
3996 	return (0);
3997 }
3998 
/*
 * Restore the config-space registers that pci_cfg_save() cached in
 * 'dinfo' back into the device, typically after a resume or power-up.
 * The write order matters: power state first, then BARs and the rest.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Replay the cached BARs and expansion ROM address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the remaining writable type 0 header registers. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4044 
/*
 * Snapshot the writable type 0 config-space registers of 'dev' into
 * 'dinfo' so pci_cfg_restore() can replay them later.  If 'setstate'
 * is non-zero, additionally power the device down to D3 when the
 * pci_do_power_nodriver policy permits it for this device class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;
	/* Snapshot the BARs and the expansion ROM address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Higher pci_do_power_nodriver values power down more classes. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4128