xref: /freebsd/sys/dev/pci/pci.c (revision 9bd497b8354567454e075076d40c996e21bd6095)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 #ifdef __HAVE_ACPI
73 #include <contrib/dev/acpica/include/acpi.h>
74 #include "acpi_if.h"
75 #else
76 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
77 #endif
78 
/* Prototypes for the file-local helpers defined below. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
121 
/*
 * Method table for the PCI bus driver: wires the kernel's device, bus,
 * and PCI interfaces to this file's implementations, falling back to the
 * bus_generic_* helpers where no PCI-specific handling is required.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
173 
/* Declare the pci driver class and attach it as a child of pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Vendor/device description database; presumably filled in by
 * pci_load_vendor_data() (declared above) — used by pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
182 
183 
/* One entry in the table of devices needing special-case handling. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* codes below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific (e.g. map register offset) */
	int	arg2;	/* quirk-specific, currently unused */
};
192 
/* Table of known-broken devices; terminated by an all-zero entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
227 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every enumerated PCI function; see pci_read_device(). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped each time pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set in pci_read_extcap() when a PCIe/PCI-X capability is seen. */
static int pcie_chipset, pcix_chipset;
237 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Early USB takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
289 
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: delegate to pci_find_dbsf() with domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
298 
299 /* Find a device_t by domain/bus/slot/function */
300 
301 device_t
302 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
303 {
304 	struct pci_devinfo *dinfo;
305 
306 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
307 		if ((dinfo->cfg.domain == domain) &&
308 		    (dinfo->cfg.bus == bus) &&
309 		    (dinfo->cfg.slot == slot) &&
310 		    (dinfo->cfg.func == func)) {
311 			return (dinfo->cfg.dev);
312 		}
313 	}
314 
315 	return (NULL);
316 }
317 
318 /* Find a device_t by vendor/device ID */
319 
320 device_t
321 pci_find_device(uint16_t vendor, uint16_t device)
322 {
323 	struct pci_devinfo *dinfo;
324 
325 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
326 		if ((dinfo->cfg.vendor == vendor) &&
327 		    (dinfo->cfg.device == device)) {
328 			return (dinfo->cfg.dev);
329 		}
330 	}
331 
332 	return (NULL);
333 }
334 
335 static int
336 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
337 {
338 	va_list ap;
339 	int retval;
340 
341 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
342 	    cfg->func);
343 	va_start(ap, fmt);
344 	retval += vprintf(fmt, ap);
345 	va_end(ap);
346 	return (retval);
347 }
348 
349 /* return base address of memory or port map */
350 
351 static pci_addr_t
352 pci_mapbase(uint64_t mapreg)
353 {
354 
355 	if (PCI_BAR_MEM(mapreg))
356 		return (mapreg & PCIM_BAR_MEM_BASE);
357 	else
358 		return (mapreg & PCIM_BAR_IO_BASE);
359 }
360 
361 /* return map type of memory or port map */
362 
363 static const char *
364 pci_maptype(uint64_t mapreg)
365 {
366 
367 	if (PCI_BAR_IO(mapreg))
368 		return ("I/O Port");
369 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
370 		return ("Prefetchable Memory");
371 	return ("Memory");
372 }
373 
/* return log2 of map size decoded for memory or port map */

static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/*
	 * The decoded size is determined by the position of the lowest
	 * set address bit after writing all-ones to the BAR.
	 */
	base = pci_mapbase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
392 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and other low-order flag bits. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
401 
/* return log2 of map size decoded for device ROM */

static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/* Position of the lowest writable address bit gives the size. */
	base = pci_rombase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
420 
421 /* return log2 of address range supported by map register */
422 
423 static int
424 pci_maprange(uint64_t mapreg)
425 {
426 	int ln2range = 0;
427 
428 	if (PCI_BAR_IO(mapreg))
429 		ln2range = 32;
430 	else
431 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
432 		case PCIM_BAR_MEM_32:
433 			ln2range = 32;
434 			break;
435 		case PCIM_BAR_MEM_1MB:
436 			ln2range = 20;
437 			break;
438 		case PCIM_BAR_MEM_64:
439 			ln2range = 64;
440 			break;
441 		}
442 	return (ln2range);
443 }
444 
445 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
446 
447 static void
448 pci_fixancient(pcicfgregs *cfg)
449 {
450 	if (cfg->hdrtype != 0)
451 		return;
452 
453 	/* PCI to PCI bridges use header type 1 */
454 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
455 		cfg->hdrtype = 1;
456 }
457 
458 /* extract header type specific config data */
459 
460 static void
461 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
462 {
463 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
464 	switch (cfg->hdrtype) {
465 	case 0:
466 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
467 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
468 		cfg->nummaps	    = PCI_MAXMAPS_0;
469 		break;
470 	case 1:
471 		cfg->nummaps	    = PCI_MAXMAPS_1;
472 		break;
473 	case 2:
474 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
475 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
476 		cfg->nummaps	    = PCI_MAXMAPS_2;
477 		break;
478 	}
479 #undef REG
480 }
481 
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device read means no device at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* 'size' allows callers to embed pci_devinfo in a larger struct. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Pull in capability data if the device advertises any. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8)-visible form. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
556 
/*
 * Walk the device's capability list and record the location and contents
 * of the capabilities this driver cares about: power management,
 * HyperTransport MSI mapping, MSI, MSI-X, VPD, subvendor IDs, PCI-X and
 * PCI-express.  Note that REG/WREG are deliberately NOT #undef'd here —
 * the VPD helpers below reuse them.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: a BAR index plus offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Just note the location; contents are read lazily. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
690 
691 /*
692  * PCI Vital Product Data
693  */
694 
695 #define	PCI_VPD_TIMEOUT		1000000
696 
/*
 * Read one aligned 32-bit word of VPD at byte offset 'reg' through the
 * VPD address/data register pair.  Writing the address with bit 15 clear
 * starts a read; the hardware sets bit 15 when the data register is
 * valid.  Returns 0 on success or ENXIO if the device never completes
 * the read within PCI_VPD_TIMEOUT microseconds.  Uses the REG/WREG
 * macros still in effect from pci_read_extcap() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Poll for the completion flag (bit 15) with a bounded spin. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
715 
#if 0
/*
 * Write one aligned 32-bit word of VPD at byte offset 'reg'.  Writing
 * the address with bit 15 set starts a write; the hardware clears
 * bit 15 when the write has completed.  Currently compiled out — no
 * caller needs VPD writes yet.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Poll until the hardware clears the flag bit or we time out. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
735 
736 #undef PCI_VPD_TIMEOUT
737 
/*
 * Cursor state for sequentially reading a device's VPD one byte at a
 * time through the 32-bit VPD data register; see vpd_nextbyte().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last word read, shifted as consumed */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* byte offset of next 32-bit read */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
746 
747 static int
748 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
749 {
750 	uint32_t reg;
751 	uint8_t byte;
752 
753 	if (vrs->bytesinval == 0) {
754 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
755 			return (ENXIO);
756 		vrs->val = le32toh(reg);
757 		vrs->off += 4;
758 		byte = vrs->val & 0xff;
759 		vrs->bytesinval = 3;
760 	} else {
761 		vrs->val = vrs->val >> 8;
762 		byte = vrs->val & 0xff;
763 		vrs->bytesinval--;
764 	}
765 
766 	vrs->cksum += byte;
767 	*data = byte;
768 	return (0);
769 }
770 
/*
 * Parse the device's entire VPD into cfg->vpd: the identifier string,
 * the read-only (VPD-R) keyword array and the read/write (VPD-W)
 * keyword array.  Implemented as a byte-driven state machine:
 *   state 0  - resource item header (small or large format)
 *   state 1  - identifier string bytes
 *   state 2/3 - VPD-R keyword header / keyword value bytes
 *   state 5/6 - VPD-W keyword header / keyword value bytes
 *   state -1 - normal termination, -2 - I/O error
 * On checksum or I/O failure the partially built arrays are freed.
 * Uses the REG/WREG macros carried over from pci_read_extcap(); they
 * are finally #undef'd at the end of this function.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet checked, 0 = bad, 1 = good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD cannot extend past 0x7f 32-bit words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the sum of all
			 * VPD bytes come out to zero if the data is intact.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Shrink the array down to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skipped bytes (state currently unreferenced) */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember where this value lives for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the VPD as read so callers don't retry on every query. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1050 
1051 int
1052 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1053 {
1054 	struct pci_devinfo *dinfo = device_get_ivars(child);
1055 	pcicfgregs *cfg = &dinfo->cfg;
1056 
1057 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1058 		pci_read_vpd(device_get_parent(dev), cfg);
1059 
1060 	*identptr = cfg->vpd.vpd_ident;
1061 
1062 	if (*identptr == NULL)
1063 		return (ENXIO);
1064 
1065 	return (0);
1066 }
1067 
1068 int
1069 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1070 	const char **vptr)
1071 {
1072 	struct pci_devinfo *dinfo = device_get_ivars(child);
1073 	pcicfgregs *cfg = &dinfo->cfg;
1074 	int i;
1075 
1076 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1077 		pci_read_vpd(device_get_parent(dev), cfg);
1078 
1079 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1080 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1081 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1082 			*vptr = cfg->vpd.vpd_ros[i].value;
1083 		}
1084 
1085 	if (i != cfg->vpd.vpd_rocnt)
1086 		return (0);
1087 
1088 	*vptr = NULL;
1089 	return (ENXIO);
1090 }
1091 
1092 /*
1093  * Find the requested extended capability and return the offset in
1094  * configuration space via the pointer provided. The function returns
1095  * 0 on success and error code otherwise.
1096  */
1097 int
1098 pci_find_extcap_method(device_t dev, device_t child, int capability,
1099     int *capreg)
1100 {
1101 	struct pci_devinfo *dinfo = device_get_ivars(child);
1102 	pcicfgregs *cfg = &dinfo->cfg;
1103 	u_int32_t status;
1104 	u_int8_t ptr;
1105 
1106 	/*
1107 	 * Check the CAP_LIST bit of the PCI status register first.
1108 	 */
1109 	status = pci_read_config(child, PCIR_STATUS, 2);
1110 	if (!(status & PCIM_STATUS_CAPPRESENT))
1111 		return (ENXIO);
1112 
1113 	/*
1114 	 * Determine the start pointer of the capabilities list.
1115 	 */
1116 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1117 	case 0:
1118 	case 1:
1119 		ptr = PCIR_CAP_PTR;
1120 		break;
1121 	case 2:
1122 		ptr = PCIR_CAP_PTR_2;
1123 		break;
1124 	default:
1125 		/* XXX: panic? */
1126 		return (ENXIO);		/* no extended capabilities support */
1127 	}
1128 	ptr = pci_read_config(child, ptr, 1);
1129 
1130 	/*
1131 	 * Traverse the capabilities list.
1132 	 */
1133 	while (ptr != 0) {
1134 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1135 			if (capreg != NULL)
1136 				*capreg = ptr;
1137 			return (0);
1138 		}
1139 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1140 	}
1141 
1142 	return (ENOENT);
1143 }
1144 
1145 /*
1146  * Support for MSI-X message interrupts.
1147  */
1148 void
1149 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1150 {
1151 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1152 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1153 	uint32_t offset;
1154 
1155 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1156 	offset = msix->msix_table_offset + index * 16;
1157 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1158 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1159 	bus_write_4(msix->msix_table_res, offset + 8, data);
1160 
1161 	/* Enable MSI -> HT mapping. */
1162 	pci_ht_map_msi(dev, address);
1163 }
1164 
1165 void
1166 pci_mask_msix(device_t dev, u_int index)
1167 {
1168 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1169 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1170 	uint32_t offset, val;
1171 
1172 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1173 	offset = msix->msix_table_offset + index * 16 + 12;
1174 	val = bus_read_4(msix->msix_table_res, offset);
1175 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1176 		val |= PCIM_MSIX_VCTRL_MASK;
1177 		bus_write_4(msix->msix_table_res, offset, val);
1178 	}
1179 }
1180 
1181 void
1182 pci_unmask_msix(device_t dev, u_int index)
1183 {
1184 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1185 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1186 	uint32_t offset, val;
1187 
1188 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1189 	offset = msix->msix_table_offset + index * 16 + 12;
1190 	val = bus_read_4(msix->msix_table_res, offset);
1191 	if (val & PCIM_MSIX_VCTRL_MASK) {
1192 		val &= ~PCIM_MSIX_VCTRL_MASK;
1193 		bus_write_4(msix->msix_table_res, offset, val);
1194 	}
1195 }
1196 
1197 int
1198 pci_pending_msix(device_t dev, u_int index)
1199 {
1200 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1201 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1202 	uint32_t offset, bit;
1203 
1204 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1205 	offset = msix->msix_pba_offset + (index / 32) * 4;
1206 	bit = 1 << index % 32;
1207 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1208 }
1209 
1210 /*
1211  * Restore MSI-X registers and table during resume.  If MSI-X is
1212  * enabled then walk the virtual table to restore the actual MSI-X
1213  * table.
1214  */
1215 static void
1216 pci_resume_msix(device_t dev)
1217 {
1218 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1219 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1220 	struct msix_table_entry *mte;
1221 	struct msix_vector *mv;
1222 	int i;
1223 
1224 	if (msix->msix_alloc > 0) {
1225 		/* First, mask all vectors. */
1226 		for (i = 0; i < msix->msix_msgnum; i++)
1227 			pci_mask_msix(dev, i);
1228 
1229 		/* Second, program any messages with at least one handler. */
1230 		for (i = 0; i < msix->msix_table_len; i++) {
1231 			mte = &msix->msix_table[i];
1232 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1233 				continue;
1234 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1235 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1236 			pci_unmask_msix(dev, i);
1237 		}
1238 	}
1239 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1240 	    msix->msix_ctrl, 2);
1241 }
1242 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		/* The PBA may live in a different BAR than the table. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Ask the parent bridge for up to 'max' vectors, one at a time. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' may be less than requested if the bridge ran dry. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		/* mte_vector is 1-based; 0 marks an unused table entry. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1379 
1380 /*
1381  * By default, pci_alloc_msix() will assign the allocated IRQ
1382  * resources consecutively to the first N messages in the MSI-X table.
1383  * However, device drivers may want to use different layouts if they
1384  * either receive fewer messages than they asked for, or they wish to
1385  * populate the MSI-X table sparsely.  This method allows the driver
1386  * to specify what layout it wants.  It must be called after a
1387  * successful pci_alloc_msix() but before any of the associated
1388  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1389  *
1390  * The 'vectors' array contains 'count' message vectors.  The array
1391  * maps directly to the MSI-X table in that index 0 in the array
1392  * specifies the vector for the first message in the MSI-X table, etc.
1393  * The vector value in each array index can either be 0 to indicate
1394  * that no vector should be assigned to a message slot, or it can be a
1395  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1397  * vector (IRQ) to be used for the corresponding message.
1398  *
1399  * On successful return, each message with a non-zero vector will have
1400  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1401  * 1.  Additionally, if any of the IRQs allocated via the previous
1402  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1403  * will be freed back to the system automatically.
1404  *
1405  * For example, suppose a driver has a MSI-X table with 6 messages and
1406  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1407  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1408  * C.  After the call to pci_alloc_msix(), the device will be setup to
1409  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1411  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1412  * be freed back to the system.  This device will also have valid
1413  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1414  *
1415  * In any case, the SYS_RES_IRQ rid X will always map to the message
1416  * at MSI-X table index X - 1 and will only be valid if a vector is
1417  * assigned to that table entry.
1418  */
1419 int
1420 pci_remap_msix_method(device_t dev, device_t child, int count,
1421     const u_int *vectors)
1422 {
1423 	struct pci_devinfo *dinfo = device_get_ivars(child);
1424 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1425 	struct resource_list_entry *rle;
1426 	int i, irq, j, *used;
1427 
1428 	/*
1429 	 * Have to have at least one message in the table but the
1430 	 * table can't be bigger than the actual MSI-X table in the
1431 	 * device.
1432 	 */
1433 	if (count == 0 || count > msix->msix_msgnum)
1434 		return (EINVAL);
1435 
1436 	/* Sanity check the vectors. */
1437 	for (i = 0; i < count; i++)
1438 		if (vectors[i] > msix->msix_alloc)
1439 			return (EINVAL);
1440 
1441 	/*
1442 	 * Make sure there aren't any holes in the vectors to be used.
1443 	 * It's a big pain to support it, and it doesn't really make
1444 	 * sense anyway.  Also, at least one vector must be used.
1445 	 */
1446 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1447 	    M_ZERO);
1448 	for (i = 0; i < count; i++)
1449 		if (vectors[i] != 0)
1450 			used[vectors[i] - 1] = 1;
1451 	for (i = 0; i < msix->msix_alloc - 1; i++)
1452 		if (used[i] == 0 && used[i + 1] == 1) {
1453 			free(used, M_DEVBUF);
1454 			return (EINVAL);
1455 		}
1456 	if (used[0] != 1) {
1457 		free(used, M_DEVBUF);
1458 		return (EINVAL);
1459 	}
1460 
1461 	/* Make sure none of the resources are allocated. */
1462 	for (i = 0; i < msix->msix_table_len; i++) {
1463 		if (msix->msix_table[i].mte_vector == 0)
1464 			continue;
1465 		if (msix->msix_table[i].mte_handlers > 0)
1466 			return (EBUSY);
1467 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1468 		KASSERT(rle != NULL, ("missing resource"));
1469 		if (rle->res != NULL)
1470 			return (EBUSY);
1471 	}
1472 
1473 	/* Free the existing resource list entries. */
1474 	for (i = 0; i < msix->msix_table_len; i++) {
1475 		if (msix->msix_table[i].mte_vector == 0)
1476 			continue;
1477 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1478 	}
1479 
1480 	/*
1481 	 * Build the new virtual table keeping track of which vectors are
1482 	 * used.
1483 	 */
1484 	free(msix->msix_table, M_DEVBUF);
1485 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1486 	    M_DEVBUF, M_WAITOK | M_ZERO);
1487 	for (i = 0; i < count; i++)
1488 		msix->msix_table[i].mte_vector = vectors[i];
1489 	msix->msix_table_len = count;
1490 
1491 	/* Free any unused IRQs and resize the vectors array if necessary. */
1492 	j = msix->msix_alloc - 1;
1493 	if (used[j] == 0) {
1494 		struct msix_vector *vec;
1495 
1496 		while (used[j] == 0) {
1497 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1498 			    msix->msix_vectors[j].mv_irq);
1499 			j--;
1500 		}
1501 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1502 		    M_WAITOK);
1503 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1504 		    (j + 1));
1505 		free(msix->msix_vectors, M_DEVBUF);
1506 		msix->msix_vectors = vec;
1507 		msix->msix_alloc = j + 1;
1508 	}
1509 	free(used, M_DEVBUF);
1510 
1511 	/* Map the IRQs onto the rids. */
1512 	for (i = 0; i < count; i++) {
1513 		if (vectors[i] == 0)
1514 			continue;
1515 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1516 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1517 		    irq, 1);
1518 	}
1519 
1520 	if (bootverbose) {
1521 		device_printf(child, "Remapped MSI-X IRQs as: ");
1522 		for (i = 0; i < count; i++) {
1523 			if (i != 0)
1524 				printf(", ");
1525 			if (vectors[i] == 0)
1526 				printf("---");
1527 			else
1528 				printf("%d",
1529 				    msix->msix_vectors[vectors[i]].mv_irq);
1530 		}
1531 		printf("\n");
1532 	}
1533 
1534 	return (0);
1535 }
1536 
/*
 * Release all MSI-X messages previously allocated for 'child': disable
 * MSI-X in config space, drop the virtual table and vector bookkeeping,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if any
 * message still has a handler or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		/* mte_vector == 0 marks an unused table entry. */
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1583 
1584 /*
1585  * Return the max supported MSI-X messages this device supports.
1586  * Basically, assuming the MD code can alloc messages, this function
1587  * should return the maximum value that pci_alloc_msix() can return.
1588  * Thus, it is subject to the tunables, etc.
1589  */
1590 int
1591 pci_msix_count_method(device_t dev, device_t child)
1592 {
1593 	struct pci_devinfo *dinfo = device_get_ivars(child);
1594 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1595 
1596 	if (pci_do_msix && msix->msix_location != 0)
1597 		return (msix->msix_msgnum);
1598 	return (0);
1599 }
1600 
1601 /*
1602  * HyperTransport MSI mapping control
1603  */
1604 void
1605 pci_ht_map_msi(device_t dev, uint64_t addr)
1606 {
1607 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1608 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1609 
1610 	if (!ht->ht_msimap)
1611 		return;
1612 
1613 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1614 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1615 		/* Enable MSI -> HT mapping. */
1616 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1617 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1618 		    ht->ht_msictrl, 2);
1619 	}
1620 
1621 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1622 		/* Disable MSI -> HT mapping. */
1623 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1624 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1625 		    ht->ht_msictrl, 2);
1626 	}
1627 }
1628 
1629 /*
1630  * Support for MSI message signalled interrupts.
1631  */
1632 void
1633 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1634 {
1635 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1636 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1637 
1638 	/* Write data and address values. */
1639 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1640 	    address & 0xffffffff, 4);
1641 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1642 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1643 		    address >> 32, 4);
1644 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1645 		    data, 2);
1646 	} else
1647 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1648 		    2);
1649 
1650 	/* Enable MSI in the control register. */
1651 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1652 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1653 	    2);
1654 
1655 	/* Enable MSI -> HT mapping. */
1656 	pci_ht_map_msi(dev, address);
1657 }
1658 
1659 void
1660 pci_disable_msi(device_t dev)
1661 {
1662 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1663 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1664 
1665 	/* Disable MSI -> HT mapping. */
1666 	pci_ht_map_msi(dev, 0);
1667 
1668 	/* Disable MSI in the control register. */
1669 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1670 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1671 	    2);
1672 }
1673 
1674 /*
1675  * Restore MSI registers during resume.  If MSI is enabled then
1676  * restore the data and address registers in addition to the control
1677  * register.
1678  */
1679 static void
1680 pci_resume_msi(device_t dev)
1681 {
1682 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1683 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1684 	uint64_t address;
1685 	uint16_t data;
1686 
1687 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1688 		address = msi->msi_addr;
1689 		data = msi->msi_data;
1690 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1691 		    address & 0xffffffff, 4);
1692 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1693 			pci_write_config(dev, msi->msi_location +
1694 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1695 			pci_write_config(dev, msi->msi_location +
1696 			    PCIR_MSI_DATA_64BIT, data, 2);
1697 		} else
1698 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1699 			    data, 2);
1700 	}
1701 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1702 	    2);
1703 }
1704 
1705 int
1706 pci_remap_msi_irq(device_t dev, u_int irq)
1707 {
1708 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1709 	pcicfgregs *cfg = &dinfo->cfg;
1710 	struct resource_list_entry *rle;
1711 	struct msix_table_entry *mte;
1712 	struct msix_vector *mv;
1713 	device_t bus;
1714 	uint64_t addr;
1715 	uint32_t data;
1716 	int error, i, j;
1717 
1718 	bus = device_get_parent(dev);
1719 
1720 	/*
1721 	 * Handle MSI first.  We try to find this IRQ among our list
1722 	 * of MSI IRQs.  If we find it, we request updated address and
1723 	 * data registers and apply the results.
1724 	 */
1725 	if (cfg->msi.msi_alloc > 0) {
1726 
1727 		/* If we don't have any active handlers, nothing to do. */
1728 		if (cfg->msi.msi_handlers == 0)
1729 			return (0);
1730 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1731 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1732 			    i + 1);
1733 			if (rle->start == irq) {
1734 				error = PCIB_MAP_MSI(device_get_parent(bus),
1735 				    dev, irq, &addr, &data);
1736 				if (error)
1737 					return (error);
1738 				pci_disable_msi(dev);
1739 				dinfo->cfg.msi.msi_addr = addr;
1740 				dinfo->cfg.msi.msi_data = data;
1741 				pci_enable_msi(dev, addr, data);
1742 				return (0);
1743 			}
1744 		}
1745 		return (ENOENT);
1746 	}
1747 
1748 	/*
1749 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1750 	 * we request the updated mapping info.  If that works, we go
1751 	 * through all the slots that use this IRQ and update them.
1752 	 */
1753 	if (cfg->msix.msix_alloc > 0) {
1754 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1755 			mv = &cfg->msix.msix_vectors[i];
1756 			if (mv->mv_irq == irq) {
1757 				error = PCIB_MAP_MSI(device_get_parent(bus),
1758 				    dev, irq, &addr, &data);
1759 				if (error)
1760 					return (error);
1761 				mv->mv_address = addr;
1762 				mv->mv_data = data;
1763 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1764 					mte = &cfg->msix.msix_table[j];
1765 					if (mte->mte_vector != i + 1)
1766 						continue;
1767 					if (mte->mte_handlers == 0)
1768 						continue;
1769 					pci_mask_msix(dev, j);
1770 					pci_enable_msix(dev, j, addr, data);
1771 					pci_unmask_msix(dev, j);
1772 				}
1773 			}
1774 		}
1775 		return (ENOENT);
1776 	}
1777 
1778 	return (ENOENT);
1779 }
1780 
1781 /*
1782  * Returns true if the specified device is blacklisted because MSI
1783  * doesn't work.
1784  */
1785 int
1786 pci_msi_device_blacklisted(device_t dev)
1787 {
1788 	struct pci_quirk *q;
1789 
1790 	if (!pci_honor_msi_blacklist)
1791 		return (0);
1792 
1793 	for (q = &pci_quirks[0]; q->devid; q++) {
1794 		if (q->devid == pci_get_devid(dev) &&
1795 		    q->type == PCI_QUIRK_DISABLE_MSI)
1796 			return (1);
1797 	}
1798 	return (0);
1799 }
1800 
1801 /*
1802  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1803  * we just check for blacklisted chipsets as represented by the
1804  * host-PCI bridge at device 0:0:0.  In the future, it may become
1805  * necessary to check other system attributes, such as the kenv values
1806  * that give the motherboard manufacturer and model number.
1807  */
1808 static int
1809 pci_msi_blacklisted(void)
1810 {
1811 	device_t dev;
1812 
1813 	if (!pci_honor_msi_blacklist)
1814 		return (0);
1815 
1816 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1817 	if (!(pcie_chipset || pcix_chipset))
1818 		return (1);
1819 
1820 	dev = pci_find_bsf(0, 0, 0);
1821 	if (dev != NULL)
1822 		return (pci_msi_device_blacklisted(dev));
1823 	return (0);
1824 }
1825 
/*
 * Attempt to allocate *count MSI messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes the message count as log2, hence ffs(actual) - 1.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1949 
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ so it can be handed back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1998 
1999 /*
2000  * Return the max supported MSI messages this device supports.
2001  * Basically, assuming the MD code can alloc messages, this function
2002  * should return the maximum value that pci_alloc_msi() can return.
2003  * Thus, it is subject to the tunables, etc.
2004  */
2005 int
2006 pci_msi_count_method(device_t dev, device_t child)
2007 {
2008 	struct pci_devinfo *dinfo = device_get_ivars(child);
2009 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2010 
2011 	if (pci_do_msi && msi->msi_location != 0)
2012 		return (msi->msi_msgnum);
2013 	return (0);
2014 }
2015 
2016 /* free pcicfgregs structure and all depending data structures */
2017 
2018 int
2019 pci_freecfg(struct pci_devinfo *dinfo)
2020 {
2021 	struct devlist *devlist_head;
2022 	int i;
2023 
2024 	devlist_head = &pci_devq;
2025 
2026 	if (dinfo->cfg.vpd.vpd_reg) {
2027 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2028 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2029 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2030 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2031 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2032 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2033 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2034 	}
2035 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2036 	free(dinfo, M_DEVBUF);
2037 
2038 	/* increment the generation count */
2039 	pci_generation++;
2040 
2041 	/* we're losing one device */
2042 	pci_numdevs--;
2043 	return (0);
2044 }
2045 
2046 /*
2047  * PCI power manangement
2048  */
2049 int
2050 pci_set_powerstate_method(device_t dev, device_t child, int state)
2051 {
2052 	struct pci_devinfo *dinfo = device_get_ivars(child);
2053 	pcicfgregs *cfg = &dinfo->cfg;
2054 	uint16_t status;
2055 	int result, oldstate, highest, delay;
2056 
2057 	if (cfg->pp.pp_cap == 0)
2058 		return (EOPNOTSUPP);
2059 
2060 	/*
2061 	 * Optimize a no state change request away.  While it would be OK to
2062 	 * write to the hardware in theory, some devices have shown odd
2063 	 * behavior when going from D3 -> D3.
2064 	 */
2065 	oldstate = pci_get_powerstate(child);
2066 	if (oldstate == state)
2067 		return (0);
2068 
2069 	/*
2070 	 * The PCI power management specification states that after a state
2071 	 * transition between PCI power states, system software must
2072 	 * guarantee a minimal delay before the function accesses the device.
2073 	 * Compute the worst case delay that we need to guarantee before we
2074 	 * access the device.  Many devices will be responsive much more
2075 	 * quickly than this delay, but there are some that don't respond
2076 	 * instantly to state changes.  Transitions to/from D3 state require
2077 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2078 	 * is done below with DELAY rather than a sleeper function because
2079 	 * this function can be called from contexts where we cannot sleep.
2080 	 */
2081 	highest = (oldstate > state) ? oldstate : state;
2082 	if (highest == PCI_POWERSTATE_D3)
2083 	    delay = 10000;
2084 	else if (highest == PCI_POWERSTATE_D2)
2085 	    delay = 200;
2086 	else
2087 	    delay = 0;
2088 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2089 	    & ~PCIM_PSTAT_DMASK;
2090 	result = 0;
2091 	switch (state) {
2092 	case PCI_POWERSTATE_D0:
2093 		status |= PCIM_PSTAT_D0;
2094 		break;
2095 	case PCI_POWERSTATE_D1:
2096 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2097 			return (EOPNOTSUPP);
2098 		status |= PCIM_PSTAT_D1;
2099 		break;
2100 	case PCI_POWERSTATE_D2:
2101 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2102 			return (EOPNOTSUPP);
2103 		status |= PCIM_PSTAT_D2;
2104 		break;
2105 	case PCI_POWERSTATE_D3:
2106 		status |= PCIM_PSTAT_D3;
2107 		break;
2108 	default:
2109 		return (EINVAL);
2110 	}
2111 
2112 	if (bootverbose)
2113 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2114 		    state);
2115 
2116 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2117 	if (delay)
2118 		DELAY(delay);
2119 	return (0);
2120 }
2121 
2122 int
2123 pci_get_powerstate_method(device_t dev, device_t child)
2124 {
2125 	struct pci_devinfo *dinfo = device_get_ivars(child);
2126 	pcicfgregs *cfg = &dinfo->cfg;
2127 	uint16_t status;
2128 	int result;
2129 
2130 	if (cfg->pp.pp_cap != 0) {
2131 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2132 		switch (status & PCIM_PSTAT_DMASK) {
2133 		case PCIM_PSTAT_D0:
2134 			result = PCI_POWERSTATE_D0;
2135 			break;
2136 		case PCIM_PSTAT_D1:
2137 			result = PCI_POWERSTATE_D1;
2138 			break;
2139 		case PCIM_PSTAT_D2:
2140 			result = PCI_POWERSTATE_D2;
2141 			break;
2142 		case PCIM_PSTAT_D3:
2143 			result = PCI_POWERSTATE_D3;
2144 			break;
2145 		default:
2146 			result = PCI_POWERSTATE_UNKNOWN;
2147 			break;
2148 		}
2149 	} else {
2150 		/* No support, device is always at D0 */
2151 		result = PCI_POWERSTATE_D0;
2152 	}
2153 	return (result);
2154 }
2155 
2156 /*
2157  * Some convenience functions for PCI device drivers.
2158  */
2159 
2160 static __inline void
2161 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2162 {
2163 	uint16_t	command;
2164 
2165 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2166 	command |= bit;
2167 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2168 }
2169 
2170 static __inline void
2171 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2172 {
2173 	uint16_t	command;
2174 
2175 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2176 	command &= ~bit;
2177 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2178 }
2179 
2180 int
2181 pci_enable_busmaster_method(device_t dev, device_t child)
2182 {
2183 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2184 	return (0);
2185 }
2186 
2187 int
2188 pci_disable_busmaster_method(device_t dev, device_t child)
2189 {
2190 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2191 	return (0);
2192 }
2193 
2194 int
2195 pci_enable_io_method(device_t dev, device_t child, int space)
2196 {
2197 	uint16_t bit;
2198 
2199 	switch(space) {
2200 	case SYS_RES_IOPORT:
2201 		bit = PCIM_CMD_PORTEN;
2202 		break;
2203 	case SYS_RES_MEMORY:
2204 		bit = PCIM_CMD_MEMEN;
2205 		break;
2206 	default:
2207 		return (EINVAL);
2208 	}
2209 	pci_set_command_bit(dev, child, bit);
2210 	return (0);
2211 }
2212 
2213 int
2214 pci_disable_io_method(device_t dev, device_t child, int space)
2215 {
2216 	uint16_t bit;
2217 
2218 	switch(space) {
2219 	case SYS_RES_IOPORT:
2220 		bit = PCIM_CMD_PORTEN;
2221 		break;
2222 	case SYS_RES_MEMORY:
2223 		bit = PCIM_CMD_MEMEN;
2224 		break;
2225 	default:
2226 		return (EINVAL);
2227 	}
2228 	pci_clear_command_bit(dev, child, bit);
2229 	return (0);
2230 }
2231 
2232 /*
2233  * New style pci driver.  Parent device is either a pci-host-bridge or a
2234  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2235  */
2236 
/*
 * When booting verbose, print the standard description of a newly
 * found device: vendor/device IDs, bus location, class/header info,
 * interrupt pin, and the power-management, MSI and MSI-X capabilities
 * recorded in its config structure.  No-op unless bootverbose is set.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state from the PM status register. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Note whether the table and PBA share a BAR. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2293 
2294 static int
2295 pci_porten(device_t dev)
2296 {
2297 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2298 }
2299 
2300 static int
2301 pci_memen(device_t dev)
2302 {
2303 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2304 }
2305 
/*
 * Read the current value and size-probe result of the BAR at 'reg'.
 *
 * On return, *mapp holds the BAR's current (possibly 64-bit) value
 * and *testvalp holds the value read back after writing all 1's,
 * from which the BAR's size can be derived.  The BAR and the command
 * register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2367 
2368 static void
2369 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2370 {
2371 	pci_addr_t map;
2372 	int ln2range;
2373 
2374 	map = pci_read_config(dev, reg, 4);
2375 
2376 	/* The device ROM BAR is always 32-bits. */
2377 	if (reg == PCIR_BIOS)
2378 		return;
2379 	ln2range = pci_maprange(map);
2380 	pci_write_config(dev, reg, base, 4);
2381 	if (ln2range == 64)
2382 		pci_write_config(dev, reg + 4, base >> 32, 4);
2383 }
2384 
2385 /*
2386  * Add a resource based on a pci map register. Return 1 if the map
2387  * register is a 32bit map register or 2 if it is a 64bit register.
2388  */
2389 static int
2390 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2391     int force, int prefetch)
2392 {
2393 	pci_addr_t base, map, testval;
2394 	pci_addr_t start, end, count;
2395 	int barlen, basezero, maprange, mapsize, type;
2396 	uint16_t cmd;
2397 	struct resource *res;
2398 
2399 	pci_read_bar(dev, reg, &map, &testval);
2400 	if (PCI_BAR_MEM(map)) {
2401 		type = SYS_RES_MEMORY;
2402 		if (map & PCIM_BAR_MEM_PREFETCH)
2403 			prefetch = 1;
2404 	} else
2405 		type = SYS_RES_IOPORT;
2406 	mapsize = pci_mapsize(testval);
2407 	base = pci_mapbase(map);
2408 #ifdef __PCI_BAR_ZERO_VALID
2409 	basezero = 0;
2410 #else
2411 	basezero = base == 0;
2412 #endif
2413 	maprange = pci_maprange(map);
2414 	barlen = maprange == 64 ? 2 : 1;
2415 
2416 	/*
2417 	 * For I/O registers, if bottom bit is set, and the next bit up
2418 	 * isn't clear, we know we have a BAR that doesn't conform to the
2419 	 * spec, so ignore it.  Also, sanity check the size of the data
2420 	 * areas to the type of memory involved.  Memory must be at least
2421 	 * 16 bytes in size, while I/O ranges must be at least 4.
2422 	 */
2423 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2424 		return (barlen);
2425 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2426 	    (type == SYS_RES_IOPORT && mapsize < 2))
2427 		return (barlen);
2428 
2429 	if (bootverbose) {
2430 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2431 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2432 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2433 			printf(", port disabled\n");
2434 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2435 			printf(", memory disabled\n");
2436 		else
2437 			printf(", enabled\n");
2438 	}
2439 
2440 	/*
2441 	 * If base is 0, then we have problems if this architecture does
2442 	 * not allow that.  It is best to ignore such entries for the
2443 	 * moment.  These will be allocated later if the driver specifically
2444 	 * requests them.  However, some removable busses look better when
2445 	 * all resources are allocated, so allow '0' to be overriden.
2446 	 *
2447 	 * Similarly treat maps whose values is the same as the test value
2448 	 * read back.  These maps have had all f's written to them by the
2449 	 * BIOS in an attempt to disable the resources.
2450 	 */
2451 	if (!force && (basezero || map == testval))
2452 		return (barlen);
2453 	if ((u_long)base != base) {
2454 		device_printf(bus,
2455 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2456 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2457 		    pci_get_function(dev), reg);
2458 		return (barlen);
2459 	}
2460 
2461 	/*
2462 	 * This code theoretically does the right thing, but has
2463 	 * undesirable side effects in some cases where peripherals
2464 	 * respond oddly to having these bits enabled.  Let the user
2465 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2466 	 * default).
2467 	 */
2468 	if (pci_enable_io_modes) {
2469 		/* Turn on resources that have been left off by a lazy BIOS */
2470 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2471 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2472 			cmd |= PCIM_CMD_PORTEN;
2473 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2474 		}
2475 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2476 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2477 			cmd |= PCIM_CMD_MEMEN;
2478 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2479 		}
2480 	} else {
2481 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2482 			return (barlen);
2483 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2484 			return (barlen);
2485 	}
2486 
2487 	count = 1 << mapsize;
2488 	if (basezero || base == pci_mapbase(testval)) {
2489 		start = 0;	/* Let the parent decide. */
2490 		end = ~0ULL;
2491 	} else {
2492 		start = base;
2493 		end = base + (1 << mapsize) - 1;
2494 	}
2495 	resource_list_add(rl, type, reg, start, end, count);
2496 
2497 	/*
2498 	 * Try to allocate the resource for this BAR from our parent
2499 	 * so that this resource range is already reserved.  The
2500 	 * driver for this device will later inherit this resource in
2501 	 * pci_alloc_resource().
2502 	 */
2503 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2504 	    prefetch ? RF_PREFETCHABLE : 0);
2505 	if (res == NULL) {
2506 		/*
2507 		 * If the allocation fails, clear the BAR and delete
2508 		 * the resource list entry to force
2509 		 * pci_alloc_resource() to allocate resources from the
2510 		 * parent.
2511 		 */
2512 		resource_list_delete(rl, type, reg);
2513 		start = 0;
2514 	} else
2515 		start = rman_get_start(res);
2516 	pci_write_bar(dev, reg, start);
2517 	return (barlen);
2518 }
2519 
2520 /*
2521  * For ATA devices we need to decide early what addressing mode to use.
2522  * Legacy demands that the primary and secondary ATA ports sits on the
2523  * same addresses that old ISA hardware did. This dictates that we use
2524  * those addresses and ignore the BAR's if we cannot set PCI native
2525  * addressing mode.
2526  */
2527 static void
2528 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2529     uint32_t prefetchmask)
2530 {
2531 	struct resource *r;
2532 	int rid, type, progif;
2533 #if 0
2534 	/* if this device supports PCI native addressing use it */
2535 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2536 	if ((progif & 0x8a) == 0x8a) {
2537 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2538 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2539 			printf("Trying ATA native PCI addressing mode\n");
2540 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2541 		}
2542 	}
2543 #endif
2544 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2545 	type = SYS_RES_IOPORT;
2546 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2547 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2548 		    prefetchmask & (1 << 0));
2549 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2550 		    prefetchmask & (1 << 1));
2551 	} else {
2552 		rid = PCIR_BAR(0);
2553 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2554 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2555 		    0x1f7, 8, 0);
2556 		rid = PCIR_BAR(1);
2557 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2558 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2559 		    0x3f6, 1, 0);
2560 	}
2561 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2562 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2563 		    prefetchmask & (1 << 2));
2564 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2565 		    prefetchmask & (1 << 3));
2566 	} else {
2567 		rid = PCIR_BAR(2);
2568 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2569 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2570 		    0x177, 8, 0);
2571 		rid = PCIR_BAR(3);
2572 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2573 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2574 		    0x376, 1, 0);
2575 	}
2576 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2577 	    prefetchmask & (1 << 4));
2578 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2579 	    prefetchmask & (1 << 5));
2580 }
2581 
/*
 * Determine the IRQ for a device's INTx pin and record it as the
 * rid 0 SYS_RES_IRQ entry in its resource list.  The IRQ may come
 * from a user tunable, from the bus (PCI_ASSIGN_INTERRUPT) or from
 * the intline register; see below for precedence.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (must be 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2629 
/*
 * Perform early OHCI takeover from SMM: if the controller's interrupt
 * routing bit (OHCI_IR) indicates the BIOS/SMM owns it, request an
 * ownership change, poll up to 100ms for it to complete, reset the
 * controller if SMM never lets go, and disable its interrupts.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for SMM to clear OHCI_IR. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2666 
/*
 * Perform early UHCI takeover from SMM: switch off legacy USB support
 * (keeping only PIRQD routing enabled) and mask the controller's
 * interrupt enable register.
 */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2690 
/*
 * Perform early EHCI takeover from SMM: walk the extended-capability
 * list looking for the USB legacy-support capability, and where the
 * BIOS semaphore is held, set the OS semaphore and poll up to 100ms
 * for the BIOS to release it, then disable the controller's
 * interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2746 
/*
 * Populate a device's resource list: scan its BARs (with special
 * handling for legacy-mode ATA controllers and quirked extra map
 * registers), route its INTx interrupt, and perform early takeover
 * of USB controllers from the BIOS/SMM where enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 to skip the upper half of 64-bit BARs. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2801 
/*
 * Enumerate all slots and functions on bus 'busno' in 'domain',
 * reading each present device's config space and adding a child
 * device for it.  Functions 1-7 are only scanned when the header
 * type's multi-function bit is set on function 0.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2834 
/*
 * Create a child device on 'bus' for a device described by 'dinfo':
 * attach the devinfo as ivars, initialize the resource list, snapshot
 * the device's config state and immediately restore it, then print
 * the verbose description and add its BAR/IRQ resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2846 
2847 static int
2848 pci_probe(device_t dev)
2849 {
2850 
2851 	device_set_desc(dev, "PCI bus");
2852 
2853 	/* Allow other subclasses to override this driver. */
2854 	return (BUS_PROBE_GENERIC);
2855 }
2856 
2857 static int
2858 pci_attach(device_t dev)
2859 {
2860 	int busno, domain;
2861 
2862 	/*
2863 	 * Since there can be multiple independantly numbered PCI
2864 	 * busses on systems with multiple PCI domains, we can't use
2865 	 * the unit number to decide which bus we are probing. We ask
2866 	 * the parent pcib what our domain and bus numbers are.
2867 	 */
2868 	domain = pcib_get_domain(dev);
2869 	busno = pcib_get_bus(dev);
2870 	if (bootverbose)
2871 		device_printf(dev, "domain=%d, physical bus=%d\n",
2872 		    domain, busno);
2873 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2874 	return (bus_generic_attach(dev));
2875 }
2876 
/*
 * Suspend the PCI bus: save each child's config space, suspend the
 * children, then (when ACPI-driven power resume is enabled) power
 * each attached type 0 child down to D3 or whatever state ACPI
 * suggests.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2925 
/*
 * Resume the PCI bus: power each attached type 0 child back to D0
 * (when ACPI-driven power resume is enabled), restore every child's
 * saved config space, and then resume the children generically.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2961 
2962 static void
2963 pci_load_vendor_data(void)
2964 {
2965 	caddr_t vendordata, info;
2966 
2967 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2968 		info = preload_search_info(vendordata, MODINFO_ADDR);
2969 		pci_vendordata = *(char **)info;
2970 		info = preload_search_info(vendordata, MODINFO_SIZE);
2971 		pci_vendordata_size = *(size_t *)info;
2972 		/* terminate the database */
2973 		pci_vendordata[pci_vendordata_size] = '\n';
2974 	}
2975 }
2976 
2977 void
2978 pci_driver_added(device_t dev, driver_t *driver)
2979 {
2980 	int numdevs;
2981 	device_t *devlist;
2982 	device_t child;
2983 	struct pci_devinfo *dinfo;
2984 	int i;
2985 
2986 	if (bootverbose)
2987 		device_printf(dev, "driver added\n");
2988 	DEVICE_IDENTIFY(driver, dev);
2989 	if (device_get_children(dev, &devlist, &numdevs) != 0)
2990 		return;
2991 	for (i = 0; i < numdevs; i++) {
2992 		child = devlist[i];
2993 		if (device_get_state(child) != DS_NOTPRESENT)
2994 			continue;
2995 		dinfo = device_get_ivars(child);
2996 		pci_print_verbose(dinfo);
2997 		if (bootverbose)
2998 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
2999 		pci_cfg_restore(child, dinfo);
3000 		if (device_probe_and_attach(child) != 0)
3001 			pci_cfg_save(child, dinfo, 1);
3002 	}
3003 	free(devlist, M_TEMP);
3004 }
3005 
/*
 * Set up an interrupt handler on a child device's IRQ resource.  For a
 * legacy INTx interrupt (rid 0) this just re-enables INTx decoding in
 * the command register.  For MSI/MSI-X (rid > 0) it additionally asks
 * the parent bridge to map the message on first use, programs the
 * message address/data registers, and keeps a per-message handler
 * reference count so the message is enabled exactly once.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI vectors once, on the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Must be MSI-X: rid N selects table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the message once, the first time it is used. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* On success we fall through to "bad" with error == 0. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3097 
3098 int
3099 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3100     void *cookie)
3101 {
3102 	struct msix_table_entry *mte;
3103 	struct resource_list_entry *rle;
3104 	struct pci_devinfo *dinfo;
3105 	int error, rid;
3106 
3107 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3108 		return (EINVAL);
3109 
3110 	/* If this isn't a direct child, just bail out */
3111 	if (device_get_parent(child) != dev)
3112 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3113 
3114 	rid = rman_get_rid(irq);
3115 	if (rid == 0) {
3116 		/* Mask INTx */
3117 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3118 	} else {
3119 		/*
3120 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3121 		 * decrement the appropriate handlers count and mask the
3122 		 * MSI-X message, or disable MSI messages if the count
3123 		 * drops to 0.
3124 		 */
3125 		dinfo = device_get_ivars(child);
3126 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3127 		if (rle->res != irq)
3128 			return (EINVAL);
3129 		if (dinfo->cfg.msi.msi_alloc > 0) {
3130 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3131 			    ("MSI-X index too high"));
3132 			if (dinfo->cfg.msi.msi_handlers == 0)
3133 				return (EINVAL);
3134 			dinfo->cfg.msi.msi_handlers--;
3135 			if (dinfo->cfg.msi.msi_handlers == 0)
3136 				pci_disable_msi(child);
3137 		} else {
3138 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3139 			    ("No MSI or MSI-X interrupts allocated"));
3140 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3141 			    ("MSI-X index too high"));
3142 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3143 			if (mte->mte_handlers == 0)
3144 				return (EINVAL);
3145 			mte->mte_handlers--;
3146 			if (mte->mte_handlers == 0)
3147 				pci_mask_msix(child, rid - 1);
3148 		}
3149 	}
3150 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3151 	if (rid > 0)
3152 		KASSERT(error == 0,
3153 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3154 	return (error);
3155 }
3156 
3157 int
3158 pci_print_child(device_t dev, device_t child)
3159 {
3160 	struct pci_devinfo *dinfo;
3161 	struct resource_list *rl;
3162 	int retval = 0;
3163 
3164 	dinfo = device_get_ivars(child);
3165 	rl = &dinfo->resources;
3166 
3167 	retval += bus_print_child_header(dev, child);
3168 
3169 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3170 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3171 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3172 	if (device_get_flags(dev))
3173 		retval += printf(" flags %#x", device_get_flags(dev));
3174 
3175 	retval += printf(" at device %d.%d", pci_get_slot(child),
3176 	    pci_get_function(child));
3177 
3178 	retval += bus_print_child_footer(dev, child);
3179 
3180 	return (retval);
3181 }
3182 
/*
 * Class/subclass -> description table used by pci_probe_nomatch() when
 * the vendor database has no entry for a device.  An entry with
 * subclass -1 names the class as a whole; the list is terminated by a
 * NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3274 
3275 void
3276 pci_probe_nomatch(device_t dev, device_t child)
3277 {
3278 	int	i;
3279 	char	*cp, *scp, *device;
3280 
3281 	/*
3282 	 * Look for a listing for this device in a loaded device database.
3283 	 */
3284 	if ((device = pci_describe_device(child)) != NULL) {
3285 		device_printf(dev, "<%s>", device);
3286 		free(device, M_DEVBUF);
3287 	} else {
3288 		/*
3289 		 * Scan the class/subclass descriptions for a general
3290 		 * description.
3291 		 */
3292 		cp = "unknown";
3293 		scp = NULL;
3294 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3295 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3296 				if (pci_nomatch_tab[i].subclass == -1) {
3297 					cp = pci_nomatch_tab[i].desc;
3298 				} else if (pci_nomatch_tab[i].subclass ==
3299 				    pci_get_subclass(child)) {
3300 					scp = pci_nomatch_tab[i].desc;
3301 				}
3302 			}
3303 		}
3304 		device_printf(dev, "<%s%s%s>",
3305 		    cp ? cp : "",
3306 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3307 		    scp ? scp : "");
3308 	}
3309 	printf(" at device %d.%d (no driver attached)\n",
3310 	    pci_get_slot(child), pci_get_function(child));
3311 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3312 	return;
3313 }
3314 
3315 /*
3316  * Parse the PCI device database, if loaded, and return a pointer to a
3317  * description of the device.
3318  *
3319  * The database is flat text formatted as follows:
3320  *
3321  * Any line not in a valid format is ignored.
3322  * Lines are terminated with newline '\n' characters.
3323  *
3324  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3325  * the vendor name.
3326  *
3327  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3328  * - devices cannot be listed without a corresponding VENDOR line.
3329  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3330  * another TAB, then the device name.
3331  */
3332 
3333 /*
3334  * Assuming (ptr) points to the beginning of a line in the database,
3335  * return the vendor or device and description of the next entry.
3336  * The value of (vendor) or (device) inappropriate for the entry type
3337  * is set to -1.  Returns nonzero at the end of the database.
3338  *
3339  * Note that this is slightly unrobust in the face of corrupt data;
3340  * we attempt to safeguard against this by spamming the end of the
3341  * database with a newline when we initialise.
3342  */
3343 static int
3344 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3345 {
3346 	char	*cp = *ptr;
3347 	int	left;
3348 
3349 	*device = -1;
3350 	*vendor = -1;
3351 	**desc = '\0';
3352 	for (;;) {
3353 		left = pci_vendordata_size - (cp - pci_vendordata);
3354 		if (left <= 0) {
3355 			*ptr = cp;
3356 			return(1);
3357 		}
3358 
3359 		/* vendor entry? */
3360 		if (*cp != '\t' &&
3361 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3362 			break;
3363 		/* device entry? */
3364 		if (*cp == '\t' &&
3365 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3366 			break;
3367 
3368 		/* skip to next line */
3369 		while (*cp != '\n' && left > 0) {
3370 			cp++;
3371 			left--;
3372 		}
3373 		if (*cp == '\n') {
3374 			cp++;
3375 			left--;
3376 		}
3377 	}
3378 	/* skip to next line */
3379 	while (*cp != '\n' && left > 0) {
3380 		cp++;
3381 		left--;
3382 	}
3383 	if (*cp == '\n' && left > 0)
3384 		cp++;
3385 	*ptr = cp;
3386 	return(0);
3387 }
3388 
3389 static char *
3390 pci_describe_device(device_t dev)
3391 {
3392 	int	vendor, device;
3393 	char	*desc, *vp, *dp, *line;
3394 
3395 	desc = vp = dp = NULL;
3396 
3397 	/*
3398 	 * If we have no vendor data, we can't do anything.
3399 	 */
3400 	if (pci_vendordata == NULL)
3401 		goto out;
3402 
3403 	/*
3404 	 * Scan the vendor data looking for this device
3405 	 */
3406 	line = pci_vendordata;
3407 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3408 		goto out;
3409 	for (;;) {
3410 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3411 			goto out;
3412 		if (vendor == pci_get_vendor(dev))
3413 			break;
3414 	}
3415 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3416 		goto out;
3417 	for (;;) {
3418 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3419 			*dp = 0;
3420 			break;
3421 		}
3422 		if (vendor != -1) {
3423 			*dp = 0;
3424 			break;
3425 		}
3426 		if (device == pci_get_device(dev))
3427 			break;
3428 	}
3429 	if (dp[0] == '\0')
3430 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3431 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3432 	    NULL)
3433 		sprintf(desc, "%s, %s", vp, dp);
3434  out:
3435 	if (vp != NULL)
3436 		free(vp, M_DEVBUF);
3437 	if (dp != NULL)
3438 		free(dp, M_DEVBUF);
3439 	return(desc);
3440 }
3441 
/*
 * Bus method: read an instance variable of a PCI child.  Each ivar maps
 * directly onto a field of the cached config registers.  Returns 0 on
 * success, EINVAL for PCI_IVAR_ETHADDR (unsupported here), or ENOENT
 * for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3524 
/*
 * Bus method: write an instance variable of a PCI child.  Only the
 * interrupt pin is writable; the identification ivars are read-only
 * and return EINVAL, unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3557 
3558 
3559 #include "opt_ddb.h"
3560 #ifdef DDB
3561 #include <ddb/ddb.h>
3562 #include <sys/cons.h>
3563 
3564 /*
3565  * List resources based on pci map registers, used for within ddb
3566  */
3567 
/*
 * ddb "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (name/unit, domain:bus:dev:func,
 * class, subsystem, vendor/device IDs, revision, header type).
 * Unnamed devices are printed as "none<N>".
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3607 #endif /* DDB */
3608 
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * allocates it: size the BAR by probing it, allocate a suitably sized
 * and aligned range from the parent, record it in the child's resource
 * list as RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved resource, or NULL if the BAR is unimplemented
 * or the request is inconsistent with the BAR type.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	/* Mark the entry reserved so it survives release/realloc cycles. */
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3704 
3705 
/*
 * Bus method: allocate a resource for a child.  Requests from indirect
 * children pass straight through to our parent.  For direct children,
 * legacy IRQ requests are refused once MSI/MSI-X is in use (and an
 * interrupt is routed on demand if needed), and BAR-backed I/O/memory
 * ranges are lazily reserved via pci_reserve_map() on first use before
 * the resource list satisfies the allocation.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All types fall through to the generic resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3756 
3757 int
3758 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3759     struct resource *r)
3760 {
3761 	int error;
3762 
3763 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3764 	if (error)
3765 		return (error);
3766 
3767 	/* Enable decoding in the command register when activating BARs. */
3768 	if (device_get_parent(child) == dev) {
3769 		/* Device ROMs need their decoding explicitly enabled. */
3770 		if (rid == PCIR_BIOS)
3771 			pci_write_config(child, rid, rman_get_start(r) |
3772 			    PCIM_BIOS_ENABLE, 4);
3773 		switch (type) {
3774 		case SYS_RES_IOPORT:
3775 		case SYS_RES_MEMORY:
3776 			error = PCI_ENABLE_IO(dev, child, type);
3777 			break;
3778 		}
3779 	}
3780 	return (error);
3781 }
3782 
3783 int
3784 pci_deactivate_resource(device_t dev, device_t child, int type,
3785     int rid, struct resource *r)
3786 {
3787 	int error;
3788 
3789 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3790 	if (error)
3791 		return (error);
3792 
3793 	/* Disable decoding for device ROMs. */
3794 	if (rid == PCIR_BIOS)
3795 		pci_write_config(child, rid, rman_get_start(r), 4);
3796 	return (0);
3797 }
3798 
/*
 * Detach and destroy a PCI child device: detach the driver, disable
 * memory/I/O decoding, release and unreserve every resource on the
 * child's resource list, then delete the device and free its config
 * state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/* Force-release anything still active or busy. */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3838 
/*
 * Bus method: delete one resource entry from a direct child's resource
 * list.  If the entry is still reserved, clear the backing BAR (so the
 * device stops decoding) and unreserve it first; refuses to delete a
 * resource that is active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3881 
3882 struct resource_list *
3883 pci_get_resource_list (device_t dev, device_t child)
3884 {
3885 	struct pci_devinfo *dinfo = device_get_ivars(child);
3886 
3887 	return (&dinfo->resources);
3888 }
3889 
3890 uint32_t
3891 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3892 {
3893 	struct pci_devinfo *dinfo = device_get_ivars(child);
3894 	pcicfgregs *cfg = &dinfo->cfg;
3895 
3896 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3897 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3898 }
3899 
3900 void
3901 pci_write_config_method(device_t dev, device_t child, int reg,
3902     uint32_t val, int width)
3903 {
3904 	struct pci_devinfo *dinfo = device_get_ivars(child);
3905 	pcicfgregs *cfg = &dinfo->cfg;
3906 
3907 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3908 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3909 }
3910 
3911 int
3912 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3913     size_t buflen)
3914 {
3915 
3916 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3917 	    pci_get_function(child));
3918 	return (0);
3919 }
3920 
3921 int
3922 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3923     size_t buflen)
3924 {
3925 	struct pci_devinfo *dinfo;
3926 	pcicfgregs *cfg;
3927 
3928 	dinfo = device_get_ivars(child);
3929 	cfg = &dinfo->cfg;
3930 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3931 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3932 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3933 	    cfg->progif);
3934 	return (0);
3935 }
3936 
3937 int
3938 pci_assign_interrupt_method(device_t dev, device_t child)
3939 {
3940 	struct pci_devinfo *dinfo = device_get_ivars(child);
3941 	pcicfgregs *cfg = &dinfo->cfg;
3942 
3943 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3944 	    cfg->intpin));
3945 }
3946 
3947 static int
3948 pci_modevent(module_t mod, int what, void *arg)
3949 {
3950 	static struct cdev *pci_cdev;
3951 
3952 	switch (what) {
3953 	case MOD_LOAD:
3954 		STAILQ_INIT(&pci_devq);
3955 		pci_generation = 0;
3956 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3957 		    "pci");
3958 		pci_load_vendor_data();
3959 		break;
3960 
3961 	case MOD_UNLOAD:
3962 		destroy_dev(pci_cdev);
3963 		break;
3964 	}
3965 
3966 	return (0);
3967 }
3968 
/*
 * Restore a device's saved configuration space (BARs, command register,
 * interrupt routing, timing registers) and its MSI/MSI-X state, e.g.
 * after a suspend/resume cycle or before reprobing.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the BARs before re-enabling decoding via the command reg. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4014 
/*
 * Snapshot a device's config-space registers into dinfo so they can later
 * be restored by pci_cfg_restore(), and optionally (setstate != 0) place
 * the device in the D3 low-power state, subject to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Apply the pci_do_power_nodriver powerdown policy tunable. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4098